All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hash.h
Go to the documentation of this file.
1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 // names, trademarks, service marks, or product names of the Licensor
11 // and its affiliates, except as required to comply with Section 4(c) of
12 // the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 // http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_TF_HASH_H
25 #define PXR_BASE_TF_HASH_H
26 
29 
30 #include "pxr/pxr.h"
31 #include "pxr/base/tf/tf.h"
32 #include "pxr/base/tf/api.h"
33 
34 #include <cstring>
35 #include <string>
36 #include <type_traits>
37 #include <utility>
38 #include <vector>
39 
40 PXR_NAMESPACE_OPEN_SCOPE
41 
42 // Support integers.
43 template <class HashState, class T>
44 std::enable_if_t<std::is_integral<T>::value>
45 TfHashAppend(HashState &h, T integral)
46 {
47  h.Append(integral);
48 }
49 
50 // Simple metafunction that returns an unsigned integral type given a size in
51 // bytes.
52 template <size_t Size> struct Tf_SizedUnsignedInt;
53 template <> struct Tf_SizedUnsignedInt<1> { using type = uint8_t; };
54 template <> struct Tf_SizedUnsignedInt<2> { using type = uint16_t; };
55 template <> struct Tf_SizedUnsignedInt<4> { using type = uint32_t; };
56 template <> struct Tf_SizedUnsignedInt<8> { using type = uint64_t; };
57 
58 // Support enums.
59 template <class HashState, class Enum>
60 std::enable_if_t<std::is_enum<Enum>::value>
61 TfHashAppend(HashState &h, Enum e)
62 {
63  h.Append(static_cast<std::underlying_type_t<Enum>>(e));
64 }
65 
66 // Support floating point.
67 template <class HashState, class T>
68 std::enable_if_t<std::is_floating_point<T>::value>
69 TfHashAppend(HashState &h, T fp)
70 {
71  // We want both positive and negative zero to hash the same, so we have to
72  // check against zero here.
73  typename Tf_SizedUnsignedInt<sizeof(T)>::type intbuf = 0;
74  if (fp != static_cast<T>(0)) {
75  memcpy(&intbuf, &fp, sizeof(T));
76  }
77  h.Append(intbuf);
78 }
79 
80 // Support std::pair.
81 template <class HashState, class T, class U>
82 inline void
83 TfHashAppend(HashState &h, std::pair<T, U> const &p)
84 {
85  h.Append(p.first);
86  h.Append(p.second);
87 }
88 
89 // Support std::vector.
90 template <class HashState, class T>
91 inline void
92 TfHashAppend(HashState &h, std::vector<T> const &vec)
93 {
94  h.AppendContiguous(vec.data(), vec.size());
95 }
96 
97 // Support for hashing std::string.
98 template <class HashState>
99 inline void
100 TfHashAppend(HashState &h, const std::string& s)
101 {
102  return h.AppendContiguous(s.c_str(), s.length());
103 }
104 
105 // Support for hashing pointers, but we explicitly delete the version for
106 // [const] char pointers. See more below.
107 template <class HashState, class T>
108 inline void
109 TfHashAppend(HashState &h, const T* ptr) {
110  return h.Append(reinterpret_cast<uintptr_t>(ptr));
111 }
112 
113 // We refuse to hash [const] char *. You're almost certainly trying to hash the
114 // pointed-to string and this will not do that (it will hash the pointer
115 // itself). To hash a c-style null terminated string, you can use
116 // TfHashAsCStr() to indicate the intent, or use TfHashCString. If you
117 // really want to hash the pointer then use static_cast<const void*>(ptr) or
118 // TfHashCharPtr.
119 template <class HashState>
120 inline void TfHashAppend(HashState &h, char const *ptr) = delete;
121 template <class HashState>
122 inline void TfHashAppend(HashState &h, char *ptr) = delete;
123 
127 {
128  explicit TfCStrHashWrapper(char const *cstr) : cstr(cstr) {}
129  char const *cstr;
130 };
131 
139 inline TfCStrHashWrapper
140 TfHashAsCStr(char const *cstr)
141 {
142  return TfCStrHashWrapper(cstr);
143 }
144 
145 template <class HashState>
146 inline void TfHashAppend(HashState &h, TfCStrHashWrapper hcstr)
147 {
148  return h.AppendContiguous(hcstr.cstr, std::strlen(hcstr.cstr));
149 }
150 
151 // Implementation detail: dispatch based on hash capability: Try TfHashAppend
152 // first, otherwise try hash_value. We'd like to otherwise try std::hash<T>,
153 // but std::hash<> is not SFINAE-friendly until c++17 and this code needs to
154 // support c++14 currently. We rely on a combination of expression SFINAE and
155 // establishing preferred order by passing a 0 constant and having the overloads
156 // take int (highest priority), long (next priority) and '...' (lowest
157 // priority).
158 
159 // std::hash version, attempted last. Consider adding when we move to
160 // C++17 or newer.
161 /*
162 template <class HashState, class T>
163 inline auto Tf_HashImpl(HashState &h, T &&obj, ...)
164  -> decltype(std::hash<typename std::decay<T>::type>()(
165  std::forward<T>(obj)), void())
166 {
167  TfHashAppend(
168  h, std::hash<typename std::decay<T>::type>()(std::forward<T>(obj)));
169 }
170 */
171 
172 // hash_value, attempted second.
173 template <class HashState, class T>
174 inline auto Tf_HashImpl(HashState &h, T &&obj, long)
175  -> decltype(hash_value(std::forward<T>(obj)), void())
176 {
177  TfHashAppend(h, hash_value(std::forward<T>(obj)));
178 }
179 
180 // TfHashAppend, attempted first.
181 template <class HashState, class T>
182 inline auto Tf_HashImpl(HashState &h, T &&obj, int)
183  -> decltype(TfHashAppend(h, std::forward<T>(obj)), void())
184 {
185  TfHashAppend(h, std::forward<T>(obj));
186 }
187 
188 // Implementation detail, CRTP base that provides the public interface for hash
189 // state implementations.
190 template <class Derived>
191 class Tf_HashStateAPI
192 {
193 public:
194  // Append several objects to the hash state.
195  template <class... Args>
196  void Append(Args &&... args) {
197  _AppendImpl(args...);
198  }
199 
200  // Append contiguous objects to the hash state.
201  template <class T>
202  void AppendContiguous(T const *elems, size_t numElems) {
203  this->_AsDerived()._AppendContiguous(elems, numElems);
204  }
205 
206  // Append a range of objects to the hash state.
207  template <class Iter>
208  void AppendRange(Iter f, Iter l) {
209  this->_AsDerived()._AppendRange(f, l);
210  }
211 
212  // Return the hash code for the current state.
213  size_t GetCode() const {
214  return this->_AsDerived()._GetCode();
215  }
216 
217 private:
218  template <class T, class... Args>
219  void _AppendImpl(T &&obj, Args &&... rest) {
220  this->_AsDerived()._Append(std::forward<T>(obj));
221  _AppendImpl(rest...);
222  }
223  void _AppendImpl() const {
224  // base case intentionally empty.
225  }
226 
227  Derived &_AsDerived() {
228  return *static_cast<Derived *>(this);
229  }
230 
231  Derived const &_AsDerived() const {
232  return *static_cast<Derived const *>(this);
233  }
234 };
235 
236 // Implementation detail, accumulates hashes.
237 class Tf_HashState : public Tf_HashStateAPI<Tf_HashState>
238 {
239 private:
240  friend class Tf_HashStateAPI<Tf_HashState>;
241 
242  // Go thru Tf_HashImpl for non-integers.
243  template <class T>
244  std::enable_if_t<!std::is_integral<std::decay_t<T>>::value>
245  _Append(T &&obj) {
246  Tf_HashImpl(*this, std::forward<T>(obj), 0);
247  }
248 
249  // Integers bottom out here.
250  template <class T>
251  std::enable_if_t<std::is_integral<T>::value>
252  _Append(T i) {
253  if (!_didOne) {
254  _state = i;
255  _didOne = true;
256  }
257  else {
258  _state = _Combine(_state, i);
259  }
260  }
261 
262  // Append contiguous objects.
263  template <class T>
264  std::enable_if_t<std::is_integral<T>::value>
265  _AppendContiguous(T const *elems, size_t numElems) {
266  _AppendBytes(reinterpret_cast<char const *>(elems),
267  numElems * sizeof(T));
268  }
269 
270  // Append contiguous objects.
271  template <class T>
272  std::enable_if_t<!std::is_integral<T>::value>
273  _AppendContiguous(T const *elems, size_t numElems) {
274  while (numElems--) {
275  Append(*elems++);
276  }
277  }
278 
279  // Append a range.
280  template <class Iter>
281  void _AppendRange(Iter f, Iter l) {
282  while (f != l) {
283  _Append(*f++);
284  }
285  }
286 
288  TF_API void _AppendBytes(char const *bytes, size_t numBytes);
289 
290  // Return the hash code for the accumulated hash state.
291  size_t _GetCode() const {
292  // This is based on Knuth's multiplicative hash for integers. The
293  // constant is the closest prime to the binary expansion of the inverse
294  // golden ratio. The best way to produce a hash table bucket index from
295  // the result is to shift the result right, since the higher order bits
296  // have the most entropy. But since we can't know the number of buckets
297  // in a table that's using this, we just reverse the byte order instead,
298  // to get the highest entropy bits into the low-order bytes.
299  return _SwapByteOrder(_state * 11400714819323198549ULL);
300  }
301 
302  // This turns into a single bswap/pshufb type instruction on most compilers.
303  inline uint64_t
304  _SwapByteOrder(uint64_t val) const {
305  val =
306  ((val & 0xFF00000000000000u) >> 56u) |
307  ((val & 0x00FF000000000000u) >> 40u) |
308  ((val & 0x0000FF0000000000u) >> 24u) |
309  ((val & 0x000000FF00000000u) >> 8u) |
310  ((val & 0x00000000FF000000u) << 8u) |
311  ((val & 0x0000000000FF0000u) << 24u) |
312  ((val & 0x000000000000FF00u) << 40u) |
313  ((val & 0x00000000000000FFu) << 56u);
314  return val;
315  }
316 
317  size_t _Combine(size_t x, size_t y) const {
318  // This is our hash combiner. The task is, given two hash codes x and
319  // y, compute a single hash code. One way to do this is to exclusive-or
320  // the two codes together, but this can produce bad results if they
321  // differ by some fixed amount, For example if the input codes are
322  // multiples of 32, and the two codes we're given are N and N + 32 (this
323  // happens commonly when the hashed values are memory addresses) then
324  // the resulting hash codes for successive pairs of these produces many
325  // repeating values (32, 96, 32, XXX, 32, 96, 32, YYY...). That's a lot
326  // of collisions.
327  //
328  // Instead we combine hash values by assigning numbers to the lattice
329  // points in the plane, and then treating the inputs x and y as
330  // coordinates identifying a lattice point. Then the combined hash
331  // value is just the number assigned to the lattice point. This way
332  // each unique input pair (x, y) gets a unique output hash code.
333  //
334  // We number lattice points by triangular numbers like this:
335  //
336  // X 0 1 2 3 4 5
337  // Y
338  // 0 0 2 5 9 14 20
339  // 1 1 4 8 13 19 26
340  // 2 3 7 12 18 25 33
341  // 3 6 11 17 24 32 41
342  // 4 10 16 23 31 40 50
343  // 5 15 22 30 39 49 60
344  //
345  // This takes a couple of additions and a multiplication, which is a bit
346  // more expensive than something like an exclusive or, but the quality
347  // improvement outweighs the added expense.
348  x += y;
349  return y + x * (x + 1) / 2;
350  }
351 
352  size_t _state = 0;
353  bool _didOne = false;
354 };
355 
447 class TfHash {
448 public:
451  template <class T>
452  auto operator()(T &&obj) const ->
453  decltype(Tf_HashImpl(std::declval<Tf_HashState &>(),
454  std::forward<T>(obj), 0), size_t()) {
455  Tf_HashState h;
456  Tf_HashImpl(h, std::forward<T>(obj), 0);
457  return h.GetCode();
458  }
459 
461  template <class... Args>
462  static size_t Combine(Args &&... args) {
463  Tf_HashState h;
464  _CombineImpl(h, args...);
465  return h.GetCode();
466  }
467 
468 private:
469  template <class HashState, class T, class... Args>
470  static void _CombineImpl(HashState &h, T &&obj, Args &&... rest) {
471  Tf_HashImpl(h, std::forward<T>(obj), 0);
472  _CombineImpl(h, rest...);
473  }
474 
475  template <class HashState>
476  static void _CombineImpl(HashState &h) {
477  // base case does nothing
478  }
479 };
480 
483  size_t operator()(const char* ptr) const;
484 };
485 
488  size_t operator()(const char* ptr) const;
489 };
490 
493  bool operator()(const char* lhs, const char* rhs) const;
494 };
495 
496 PXR_NAMESPACE_CLOSE_SCOPE
497 
498 #endif
A hash function object that hashes the address of a char pointer.
Definition: hash.h:482
A structure that wraps a char pointer, indicating intent that it should be hashed as a c-style null t...
Definition: hash.h:126
A user-extensible hashing mechanism for use with runtime hash tables.
Definition: hash.h:447
auto operator()(T &&obj) const -> decltype(Tf_HashImpl(std::declval< Tf_HashState & >(), std::forward< T >(obj), 0), size_t())
Produce a hash code for obj.
Definition: hash.h:452
A hash function object that hashes null-terminated c-string content.
Definition: hash.h:487
A function object that compares two c-strings for equality.
Definition: hash.h:492
TfCStrHashWrapper TfHashAsCStr(char const *cstr)
Indicate that a char pointer is intended to be hashed as a C-style null terminated string...
Definition: hash.h:140
static size_t Combine(Args &&...args)
Produce a hash code by combining the hash codes of several objects.
Definition: hash.h:462
A file containing basic constants and definitions.