Loading...
Searching...
No Matches
hash.h
Go to the documentation of this file.
1//
2// Copyright 2016 Pixar
3//
4// Licensed under the Apache License, Version 2.0 (the "Apache License")
5// with the following modification; you may not use this file except in
6// compliance with the Apache License and the following modification to it:
7// Section 6. Trademarks. is deleted and replaced with:
8//
9// 6. Trademarks. This License does not grant permission to use the trade
10// names, trademarks, service marks, or product names of the Licensor
11// and its affiliates, except as required to comply with Section 4(c) of
12// the License and to reproduce the content of the NOTICE file.
13//
14// You may obtain a copy of the Apache License at
15//
16// http://www.apache.org/licenses/LICENSE-2.0
17//
18// Unless required by applicable law or agreed to in writing, software
19// distributed under the Apache License with the above modification is
20// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21// KIND, either express or implied. See the Apache License for the specific
22// language governing permissions and limitations under the Apache License.
23//
24#ifndef PXR_BASE_TF_HASH_H
25#define PXR_BASE_TF_HASH_H
26
29
30#include "pxr/pxr.h"
31#include "pxr/base/tf/tf.h"
32#include "pxr/base/tf/api.h"
33
34#include <cstring>
35#include <string>
36#include <map>
37#include <memory>
38#include <set>
39#include <typeindex>
40#include <type_traits>
41#include <utility>
42#include <vector>
43
44PXR_NAMESPACE_OPEN_SCOPE
45
46// Support integers.
47template <class HashState, class T>
48std::enable_if_t<std::is_integral<T>::value>
49TfHashAppend(HashState &h, T integral)
50{
51 h.Append(integral);
52}
53
54// Simple metafunction that returns an unsigned integral type given a size in
55// bytes.
56template <size_t Size> struct Tf_SizedUnsignedInt;
57template <> struct Tf_SizedUnsignedInt<1> { using type = uint8_t; };
58template <> struct Tf_SizedUnsignedInt<2> { using type = uint16_t; };
59template <> struct Tf_SizedUnsignedInt<4> { using type = uint32_t; };
60template <> struct Tf_SizedUnsignedInt<8> { using type = uint64_t; };
61
62// Support enums.
63template <class HashState, class Enum>
64std::enable_if_t<std::is_enum<Enum>::value>
65TfHashAppend(HashState &h, Enum e)
66{
67 h.Append(static_cast<std::underlying_type_t<Enum>>(e));
68}
69
70// Support floating point.
71template <class HashState, class T>
72std::enable_if_t<std::is_floating_point<T>::value>
73TfHashAppend(HashState &h, T fp)
74{
75 // We want both positive and negative zero to hash the same, so we have to
76 // check against zero here.
77 typename Tf_SizedUnsignedInt<sizeof(T)>::type intbuf = 0;
78 if (fp != static_cast<T>(0)) {
79 memcpy(&intbuf, &fp, sizeof(T));
80 }
81 h.Append(intbuf);
82}
83
84// Support std::pair.
85template <class HashState, class T, class U>
86inline void
87TfHashAppend(HashState &h, std::pair<T, U> const &p)
88{
89 h.Append(p.first);
90 h.Append(p.second);
91}
92
93// Support std::vector. std::vector<bool> specialized below.
94template <class HashState, class T>
95inline void
96TfHashAppend(HashState &h, std::vector<T> const &vec)
97{
98 static_assert(!std::is_same_v<std::remove_cv_t<T>, bool>,
99 "Unexpected usage of vector of 'bool'."
100 "Expected explicit overload.");
101 h.AppendContiguous(vec.data(), vec.size());
102}
103
104// Support std::vector<bool>.
105template <class HashState>
106inline void
107TfHashAppend(HashState &h, std::vector<bool> const &vec)
108{
109 h.Append(std::hash<std::vector<bool>>{}(vec));
110}
111
112// Support std::set.
113// NOTE: Supporting std::unordered_set is complicated because the traversal
114// order is not guaranteed
115template <class HashState, class T, class Compare>
116inline void
117TfHashAppend(HashState& h, std::set<T, Compare> const &elements)
118{
119 h.AppendRange(std::begin(elements), std::end(elements));
120}
121
122// Support std::map.
123// NOTE: Supporting std::unordered_map is complicated because the traversal
124// order is not guaranteed
125template <class HashState, class Key, class Value, class Compare>
126inline void
127TfHashAppend(HashState& h, std::map<Key, Value, Compare> const &elements)
128{
129 h.AppendRange(std::begin(elements), std::end(elements));
130}
131
132// Support for hashing std::string.
133template <class HashState>
134inline void
135TfHashAppend(HashState &h, const std::string& s)
136{
137 return h.AppendContiguous(s.c_str(), s.length());
138}
139
140// Support for hashing pointers, but we explicitly delete the version for
141// [const] char pointers. See more below.
142template <class HashState, class T>
143inline void
144TfHashAppend(HashState &h, const T* ptr) {
145 return h.Append(reinterpret_cast<uintptr_t>(ptr));
146}
147
148// We refuse to hash [const] char *. You're almost certainly trying to hash the
149// pointed-to string and this will not do that (it will hash the pointer
150// itself). To hash a c-style null terminated string, you can use
151// TfHashAsCStr() to indicate the intent, or use TfHashCString. If you
152// really want to hash the pointer then use static_cast<const void*>(ptr) or
153// TfHashCharPtr.
154template <class HashState>
155inline void TfHashAppend(HashState &h, char const *ptr) = delete;
156template <class HashState>
157inline void TfHashAppend(HashState &h, char *ptr) = delete;
158
162{
163 explicit TfCStrHashWrapper(char const *cstr) : cstr(cstr) {}
164 char const *cstr;
165};
166
175TfHashAsCStr(char const *cstr)
176{
177 return TfCStrHashWrapper(cstr);
178}
179
180template <class HashState>
181inline void TfHashAppend(HashState &h, TfCStrHashWrapper hcstr)
182{
183 return h.AppendContiguous(hcstr.cstr, std::strlen(hcstr.cstr));
184}
185
186// Implementation detail: dispatch based on hash capability: Try TfHashAppend
187// first, otherwise try std::hash, followed by hash_value. We rely on a
188// combination of expression SFINAE and establishing preferred order by passing
189// a 0 constant and having the overloads take int (highest priority), long
190// (next priority) and '...' (lowest priority).
191
192// std::hash version, attempted second.
193template <class HashState, class T>
194inline auto Tf_HashImpl(HashState &h, T &&obj, long)
195 -> decltype(std::hash<typename std::decay<T>::type>()(
196 std::forward<T>(obj)), void())
197{
198 TfHashAppend(
199 h, std::hash<typename std::decay<T>::type>()(std::forward<T>(obj)));
200}
201
202// hash_value, attempted last.
203template <class HashState, class T>
204inline auto Tf_HashImpl(HashState &h, T &&obj, ...)
205 -> decltype(hash_value(std::forward<T>(obj)), void())
206{
207 TfHashAppend(h, hash_value(std::forward<T>(obj)));
208}
209
210// TfHashAppend, attempted first.
211template <class HashState, class T>
212inline auto Tf_HashImpl(HashState &h, T &&obj, int)
213 -> decltype(TfHashAppend(h, std::forward<T>(obj)), void())
214{
215 TfHashAppend(h, std::forward<T>(obj));
216}
217
218// Implementation detail, CRTP base that provides the public interface for hash
219// state implementations.
220template <class Derived>
221class Tf_HashStateAPI
222{
223public:
224 // Append several objects to the hash state.
225 template <class... Args>
226 void Append(Args &&... args) {
227 _AppendImpl(std::forward<Args>(args)...);
228 }
229
230 // Append contiguous objects to the hash state.
231 template <class T>
232 void AppendContiguous(T const *elems, size_t numElems) {
233 this->_AsDerived()._AppendContiguous(elems, numElems);
234 }
235
236 // Append a range of objects to the hash state.
237 template <class Iter>
238 void AppendRange(Iter f, Iter l) {
239 this->_AsDerived()._AppendRange(f, l);
240 }
241
242 // Return the hash code for the current state.
243 size_t GetCode() const {
244 return this->_AsDerived()._GetCode();
245 }
246
247private:
248 template <class T, class... Args>
249 void _AppendImpl(T &&obj, Args &&... rest) {
250 this->_AsDerived()._Append(std::forward<T>(obj));
251 _AppendImpl(std::forward<Args>(rest)...);
252 }
253 void _AppendImpl() const {
254 // base case intentionally empty.
255 }
256
257 Derived &_AsDerived() {
258 return *static_cast<Derived *>(this);
259 }
260
261 Derived const &_AsDerived() const {
262 return *static_cast<Derived const *>(this);
263 }
264};
265
266// Implementation detail, accumulates hashes.
267class Tf_HashState : public Tf_HashStateAPI<Tf_HashState>
268{
269private:
270 friend class Tf_HashStateAPI<Tf_HashState>;
271
272 // Go thru Tf_HashImpl for non-integers.
273 template <class T>
274 std::enable_if_t<!std::is_integral<std::decay_t<T>>::value>
275 _Append(T &&obj) {
276 Tf_HashImpl(*this, std::forward<T>(obj), 0);
277 }
278
279 // Integers bottom out here.
280 template <class T>
281 std::enable_if_t<std::is_integral<T>::value>
282 _Append(T i) {
283 if (!_didOne) {
284 _state = i;
285 _didOne = true;
286 }
287 else {
288 _state = _Combine(_state, i);
289 }
290 }
291
292 // Append contiguous objects.
293 template <class T>
294 std::enable_if_t<std::is_integral<T>::value>
295 _AppendContiguous(T const *elems, size_t numElems) {
296 _AppendBytes(reinterpret_cast<char const *>(elems),
297 numElems * sizeof(T));
298 }
299
300 // Append contiguous objects.
301 template <class T>
302 std::enable_if_t<!std::is_integral<T>::value>
303 _AppendContiguous(T const *elems, size_t numElems) {
304 while (numElems--) {
305 Append(*elems++);
306 }
307 }
308
309 // Append a range.
310 template <class Iter>
311 void _AppendRange(Iter f, Iter l) {
312 while (f != l) {
313 _Append(*f++);
314 }
315 }
316
318 TF_API void _AppendBytes(char const *bytes, size_t numBytes);
319
320 // Return the hash code for the accumulated hash state.
321 size_t _GetCode() const {
322 // This is based on Knuth's multiplicative hash for integers. The
323 // constant is the closest prime to the binary expansion of the inverse
324 // golden ratio. The best way to produce a hash table bucket index from
325 // the result is to shift the result right, since the higher order bits
326 // have the most entropy. But since we can't know the number of buckets
327 // in a table that's using this, we just reverse the byte order instead,
328 // to get the highest entropy bits into the low-order bytes.
329 return _SwapByteOrder(_state * 11400714819323198549ULL);
330 }
331
332 // This turns into a single bswap/pshufb type instruction on most compilers.
333 inline uint64_t
334 _SwapByteOrder(uint64_t val) const {
335 val =
336 ((val & 0xFF00000000000000u) >> 56u) |
337 ((val & 0x00FF000000000000u) >> 40u) |
338 ((val & 0x0000FF0000000000u) >> 24u) |
339 ((val & 0x000000FF00000000u) >> 8u) |
340 ((val & 0x00000000FF000000u) << 8u) |
341 ((val & 0x0000000000FF0000u) << 24u) |
342 ((val & 0x000000000000FF00u) << 40u) |
343 ((val & 0x00000000000000FFu) << 56u);
344 return val;
345 }
346
347 size_t _Combine(size_t x, size_t y) const {
348 // This is our hash combiner. The task is, given two hash codes x and
349 // y, compute a single hash code. One way to do this is to exclusive-or
350 // the two codes together, but this can produce bad results if they
351 // differ by some fixed amount, For example if the input codes are
352 // multiples of 32, and the two codes we're given are N and N + 32 (this
353 // happens commonly when the hashed values are memory addresses) then
354 // the resulting hash codes for successive pairs of these produces many
355 // repeating values (32, 96, 32, XXX, 32, 96, 32, YYY...). That's a lot
356 // of collisions.
357 //
358 // Instead we combine hash values by assigning numbers to the lattice
359 // points in the plane, and then treating the inputs x and y as
360 // coordinates identifying a lattice point. Then the combined hash
361 // value is just the number assigned to the lattice point. This way
362 // each unique input pair (x, y) gets a unique output hash code.
363 //
364 // We number lattice points by triangular numbers like this:
365 //
366 // X 0 1 2 3 4 5
367 // Y
368 // 0 0 2 5 9 14 20
369 // 1 1 4 8 13 19 26
370 // 2 3 7 12 18 25 33
371 // 3 6 11 17 24 32 41
372 // 4 10 16 23 31 40 50
373 // 5 15 22 30 39 49 60
374 //
375 // This takes a couple of additions and a multiplication, which is a bit
376 // more expensive than something like an exclusive or, but the quality
377 // improvement outweighs the added expense.
378 x += y;
379 return y + x * (x + 1) / 2;
380 }
381
382 size_t _state = 0;
383 bool _didOne = false;
384};
385
477class TfHash {
478public:
481 template <class T>
482 auto operator()(T &&obj) const ->
483 decltype(Tf_HashImpl(std::declval<Tf_HashState &>(),
484 std::forward<T>(obj), 0), size_t()) {
485 Tf_HashState h;
486 Tf_HashImpl(h, std::forward<T>(obj), 0);
487 return h.GetCode();
488 }
489
491 template <class... Args>
492 static size_t Combine(Args &&... args) {
493 Tf_HashState h;
494 _CombineImpl(h, std::forward<Args>(args)...);
495 return h.GetCode();
496 }
497
498private:
499 template <class HashState, class T, class... Args>
500 static void _CombineImpl(HashState &h, T &&obj, Args &&... rest) {
501 Tf_HashImpl(h, std::forward<T>(obj), 0);
502 _CombineImpl(h, std::forward<Args>(rest)...);
503 }
504
505 template <class HashState>
506 static void _CombineImpl(HashState &h) {
507 // base case does nothing
508 }
509};
510
513 size_t operator()(const char* ptr) const;
514};
515
518 size_t operator()(const char* ptr) const;
519};
520
523 bool operator()(const char* lhs, const char* rhs) const;
524};
525
526PXR_NAMESPACE_CLOSE_SCOPE
527
528#endif
A user-extensible hashing mechanism for use with runtime hash tables.
Definition: hash.h:477
static size_t Combine(Args &&... args)
Produce a hash code by combining the hash codes of several objects.
Definition: hash.h:492
auto operator()(T &&obj) const -> decltype(Tf_HashImpl(std::declval< Tf_HashState & >(), std::forward< T >(obj), 0), size_t())
Produce a hash code for obj.
Definition: hash.h:482
A structure that wraps a char pointer, indicating intent that it should be hashed as a c-style null t...
Definition: hash.h:162
A function object that compares two c-strings for equality.
Definition: hash.h:522
A hash function object that hashes null-terminated c-string content.
Definition: hash.h:517
A hash function object that hashes the address of a char pointer.
Definition: hash.h:512
TfCStrHashWrapper TfHashAsCStr(char const *cstr)
Indicate that a char pointer is intended to be hashed as a C-style null terminated string.
Definition: hash.h:175
A file containing basic constants and definitions.
size_t hash_value(const TfToken &x)
Overload hash_value for TfToken.
Definition: token.h:454