Loading...
Searching...
No Matches
pool.h
1//
2// Copyright 2019 Pixar
3//
4// Licensed under the Apache License, Version 2.0 (the "Apache License")
5// with the following modification; you may not use this file except in
6// compliance with the Apache License and the following modification to it:
7// Section 6. Trademarks. is deleted and replaced with:
8//
9// 6. Trademarks. This License does not grant permission to use the trade
10// names, trademarks, service marks, or product names of the Licensor
11// and its affiliates, except as required to comply with Section 4(c) of
12// the License and to reproduce the content of the NOTICE file.
13//
14// You may obtain a copy of the Apache License at
15//
16// http://www.apache.org/licenses/LICENSE-2.0
17//
18// Unless required by applicable law or agreed to in writing, software
19// distributed under the Apache License with the above modification is
20// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21// KIND, either express or implied. See the Apache License for the specific
22// language governing permissions and limitations under the Apache License.
23//
24#ifndef PXR_USD_SDF_POOL_H
25#define PXR_USD_SDF_POOL_H
26
27#include "pxr/pxr.h"
28#include "pxr/usd/sdf/api.h"
29
31#include "pxr/base/arch/hints.h"
35
36#include <tbb/concurrent_queue.h>
37
38#include <atomic>
39#include <thread>
40
41PXR_NAMESPACE_OPEN_SCOPE
42
43// A helper struct for thread_local that uses nullptr initialization as a
44// sentinel to prevent guard variable use from being invoked after first
45// initialization.
46template <class T>
47struct Sdf_FastThreadLocalBase
48{
49 static T &Get() {
50 static thread_local T *theTPtr = nullptr;
51 if (ARCH_LIKELY(theTPtr)) {
52 return *theTPtr;
53 }
54 static thread_local T theT;
55 T *p = &theT;
56 theTPtr = p;
57 return *p;
58 }
59};
60
61// Fixed-size scalable pool allocator with 32-bit "handles" returned. Reserves
62// virtual memory in big regions. It's optimized for per-thread allocations,
63// and intended to be used for SdfPath. The Tag template argument just serves
64// as to distinguish one pool instantiation from another with otherwise
65// equivalent template arguments. The ElemSize argument is the size of an
66// allocated element in bytes. It must be at least 4 bytes. The RegionBits
67// argument determines how many contiguous "chunks" of virtual memory the pool
68// will use. A good number for this is 8, meaning we'll have at most 256
69// "regions" or "chunks" of contiguous virtual memory, each 2^24 times ElemSize
70// bytes in size. To allocate from reserved chunks, each thread acquires a span
71// to hold ElemsPerSpan elements from the range, then uses that to dole out
72// individual allocations. When freed, allocations are placed on a thread-local
73// free list, and eventually shared back for use by other threads when the free
74// list gets large.
75template <class Tag,
76 unsigned ElemSize,
77 unsigned RegionBits,
78 unsigned ElemsPerSpan=16384>
79class Sdf_Pool
80{
81 static_assert(ElemSize >= sizeof(uint32_t),
82 "ElemSize must be at least sizeof(uint32_t)");
83
84public:
85 // Number of pool elements per region.
86 static constexpr uint64_t ElemsPerRegion = 1ull << (32-RegionBits);
87
88 // Maximum index of an element in a region.
89 static constexpr uint32_t MaxIndex = ElemsPerRegion - 1;
90
91 // Mask to extract region number from a handle value.
92 static constexpr uint32_t RegionMask = ((1 << RegionBits)-1);
93
94 friend struct Handle;
95 // A Handle refers to an item in the pool. It just wraps around a uint32_t
96 // that represents the item's index and the item's region.
97 struct Handle {
98 constexpr Handle() noexcept = default;
99 constexpr Handle(std::nullptr_t) noexcept : value(0) {}
100 Handle(unsigned region, uint32_t index)
101 : value((index << RegionBits) | region) {}
102 Handle &operator=(Handle const &) = default;
103 Handle &operator=(std::nullptr_t) { return *this = Handle(); }
104 inline char *GetPtr() const noexcept {
105 ARCH_PRAGMA_PUSH
106 ARCH_PRAGMA_MAYBE_UNINITIALIZED
107 return Sdf_Pool::_GetPtr(value & RegionMask, value >> RegionBits);
108 ARCH_PRAGMA_POP
109 }
110 static inline Handle GetHandle(char const *ptr) noexcept {
111 return Sdf_Pool::_GetHandle(ptr);
112 }
113 explicit operator bool() const {
114 return value != 0;
115 }
116 inline bool operator ==(Handle const &r) const noexcept {
117 return value == r.value;
118 }
119 inline bool operator !=(Handle const &r) const noexcept {
120 return value != r.value;
121 }
122 inline bool operator <(Handle const &r) const noexcept {
123 return value < r.value;
124 }
125 inline void swap(Handle &r) noexcept {
126 std::swap(value, r.value);
127 }
128 uint32_t value = 0;
129 };
130
131private:
132 // We maintain per-thread free lists of pool items.
133 struct _FreeList {
134 inline void Pop() {
135 char *p = head.GetPtr();
136 Handle *hp = reinterpret_cast<Handle *>(p);
137 head = *hp;
138 --size;
139 }
140 inline void Push(Handle h) {
141 ++size;
142 char *p = h.GetPtr();
143 Handle *hp = reinterpret_cast<Handle *>(p);
144 *hp = head;
145 head = h;
146 }
147 Handle head;
148 size_t size = 0;
149 };
150
151 // A pool span represents a "chunk" of new pool space that threads allocate
152 // from when their free lists are empty. When both the free list and the
153 // pool span are exhausted, a thread will look for a shared free list, or
154 // will obtain a new chunk of pool space to use.
155 struct _PoolSpan {
156 size_t size() const { return endIndex - beginIndex; }
157 inline Handle Alloc() {
158 return Handle(region, beginIndex++);
159 }
160 inline bool empty() const {
161 return beginIndex == endIndex;
162 }
163 unsigned region;
164 uint32_t beginIndex;
165 uint32_t endIndex;
166 };
167
168 struct _PerThreadData {
169 // Local free-list of elems returned to the pool.
170 _FreeList freeList;
171 // Contiguous range of reserved but as-yet-unalloc'd space.
172 _PoolSpan span;
173 };
174
175 // The region state structure represents the global state of the pool. This
176 // is a pool-global structure that is used to reserve new spans of pool data
177 // by threads when needed. See the Reserve() member, and the _ReserveSpan()
178 // function that does most of the state manipulation.
179 struct _RegionState {
180 static constexpr uint32_t LockedState = ~0;
181
182 _RegionState() = default;
183 constexpr _RegionState(unsigned region, uint32_t index)
184 : _state((index << RegionBits) | region) {}
185
186 // Make a new state that reserves up to \p num elements. There must be
187 // space left remaining.
188 inline _RegionState Reserve(unsigned num) const;
189
190 static constexpr _RegionState GetInitState() {
191 return _RegionState(0, 0);
192 }
193
194 static constexpr _RegionState GetLockedState() {
195 return _RegionState(LockedState, LockedState);
196 }
197
198 constexpr bool operator==(_RegionState other) const {
199 return _state == other._state;
200 }
201
202 uint32_t GetIndex() const {
203 return _state >> RegionBits;
204 }
205
206 unsigned GetRegion() const {
207 return _state & RegionMask;
208 }
209
210 bool IsLocked() const {
211 return _state == LockedState;
212 }
213
214 // low RegionBits bits are region id, rest are index.
215 uint32_t _state;
216 };
217
218public:
219 static inline Handle Allocate();
220 static inline void Free(Handle h);
221
222private:
223
224 // Given a region id and index, form the pointer into the pool.
225 static inline char *_GetPtr(unsigned region, uint32_t index) {
226 // Suppress undefined-var-template warnings from clang; _regionStarts
227 // is expected to be instantiated in another translation unit via
228 // the SDF_INSTANTIATE_POOL macro.
229 ARCH_PRAGMA_PUSH
230 ARCH_PRAGMA_UNDEFINED_VAR_TEMPLATE
231 return _regionStarts[region] + (index * ElemSize);
232 ARCH_PRAGMA_POP
233 }
234
235 // Given a pointer into the pool, produce its corresponding Handle. Don't
236 // do this unless you really have to, it has to do a bit of a search.
237 static inline Handle _GetHandle(char const *ptr) {
238 if (ptr) {
239 for (unsigned region = 1; region != NumRegions+1; ++region) {
240 // Suppress undefined-var-template warnings from clang; _regionStarts
241 // is expected to be instantiated in another translation unit via
242 // the SDF_INSTANTIATE_POOL macro.
243 ARCH_PRAGMA_PUSH
244 ARCH_PRAGMA_UNDEFINED_VAR_TEMPLATE
245 uintptr_t start = (uintptr_t)_regionStarts[region];
246 ARCH_PRAGMA_POP
247
248 // We rely on modular arithmetic so that if ptr is less than
249 // start, the diff will be larger than ElemsPerRegion*ElemSize.
250 uintptr_t diff = (uintptr_t)ptr - start;
251 if (diff < (uintptr_t)(ElemsPerRegion*ElemSize)) {
252 return Handle(
253 region, static_cast<uint32_t>(diff / ElemSize));
254 }
255 }
256 }
257 return nullptr;
258 }
259
260 // Try to take a shared free list.
261 static bool _TakeSharedFreeList(_FreeList &out) {
262 return _sharedFreeLists->try_pop(out);
263 }
264
265 // Give a free list to be shared by other threads.
266 static void _ShareFreeList(_FreeList &in) {
267 _sharedFreeLists->push(in);
268 in = {};
269 }
270
271 // Reserve a new span of pool space.
272 static inline void _ReserveSpan(_PoolSpan &out);
273
274 static constexpr int NumRegions = 1 << RegionBits;
275
276 using _ThreadData = Sdf_FastThreadLocalBase<_PerThreadData>;
277 SDF_API static _ThreadData _threadData;
278 SDF_API static char *_regionStarts[NumRegions+1];
279 SDF_API static std::atomic<_RegionState> _regionState;
280 SDF_API static TfStaticData<tbb::concurrent_queue<_FreeList>> _sharedFreeLists;
281};
282
283PXR_NAMESPACE_CLOSE_SCOPE
284
285#endif // PXR_USD_SDF_POOL_H
Low-level utilities for informing users of various internal and external diagnostic conditions.
Create or return a previously created object instance of global data.
Definition: staticData.h:113
Demangle C++ typenames generated by the typeid() facility.
Compiler hints.
STL namespace.
Pragmas for controlling compiler-specific behaviors.