24#ifndef PXR_BASE_TF_UNICODE_UTILS_H
25#define PXR_BASE_TF_UNICODE_UTILS_H
32#include "pxr/base/tf/api.h"
39PXR_NAMESPACE_OPEN_SCOPE
65 static constexpr std::pair<uint32_t, uint32_t>
79 constexpr uint32_t AsUInt32()
const {
return _value; }
83 return left._value == right._value;
87 return left._value != right._value;
104 return static_cast<unsigned char>(value) < 128 ?
118 using iterator_category = std::forward_iterator_tag;
120 using difference_type = std::ptrdiff_t;
121 using pointer = void;
136 const std::string_view::const_iterator& it,
137 const std::string_view::const_iterator& end) : _it(it), _end(end) {
153 std::string_view::const_iterator
GetBase()
const
164 return (this->_it == rhs._it);
173 return (this->_it != rhs._it);
187 _EncodingLength increment = _GetEncodingLength();
196 auto isContinuation = [](
const char c) {
197 const auto uc =
static_cast<unsigned char>(c);
198 return (uc >=
static_cast<unsigned char>(
'\x80')) &&
199 (uc <
static_cast<unsigned char>(
'\xc0'));
201 while ((increment > 1) && !_IsPastTheEnd() && isContinuation(*_it)) {
225 return lhs._IsPastTheEnd();
228 friend bool operator==(PastTheEndSentinel lhs,
235 PastTheEndSentinel rhs)
237 return !(lhs == rhs);
239 friend bool operator!=(PastTheEndSentinel lhs,
242 return !(lhs == rhs);
246 using _EncodingLength =
unsigned char;
252 _EncodingLength _GetEncodingLength()
const
264 unsigned char x =
static_cast<unsigned char>(*_it);
269 else if ((x >= 0xc0) && (x < 0xe0))
273 else if ((x >= 0xe0) && (x < 0xf0))
277 else if ((x >= 0xf0) && (x < 0xf8))
293 TF_API uint32_t _GetCodePoint()
const;
297 bool _IsPastTheEnd()
const
302 std::string_view::const_iterator _it;
303 std::string_view::const_iterator _end;
357 inline const_iterator cbegin()
const
372 return _view.empty();
391 std::string_view _view;
436PXR_NAMESPACE_CLOSE_SCOPE
Low-level utilities for informing users of various internal and external diagnostic conditions.
Wrapper for a 32-bit code point value that can be encoded as UTF-8.
static constexpr std::pair< uint32_t, uint32_t > SurrogateRange
Values in this range (inclusive) cannot be constructed and will be replaced by the replacement code p...
constexpr TfUtf8CodePoint(uint32_t value)
Construct a UTF-8 valued code point, constrained by the maximum value and surrogate range.
static constexpr uint32_t MaximumValue
Values higher than this will be replaced with the replacement code point.
constexpr TfUtf8CodePoint()=default
Construct a code point initialized to the replacement value.
static constexpr uint32_t ReplacementValue
Code points that cannot be decoded or are outside of the valid range will be replaced with this value...
Defines an iterator over a UTF-8 encoded string that extracts unicode code point values.
bool operator!=(const TfUtf8CodePointIterator &rhs) const
Determines if two iterators are unequal.
friend bool operator==(const TfUtf8CodePointIterator &lhs, PastTheEndSentinel)
Checks if the lhs iterator is at or past the end for the underlying string_view.
value_type operator*() const
Retrieves the current UTF-8 character in the sequence as its Unicode code point value.
TfUtf8CodePointIterator & operator++()
Advances the iterator logically one UTF-8 character sequence in the string.
TfUtf8CodePointIterator(const std::string_view::const_iterator &it, const std::string_view::const_iterator &end)
Constructs an iterator that can read UTF-8 character sequences from the given starting string_view it...
std::string_view::const_iterator GetBase() const
Retrieves the wrapped string iterator.
Model iteration ending when the underlying iterator's end condition has been met.
Wrapper for a UTF-8 encoded std::string_view that can be iterated over as code points instead of byte...
bool empty() const
Returns true if the underlying view is empty.
TfUtf8CodePointIterator::PastTheEndSentinel end() const
The sentinel will compare as equal to any iterator at the end of the underlying string_view.
TfUtf8CodePointIterator::PastTheEndSentinel cend() const
The sentinel will compare as equal to any iterator at the end of the underlying string_view.
const_iterator EndAsIterator() const
Returns an iterator of the same type as begin that identifies the end of the string.
GF_API std::ostream & operator<<(std::ostream &, const GfBBox3d &)
Output a GfBBox3d using the format [(range) matrix zeroArea].
#define TF_DEV_AXIOM(cond)
The same as TF_AXIOM, but compiled only in dev builds.
constexpr TfUtf8CodePoint TfUtf8InvalidCodePoint
The replacement code point can be used to signal that a code point could not be decoded and needed to...
TF_API bool TfIsUtf8CodePointXidContinue(uint32_t codePoint)
Determines whether the given Unicode codePoint is in the XID_Continue character class.
constexpr TfUtf8CodePoint TfUtf8CodePointFromAscii(const char value)
Constructs a TfUtf8CodePoint from an ASCII character (0-127).
TF_API bool TfIsUtf8CodePointXidStart(uint32_t codePoint)
Determines whether the given Unicode codePoint is in the XID_Start character class.