diff --git a/Code/Legacy/CryCommon/UnicodeBinding.h b/Code/Legacy/CryCommon/UnicodeBinding.h deleted file mode 100644 index aaefcf0f55..0000000000 --- a/Code/Legacy/CryCommon/UnicodeBinding.h +++ /dev/null @@ -1,946 +0,0 @@ -/* - * Copyright (c) Contributors to the Open 3D Engine Project. - * For complete copyright and license terms please see the LICENSE at the root of this distribution. - * - * SPDX-License-Identifier: Apache-2.0 OR MIT - * - */ - - -// Note: The utilities in this file should typically not be used directly, -// consider including UnicodeFunctions.h or UnicodeIterator.h instead. -// -// (At least) the following string types can be bound with these helper functions: -// Types Input Output Null-Terminator -// std::basic_string, std::string, std::wstring: yes yes implied by type -// QString: yes yes implied by type -// std::vector, std::list, std::deque: yes yes not present -// T[] (fixed-length buffer): yes yes guaranteed to be emitted on output, accepted on input -// T * and size_t (user-specified-size buffer): no yes guaranteed to be emitted on output -// const T * (null-terminated string): yes no expected -// const T[] (literal): yes no implied as the last item in the array -// pair of iterators over T: yes no should not be included in the range -// uint32 (single UCS code-point): yes no not present -// If some other string type is not listed, you can still use it for input easily by passing begin/end iterators. -// Note: For all types, T can be any 8-bit, 16-bit or 32-bit integral or character type. -// Further T types may be processed by explicitly passing InputEncoding and OutputEncoding. -// We never actively tested such scenario's, so no guarantees on floating and user-defined types as code-units. - - -#pragma once - -#ifndef assert -// Some tools use CRT's assert, most engine and game modules use CryAssert.h (via platform.h maybe). 
-// We don't want to force a choice upon all code that uses Unicode utilities, so we just assume assert is defined. -#error This header uses assert macro, please provide an applicable definition before including UnicodeXXX.h -#endif - -#include "UnicodeEncoding.h" -#include // For str(n)len and memcpy. -#include // For wcs(n)len. -#include // For size_t and ptrdiff_t. -#include // For std::iterator_traits. -#include // For std::basic_string. -#include // For std::vector. -#include // For std::list. -#include // For std::deque. -#include // ... standard type-traits (as of C++11). - -#if defined(AZ_RESTRICTED_PLATFORM) -#undef AZ_RESTRICTED_SECTION -#define UNICODEBINDING_H_SECTION_1 1 -#define UNICODEBINDING_H_SECTION_2 2 -#endif - -// Forward declare the supported types. -// Before actually instantiating a binding however, you need to have the full definition included. -// Also, this allows us to work with QChar/QString as declared names without a dependency on Qt. -namespace AZStd -{ - template - class basic_fixed_string; -} -class QChar; -class QString; - -namespace Unicode -{ - namespace Detail - { - // Import standard type traits. - // This requires C++11 compiler support. - using std::add_const; - using std::conditional; - using std::extent; - using std::integral_constant; - using std::is_arithmetic; - using std::is_array; - using std::is_base_of; - using std::is_const; - using std::is_convertible; - using std::is_integral; - using std::is_pointer; - using std::is_same; - using std::make_unsigned; - using std::remove_cv; - using std::remove_extent; - using std::remove_pointer; - - // SVoid: - // Result type will be void if T is well-formed. - // Note: This is mostly used to test the presence of member types at compile-time. - template - struct SVoid - { - typedef void type; - }; - - // SValidChar: - // Determine if T is a valid character type in the given compile-time context. - // The InferEncoding flag is set if the encoding has to be detected automatically. 
- // The Input flag is set if the type is used for input (and not set if the type is used for output). - template - struct SValidChar - { - typedef typename remove_cv::type BaseType; - static const bool isArithmeticType = is_arithmetic::value; - static const bool isQChar = is_same::value; - static const bool isUsable = isArithmeticType || isQChar; - static const bool isValidQualified = !is_const::value || Input; - static const bool isKnownSize = sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4; - static const bool isValidInferred = isKnownSize || !InferEncoding; - static const bool value = isUsable && isValidQualified && isValidInferred; - }; - - // SPackedIterators: - // A pair of iterators over some range. - // Note: Packing iterators into a single object allows us to pass them as a single argument like all other types. - template - struct SPackedIterators - { - const T begin, end; - SPackedIterators(const T& _begin, const T& _end) - : begin(_begin) - , end(_end) {} - }; - - // SPackedBuffer: - // A buffer-pointer/length tuple. - // Note: Packing them into a single object allows us to pass them as a single argument like all other types. - template - struct SPackedBuffer - { - T buffer; - size_t size; - SPackedBuffer(T _buffer, size_t _size) - : buffer(_buffer) - , size(_size) {} - }; - - // SDependentType: - // Makes the name of type T dependent on X (which is otherwise meaningless). - // Note: This is used to force two-phase lookup so we don't need the definition of T until instantiation. - // This way we can convince standards-compliant compilers Clang and GCC to not require definition of forward-declared types. - // Specifically, we forward-declare Qt's QString and QChar, for which the definition will never be available outside Editor. - template - struct SDependentType - { - typedef T type; - }; - - // EBind: - // Methods of binding a type for input and/or output. 
- // Note: These are used for tag-dispatch by binding functions, and are private to the implementation. - enum EBind - { // Input Output Description - eBind_Impossible, // No No Can't bind this type. - eBind_Iterators, // Yes Yes Bind by using begin() and end() member functions. - eBind_Data, // Yes Yes Bind by using data() and size() member functions. - eBind_Literal, // Yes No Bind a fixed size buffer (const element, aka string literal). - eBind_Buffer, // Yes No Bind a fixed size buffer (non-const element) that may be null-terminated. - eBind_PackedBuffer, // No Yes Bind a user-specified size buffer (non-const element). - eBind_NullTerminated, // Yes No Bind a null-terminated buffer of unknown length (C string). - eBind_CodePoint, // Yes No Bind a single code-point value. - }; - - // SBindIterator: - // Find the EBind for input from iterator pair of type T at compile-time. - // If the type is not supported, the resulting value will be eBind_Impossible - template - struct SBindIterator - { - typedef const void CharType; - static const EBind value = eBind_Impossible; - }; - template - struct SBindIterator - { - typedef typename add_const::type CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? eBind_Iterators : eBind_Impossible; - }; - template - struct SBindIterator::type, - typename SVoid::type - > - { - typedef typename add_const::type CharType; - typedef typename T::iterator_category IteratorCategory; - static const bool isInputIterator = is_base_of::value; - static const bool isValid = SValidChar::value; - static const EBind value = isValid && isInputIterator ? eBind_Iterators : eBind_Impossible; - }; - - // SBindObject: - // Find the EBind for input from object of type T at compile-time. - // If the type is not supported, the resulting value will be eBind_Impossible. 
- template - struct SBindObject - { - typedef typename add_const< - typename conditional< - is_array::value, - typename remove_extent::type, - typename remove_pointer::type - >::type - >::type CharType; - static const size_t FixedSize = extent::value; - COMPILE_TIME_ASSERT(!is_array::value || FixedSize > 0); - static const bool isConstArray = is_array::value && is_const::type>::value; - static const bool isBufferArray = is_array::value && !isConstArray; - static const bool isPointer = is_pointer::value; - static const bool isCodePoint = is_integral::value; - static const bool isValidChar = SValidChar::value; - static const EBind value = - !isValidChar ? eBind_Impossible : - isConstArray ? eBind_Literal : - isBufferArray ? eBind_Buffer : - isPointer ? eBind_NullTerminated : - isCodePoint ? eBind_CodePoint : - eBind_Impossible; - }; - template - struct SBindObject, InferEncoding> - { - typedef typename add_const::type CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? eBind_Data : eBind_Impossible; - }; - template - struct SBindObject, InferEncoding> - { - typedef typename add_const::type CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? eBind_Data : eBind_Impossible; - }; - template - struct SBindObject, InferEncoding> - { - typedef typename add_const::type CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? eBind_Iterators : eBind_Impossible; - }; - template - struct SBindObject, InferEncoding> - { - typedef typename add_const::type CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? eBind_Iterators : eBind_Impossible; - }; - template - struct SBindObject, InferEncoding> - { - typedef typename add_const::type CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? 
eBind_Data : eBind_Impossible; - }; - template - struct SBindObject, InferEncoding> - { - typedef wchar_t CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? eBind_Data : eBind_Impossible; - }; - template - struct SBindObject - { - typedef const QChar CharType; - static const EBind value = eBind_Data; - }; - template - struct SBindObject, InferEncoding> - { - typedef typename SBindIterator::CharType CharType; - static const EBind value = eBind_Iterators; - }; - - // SBindOutput: - // Find the EBind for output to object of type T at compile-time. - // If the type is not supported, the resulting value will be eBind_Impossible. - template - struct SBindOutput - { - typedef typename remove_extent::type CharType; - static const size_t FixedSize = extent::value; - static const bool isArray = is_array::value; - static const bool isValid = SValidChar::type, InferEncoding, false>::value; - static const EBind value = isArray && isValid ? eBind_Buffer : eBind_Impossible; - }; - template - struct SBindOutput, InferEncoding> - { - typedef OutputCharType CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? eBind_PackedBuffer : eBind_Impossible; - }; - template - struct SBindOutput, InferEncoding> - { - typedef CharT CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? eBind_Data : eBind_Impossible; - }; - template - struct SBindOutput, InferEncoding> - { - typedef T CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? eBind_Data : eBind_Impossible; - }; - template - struct SBindOutput, InferEncoding> - { - typedef T CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? 
eBind_Iterators : eBind_Impossible; - }; - template - struct SBindOutput, InferEncoding> - { - typedef T CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? eBind_Iterators : eBind_Impossible; - }; - template - struct SBindOutput, InferEncoding> - { - typedef T CharType; - static const bool isValid = SValidChar::value; - static const EBind value = isValid ? eBind_Data : eBind_Impossible; - }; - template - struct SBindOutput - { - typedef QChar CharType; - static const EBind value = eBind_Data; - }; - - // SInferEncoding: - // Infers the encoding of the given character type. - // Note: This will always pick an UTF encoding type based on the size of the element type. - template - struct SInferEncoding - { - typedef SBindObject ObjectType; - typedef SBindIterator IteratorType; - typedef typename conditional< - IteratorType::value != eBind_Impossible, - typename IteratorType::CharType, - typename ObjectType::CharType - >::type CharType; - static const EEncoding value = - sizeof(CharType) == 1 ? eEncoding_UTF8 : - sizeof(CharType) == 2 ? eEncoding_UTF16 : - eEncoding_UTF32; - COMPILE_TIME_ASSERT(value != eEncoding_UTF32 || sizeof(CharType) == 4); - }; - - // SBindCharacter: - // Pick the base character type to use during input or output with this element type. - template::value, bool IsQChar = is_same::type>::value> - struct SBindCharacter - { - typedef typename make_unsigned::type BaseType; // The standard doesn't define if a character type is signed or unsigned. - typedef typename remove_cv::type UnqualifiedType; - typedef typename conditional::type type; - }; - template - struct SBindCharacter - { - COMPILE_TIME_ASSERT(is_arithmetic::value); - typedef typename remove_cv::type UnqualifiedType; - typedef typename conditional::type type; - }; - template - struct SBindCharacter - { - typedef typename conditional::type type; - typedef typename SDependentType::type ActuallyQChar; // Force two-phase name lookup on QChar. 
- COMPILE_TIME_ASSERT(sizeof(ActuallyQChar) == sizeof(type)); // In case Qt ever changes QChar. - }; - - // SBindPointer: - // Pick the pointer type to use during input or output with buffers (potentially inside string types). - template - struct SBindPointer - { - COMPILE_TIME_ASSERT(is_pointer::value || is_array::value); - typedef typename conditional< - is_pointer::value, - typename remove_pointer::type, - typename remove_extent::type - >::type UnboundCharType; - typedef typename SBindCharacter::type BoundCharType; - typedef BoundCharType* type; - }; - - // SAutomaticallyDeduced: - // Placeholder type that is never defined, used by SRequire for SFINAE overloading. - struct SAutomaticallyDeduced; - - // SRequire: - // Helper for SFINAE overloading. - // Similar to C++11's std::enable_if, which is not in boost (with that exact name anyway). - template - struct SRequire - { - typedef T type; - }; - template - struct SRequire {}; - - // SafeCast: - // Cast a pointer to type T, but only allowing safe casts. - // This guards against bad code in other functions since it prevents unintended casts. - template - inline T SafeCast(SourceChar* ptr, typename SRequire::value>::type* = 0) - { - // Allow casts from pointer-to-integral to unrelated pointer-to-integral, provided they are of the same size. - typedef typename remove_pointer::type TargetChar; - COMPILE_TIME_ASSERT(is_integral::value && is_integral::value); - COMPILE_TIME_ASSERT(sizeof(SourceChar) == sizeof(TargetChar)); - return reinterpret_cast(ptr); - } - template - inline T SafeCast(SourceChar* ptr, typename SRequire::type, QChar>::value>::type* = 0) - { - // Allow casts from pointer-to-QChar to unrelated pointer-to-integral, provided they are of the same size. 
- typedef typename remove_pointer::type TargetChar; - COMPILE_TIME_ASSERT(is_integral::value); - COMPILE_TIME_ASSERT(sizeof(SourceChar) == sizeof(TargetChar)); - return reinterpret_cast(ptr); - } - template - inline T SafeCast(SourceChar* ptr, typename SRequire::value&& !is_same::type, QChar>::value>::type* = 0) - { - // Any other casts that are allowed by C++. - return static_cast(ptr); - } - - // SCharacterTrait: - // Exposes some basic traits for a given character. - // Note: Map to (hopefully optimized) CRT functions where possible. - template::value> - struct SCharacterTrait - { - static size_t StrLen(const T* nts) // Fall-back strlen. - { - size_t result = 0; - while (*nts != 0) - { - ++nts; - ++result; - } - return result; - } - static size_t StrNLen(const T* ptr, size_t len) // Fall-back strnlen. - { - size_t result = 0; - while (*ptr != 0 && result != len) - { - ++ptr; - ++result; - } - return result; - } - }; - template - struct SCharacterTrait - { - static size_t StrLen(const T* nts) // Narrow CRT strlen. - { - return ::strlen(SafeCast(nts)); - } - static size_t StrNLen(const T* ptr, size_t len) // Narrow CRT strnlen. - { - return ::strnlen(SafeCast(ptr), len); - } - }; - template - struct SCharacterTrait - { - static size_t StrLen(const T* nts) // Wide CRT strlen. - { - return ::wcslen(SafeCast(nts)); - } - static size_t StrNLen(const T* ptr, size_t len) // Wide CRT strnlen. - { -#if defined(AZ_RESTRICTED_PLATFORM) - #define AZ_RESTRICTED_SECTION UNICODEBINDING_H_SECTION_1 - #include AZ_RESTRICTED_FILE(UnicodeBinding_h) -#endif - return ::wcsnlen(SafeCast(ptr), len); -#if defined(AZ_RESTRICTED_PLATFORM) - #define AZ_RESTRICTED_SECTION UNICODEBINDING_H_SECTION_2 - #include AZ_RESTRICTED_FILE(UnicodeBinding_h) -#endif - } - }; - - // void Feed(const SPackedIterators &its, Sink &out, tag): - // Feeds the provided sink from provided packed iterator-range. 
- template - inline void Feed(const SPackedIterators& its, Sink& out, integral_constant) - { - typedef typename std::iterator_traits::value_type UnboundCharType; - typedef typename SBindCharacter::type BoundCharType; - for (InputIteratorType it = its.begin; it != its.end; ++it) - { - const UnboundCharType unbound = *it; - const BoundCharType bound = static_cast(unbound); - const uint32 item = static_cast(bound); - out(item); - } - } - - // void Feed(const SPackedIterators &its, Sink &out, tag): - // Feeds the provided sink from provided packed pointer-range. - // This is slightly better code-generation than using generic iterators. - template - inline void Feed(const SPackedIterators& its, Sink& out, integral_constant) - { - typedef typename SBindPointer::type PointerType; - assert(reinterpret_cast(its.begin) <= reinterpret_cast(its.end) && "Invalid range specified"); - const size_t length = its.end - its.begin; - PointerType ptr = SafeCast(its.begin); - assert((ptr || !length) && "Passed a non-empty range containing a null-pointer"); - for (size_t i = 0; i < length; ++i, ++ptr) - { - const uint32 item = static_cast(*ptr); - out(item); - } - } - - // void Feed(const InputStringType &in, Sink &out, tag): - // Feeds the provided sink from a container, using it's iterators. - // Note: Dispatches to one of the packed-range overloads. - template - inline void Feed(const InputStringType& in, Sink& out, integral_constant tag) - { - typedef typename InputStringType::const_iterator IteratorType; - Detail::SPackedIterators its(in.begin(), in.end()); - Feed(its, out, tag); - } - - // void Feed(const InputStringType &in, Sink &out, tag): - // Feeds the provided sink from a string-object's buffer. 
- template - inline void Feed(const InputStringType& in, Sink& out, integral_constant) - { - typedef typename InputStringType::size_type SizeType; - typedef typename InputStringType::value_type ValueType; - typedef typename SBindPointer::type PointerType; - const SizeType length = in.size(); - if (length) - { - PointerType ptr = SafeCast(in.data()); - for (SizeType i = 0; i < length; ++i, ++ptr) - { - const uint32 item = static_cast(*ptr); - out(item); - } - } - } - - // void Feed(const InputStringType &in, Sink &out, tag): - // Feeds the provided sink from a string-literal. - // Note: The literal is assumed to be null-terminated. - // It's possible that a const-element fixed-size-buffer is mistaken as a literal. - // However, we expect no-one uses such buffers that are not null-terminated already. - // If somehow this use-case is desired, either terminate the buffer, or remove const from the buffer, or pass iterators. - template - inline void Feed(const InputStringType& in, Sink& out, integral_constant) - { - COMPILE_TIME_ASSERT(is_array::value && extent::value > 0); - typedef typename SBindPointer::type PointerType; - const size_t length = extent::value - 1; - PointerType ptr = SafeCast(in); - assert(ptr[length] == 0 && "Literal is not null-terminated"); - for (size_t i = 0; i < length; ++i, ++ptr) - { - const uint32 item = static_cast(*ptr); - out(item); - } - } - - // void Feed(const InputStringType &in, Sink &out, tag): - // Feeds the provided sink from a non-const-element fixed-size buffer. - // Note: The buffer is allowed to be null-terminated, but it's not required. 
- template - inline void Feed(const InputStringType& in, Sink& out, integral_constant) - { - COMPILE_TIME_ASSERT(is_array::value && extent::value > 0); - typedef typename SBindPointer::type PointerType; - typedef typename SBindPointer::BoundCharType CharType; - const size_t length = extent::value; - PointerType ptr = SafeCast(in); - for (size_t i = 0; i < length; ++i, ++ptr) - { - const CharType unbound = *ptr; - if (unbound == 0) - { - break; - } - const uint32 item = static_cast(unbound); - out(item); - } - } - - // void Feed(const InputStringType &in, Sink &out, tag): - // Feeds the provided sink from a null-terminated C-style string. - template - inline void Feed(const InputStringType& in, Sink& out, integral_constant) - { - COMPILE_TIME_ASSERT(is_pointer::value); - typedef typename SBindPointer::type PointerType; - typedef typename SBindPointer::BoundCharType CharType; - PointerType ptr = SafeCast(in); - if (ptr) - { - while (true) - { - const CharType unbound = *ptr; - ++ptr; - if (unbound == 0) - { - break; - } - const uint32 item = static_cast(unbound); - out(item); - } - } - } - - // void Feed(const InputCharType &in, Sink &out, tag): - // Feeds the provided sink from a single value (interpreted as an UCS code-point). - template - inline void Feed(const InputCharType& in, Sink& out, integral_constant) - { - COMPILE_TIME_ASSERT(is_arithmetic::value); - const uint32 item = static_cast(in); - out(item); - } - - // size_t EncodedLength(const SPackedIterators &its, tag): - // Determines the length of the input sequence in a range of iterators. - template - inline size_t EncodedLength(const SPackedIterators& its, integral_constant) - { - return std::distance(its.begin, its.end); // std::distance will pick optimal implementation depending on iterator category. - } - - // size_t EncodedLength(const InputStringType &in, tag): - // Determines the length of an input container, which would otherwise be enumerated with iterators. 
- template - inline size_t EncodedLength(const InputStringType& in, integral_constant) - { - return in.size(); // Can there be a container without size()? At the very least, not in the supported types. - } - - // size_t EncodedLength(const InputStringType &in, tag): - // Determines the length of the input container. The container uses contiguous element layout. - template - inline size_t EncodedLength(const InputStringType& in, integral_constant) - { - return in.size(); - } - - // size_t EncodedLength(const InputStringType &in, tag): - // Determines the length of the input string-literal. This is a compile-time constant. - template - inline size_t EncodedLength(const InputStringType& in, integral_constant) - { - COMPILE_TIME_ASSERT(is_array::value && extent::value > 0); - return extent::value - 1; - } - - // size_t EncodedLength(const InputStringType &in, tag): - // Determines the length of the input fixed-size-buffer. We look for an (optional) null-terminator in the buffer. - template - inline size_t EncodedLength(const InputStringType& in, integral_constant) - { - COMPILE_TIME_ASSERT(is_array::value && extent::value > 0); - typedef typename remove_extent::type CharType; - return SCharacterTrait::StrNLen(in, extent::value); - } - - // size_t EncodedLength(const InputStringType &in, tag): - // Determines the length of the input used-specified buffer. We look for an (optional) null-terminator in the buffer. - template - inline size_t EncodedLength(const SPackedBuffer& in, integral_constant) - { - return in.buffer ? SCharacterTrait::StrNLen(in.buffer, in.size) : 0; - } - - // size_t EncodedLength(const InputStringType &in, tag): - // Determines the length of the input null-terminated c-style string. We just use strlen() if available. - template - inline size_t EncodedLength(const InputStringType& in, integral_constant) - { - COMPILE_TIME_ASSERT(is_pointer::value); - typedef typename remove_pointer::type CharType; - return in ? 
SCharacterTrait::StrLen(in) : 0; - } - - // size_t EncodedLength(const InputCharType &in, tag): - // Determines the length of a single UCS code-point. This is always 1. - template - inline size_t EncodedLength([[maybe_unused]] const InputCharType& in, integral_constant) - { - COMPILE_TIME_ASSERT(is_arithmetic::value); - return 1; - } - - // const void *EncodedPointer(const SPackedIterators &its, tag): - // Get a pointer to contiguous storage for an iterator range. - // Note: This can only work if the iterators are pointers, or the storage won't be guaranteed contiguous. - template - inline const void* EncodedPointer(const SPackedIterators& its, integral_constant) - { - return its.begin; - } - - // const void *EncodedPointer(const InputStringType &in, tag): - // Get a pointer to contiguous storage for string/vector object. - // Note: This can only work for containers that actually use contiguous storage, which is determined by the SBindXXX helpers. - template - inline const void* EncodedPointer(const InputStringType& in, integral_constant) - { - return in.data(); - } - - // const void *EncodedPointer(const InputStringType &in, tag): - // Get a pointer to contiguous storage for a string-literal. - template - inline const void* EncodedPointer(const InputStringType& in, integral_constant) - { - COMPILE_TIME_ASSERT(is_array::value && extent::value > 0); - return in; // We can just let the array type decay to a pointer. - } - - // const void *EncodedPointer(const InputStringType &in, tag): - // Get a pointer to contiguous storage for a fixed-size-buffer. - template - inline const void* EncodedPointer(const InputStringType& in, integral_constant) - { - COMPILE_TIME_ASSERT(is_array::value && extent::value > 0); - return in; // We can just let the array type decay to a pointer. - } - - // const void *EncodedPointer(const InputStringType &in, tag): - // Get a pointer to contiguous storage for a null-terminated c-style-string. 
- template - inline const void* EncodedPointer(const InputStringType& in, integral_constant) - { - COMPILE_TIME_ASSERT(is_pointer::value); - return in; // Implied - } - - // const void *EncodedPointer(const InputCharType &in, tag): - // Get a pointer to contiguous storage for a single UCS code-point. - template - inline const void* EncodedPointer(const InputCharType& in, integral_constant) - { - COMPILE_TIME_ASSERT(is_arithmetic::value); - return ∈ // Take the address of the parameter (which is kept on the stack of the caller). - } - - // SWriteSink: - // A helper that performs writing to the type T and can be passed as Sink type to a trans-coder helper. - template - struct SWriteSink; - template - struct SWriteSink - { - typedef typename T::value_type OutputCharType; - T& out; - SWriteSink(T& _out, size_t) - : out(_out) - { - if (!Append) - { - // If not appending, clear the object beforehand. - out.clear(); - } - } - void operator()(uint32 item) - { - const OutputCharType bound = static_cast(item); - out.push_back(bound); // We assume this can't fail and STL container takes care of memory. - } - void operator()(const void*, size_t); // Not implemented. - void HintSequence(uint32 length) {} // Don't care about sequences. - bool CanWrite() const { return true; } // Always writable - }; - template - struct SWriteSink - { - typedef SBindPointer BindHelper; - typedef typename BindHelper::UnboundCharType CharType; - CharType* ptr; - SWriteSink(T& out, size_t length) - { - const size_t offset = Append ? out.size() : 0; - length += offset; - out.resize(length); // resize() can't fail without exceptions, so assert instead. - assert((out.size() == length) && "Buffer resize failed (out-of-memory?)"); - const CharType* base = length ? 
out.data() : 0; - ptr = const_cast(base + offset); - } - void operator()(uint32 item) - { - *SafeCast(ptr) = static_cast(item); - ++ptr; - } - void operator()(const void* src, size_t length) - { - ::memcpy(ptr, src, length * sizeof(CharType)); - ptr += length; - } - void HintSequence([[maybe_unused]] uint32 length) {} // Don't care about sequences. - bool CanWrite() const { return true; } // Always writable - }; - template - struct SWriteSink, Append, eBind_PackedBuffer> - { - typedef typename remove_pointer

::type ElementType; - typedef SBindPointer BindHelper; - typedef typename BindHelper::UnboundCharType CharType; - CharType* ptr; - CharType* const terminator; - SWriteSink(CharType* _terminator) - : terminator(_terminator) {} - SWriteSink(SPackedBuffer

& out, size_t) - : terminator(out.size && out.buffer ? out.buffer + out.size - 1 : 0) - { - const size_t offset = Append - ? EncodedLength(out, integral_constant()) - : 0; - const size_t fixedOffset = Append && offset >= out.size - ? out.size - 1 // In case the buffer is already full and not terminated. - : offset; - CharType* base = static_cast(out.buffer); - ptr = terminator ? base + fixedOffset : 0; - } - ~SWriteSink() - { - if (ptr) - { - *ptr = 0; // Guarantees that the output is null-terminated. - } - } - void operator()(uint32 item) - { - if (ptr != terminator) // Guarantees we don't overflow the buffer. - { - *SafeCast(ptr) = static_cast(item); - ++ptr; - } - } - void operator()(const void* src, size_t length) - { - const size_t maxLength = terminator - ptr; - if (length > maxLength) - { - length = maxLength; - } - ::memcpy(ptr, src, length * sizeof(CharType)); - ptr += length; - } - void HintSequence(uint32 length) - { - if (terminator && (ptr + length >= terminator)) - { - // This sequence will overflow the buffer. - // In this case, we prefer to not generate any part of the sequence. - // Terminate at the current position and flag as full. - *ptr = 0; - ptr = terminator; - } - } - bool CanWrite() const - { - return terminator != ptr; - } - }; - template - struct SWriteSink // Uses above implementation with specialized constructor - : SWriteSink::type*>, Append, eBind_PackedBuffer> - { - typedef typename remove_extent::type ElementType; - typedef SWriteSink, Append, eBind_PackedBuffer> Super; - typedef SBindPointer BindHelper; - typedef typename BindHelper::UnboundCharType CharType; - SWriteSink(T& out, size_t) - : Super(out + extent::value - 1) - { - const size_t offset = Append - ? EncodedLength(out, integral_constant()) - : 0; - const size_t fixedOffset = Append && offset >= extent::value - ? extent::value - 1 // In case the buffer is already full and not terminated. 
- : offset; - Super::ptr = out + fixedOffset; // Qualification for Super required for two-phase lookup. - } - }; - - // SIsBlockCopyable: - // Check if block-copy optimization is possible for these types. - // InputType should be an instantiation of SBindObject or SBindIterator. - // OutputType should be an instantiation of SBindOutput. - // Note: This doesn't take into account safe/unsafe conversions, just if the underlying storage types are compatible. - template - struct SIsBlockCopyable - { - template - struct SIsContiguous - { - static const bool value = - M == eBind_Data || - M == eBind_Literal || - M == eBind_Buffer || - M == eBind_PackedBuffer || - M == eBind_NullTerminated || - M == eBind_CodePoint; - }; - template - struct SIsPointers - { - static const bool value = false; - }; - template - struct SIsPointers > - { - static const bool value = true; - }; - typedef typename SBindCharacter::type InputCharType; - typedef typename SBindCharacter::type OutputCharType; - static const bool isIntegral = is_integral::value && is_integral::value; - static const bool isSameSize = sizeof(InputCharType) == sizeof(OutputCharType); - static const bool isInputContiguous = (SIsContiguous::value || SIsPointers::value); - static const bool isOutputContiguous = (SIsContiguous::value || SIsPointers::value); - static const bool value = isIntegral && isSameSize && isInputContiguous && isOutputContiguous; - }; - } -} diff --git a/Code/Legacy/CryCommon/UnicodeEncoding.h b/Code/Legacy/CryCommon/UnicodeEncoding.h deleted file mode 100644 index a3e20b639c..0000000000 --- a/Code/Legacy/CryCommon/UnicodeEncoding.h +++ /dev/null @@ -1,767 +0,0 @@ -/* - * Copyright (c) Contributors to the Open 3D Engine Project. - * For complete copyright and license terms please see the LICENSE at the root of this distribution. - * - * SPDX-License-Identifier: Apache-2.0 OR MIT - * - */ - - -// Description : Generic Unicode encoding helpers. 
-// -// Defines encoding and decoding functions used by the higher-level functions. -// These are used by the various conversion functions in UnicodeFunctions.h and UnicodeIterator.h. -// Note: You can use these functions manually for low-level functionality, but we don't recommend that. -// In that case, you probably want to check inside the nested Detail namespace for the elementary bits. - - -#pragma once -#include "BaseTypes.h" // For uint8, uint16, uint32 -#include "CompileTimeAssert.h" // For COMPILE_TIME_ASSERT macro -namespace Unicode -{ - // Supported encoding/conversion types. - enum EEncoding - { - // UTF-8 encoding, see http://www.unicode.org/resources/utf8.html. - // Input and output are supported. - // Note: This format maps the entire UCS, where each code-point can take [1, 4] 8-bit code-units. - // Note: This is a strict super-set of Latin1/ISO-885901 as well as ASCII. - eEncoding_UTF8, - - // UTF-16 encoding, see http://tools.ietf.org/html/rfc2781. - // Input and output are supported. - // Note: This format maps the entire UCS, where each code-point can take [1, 2] 16-bit code-units. - eEncoding_UTF16, - - // UTF-32 encoding, see http://www.unicode.org/reports/tr17/. - // Input and output are supported. - // Note: This format maps the entire UCS, each code-point is stored in a single 32-bit code-unit. - eEncoding_UTF32, - - // ASCII encoding, see http://en.wikipedia.org/wiki/ASCII. - // Input and output are supported (any output UCS values out of supported range are mapped to question mark). - // Note: Only values [U+0000, U+007F] can be mapped. - eEncoding_ASCII, - - // Latin1, aka ISO-8859-1 encoding, see http://en.wikipedia.org/wiki/ISO/IEC_8859-1. - // Only input is supported. - // Note: This is a strict super-set of ASCII, it additionally maps [U+00A0, U+00FF]. - eEncoding_Latin1, - - // Windows ANSI codepage 1252, see http://en.wikipedia.org/wiki/Windows-1252. - // Only input is supported. 
- // Note: This is a strict super-set of ASCII and Latin1/ISO-8859-1, it maps some code-units from [0x80, 0x9F]. - eEncoding_Win1252, - }; - - // Methods of recovery from invalid encoded sequences. - enum EErrorRecovery - { - // No attempt to detect invalid encoding is performed, the input is assumed to be valid. - // If the input is not valid, the output is undefined (in debug, this condition will cause an assert to trigger). - eErrorRecovery_None, - - // When an invalidly encoded sequence is detected, the sequence is discarded (will not be part of the output). - // Typically used for logic/hashing purposes when the input is almost certainly valid. - eErrorRecovery_Discard, - - // When an invalidly encoded sequence is detected, the sequence is replaced with the replacement-character (U+FFFD). - // Typically used when the output sequence is used for UI display purposes. - eErrorRecovery_Replace, - - // When an invalidly encoded sequence is detected, the sequence is replaced with the eEncoding_Latin1 equivalent. - // If the sequence is also not valid Latin1 encoded, the sequence is discarded. - // Typically used when reading generic text files with 1-byte code-units. - // Note: This recovery method can only be used when decoding UTF-8. - eErrorRecovery_FallbackLatin1ThenDiscard, - - // When an invalidly encoded sequence is detected, the sequence is replaced with the eEncoding_Win1252 equivalent. - // If the sequence is also not valid codepage 1252 encoded, the sequence is discarded. - // Typically used when reading text files generated on Windows with 1-byte code-units. - // Note: This recovery method can only be used when decoding UTF-8. - eErrorRecovery_FallbackWin1252ThenDiscard, - - // When an invalidly encoded sequence is detected, the sequence is replaced with the eEncoding_Latin1 equivalent. - // If the sequence is also not valid Latin1 encoded, it is replaced with the replacement-character (U+FFFD). 
- // Typically used when reading generic text files with 1-byte code-units. - // Note: This recovery method can only be used when decoding UTF-8. - eErrorRecovery_FallbackLatin1ThenReplace, - - // When an invalidly encoded sequence is detected, the sequence is replaced with the eEncoding_Win1252 equivalent. - // If the sequence is also not valid codepage 1252 encoded, it is replaced with the replacement-character (U+FFFD). - // Typically used when reading text files generated on Windows with 1-byte code-units. - // Note: This recovery method can only be used when decoding UTF-8. - eErrorRecovery_FallbackWin1252ThenReplace, - }; - - namespace Detail - { - // Decode(state, unit): Decodes a single code-unit of an encoding into an UCS code-point. - // When Safe flag is set, encoding errors are detected so a fall-back encoding or other recovery method can be used. - // Interpret return value as follows: - // < 0x001FFFFF: Decoded codepoint (== return value), call again with next code-unit and clear state. - // < 0x80000000: Intermediate state returned, call again with next code-unit and the returned state. - // >= 0x80000000: Bad encoding detected, up to 16 bits (UTF-16) or 24 bits (UTF-8, last in lower bits) - // contain previous consumed values (does not happen if Safe == false). - template - inline uint32 Decode(uint32 state, uint32 unit); - - // Some constant values used when encoding/decoding. - enum - { - cDecodeShiftRemaining = 26, // Where to store the remaining count in the state. - cDecodeOneRemaining = 1 << cDecodeShiftRemaining, // Remaining value of one. - cDecodeMaskRemaining = 3 << cDecodeShiftRemaining, // All possible remaining bits that can be used. - cDecodeLeadBit = 1 << 22, // All bits up to and including this one are reserved. - cDecodeErrorBit = 1 << 31, // Set if an error occurs during decoding. - cDecodeOverlongBit = 1 << 30, // Set if overlong sequence was used. - cDecodeSurrogateBit = 1 << 29, // Set if surrogate code-point decoded in UTF-8. 
- cDecodeInvalidBit = 1 << 28, // Set if invalid code-point decoded (U+FFFE/FFFF). - cDecodeSuccess = 0, // Placeholder to indicate no error occurred. - cCodepointMax = 0x10FFFF, // The maximum value of an UCS code-point. - cLeadSurrogateFirst = 0xD800, // The first valid UTF-16 lead-surrogate value. - cLeadSurrogateLast = 0xDBFF, // The last valid UTF-16 lead-surrogate value. - cTrailSurrogateFirst = 0xDC00, // The first valid UTF-16 trail-surrogate value. - cTrailSurrogateLast = 0xDFFF, // The last valid UTF-16 trail-surrogate value. - cReplacementCharacter = 0xFFFD, // The default replacement character. - }; - - // Validate the UTF-8 state of a multi-byte sequence. - // The safe decoder of UTF-8 will call this function when a full potential code-point has been decoded. - // This function is (at most) called for 50% of the decoded UTF-8 code-units, but likely at much lower frequency. - inline uint32 DecodeValidate8(uint32 state) - { - uint32 errorbits = (state >> 8) | cDecodeErrorBit; - state ^= (state & 0x400000) >> 1; // For 3-byte sequences, bit 5 of the lead byte needs to be cleared. - const uint32 cp = - (state & 0x3F) | - ((state & 0x3F00) >> 2) | - ((state & 0x3F0000) >> 4) | - ((state & 0x07000000) >> 6); - if (cp <= cCodepointMax) - { - if (cp >= cLeadSurrogateFirst && cp <= cTrailSurrogateLast) - { - errorbits += cDecodeSurrogateBit; // CESU-8 encoding might have been used. - } - else - { - uint32 minval = 0x80; - minval += (0x00400000 & state) ? 0x800 - 0x80 : 0; - minval += (0x40000000 & state) ? 0x10000 - 0x80 : 0; - if (cp >= minval) - { - if ((cp & 0xFFFFFFFEU) != 0xFFFEU) - { - return cp; // Valid code-point. - } - errorbits += cDecodeInvalidBit; // Invalid character used. - } - errorbits += cDecodeOverlongBit; // Overlong encoding used. - } - } - return errorbits; - } - - // Decode UTF-8, unsafe. - template<> - inline uint32 Decode(uint32 state, uint32 unit) - { - if (state == 0) // First byte. 
- { - unit = unit & 0xFF; - if (unit < 0xC0) - { - return unit; // Single-unit (ASCII). - } - uint32 remaining = (unit >> 4) - 0xC; - remaining += (remaining == 0); - return (unit & 0x1F) + (remaining << cDecodeShiftRemaining); // Lead byte of multi-byte. - } - state = (state << 6) + (unit & 0x3F) + (state & cDecodeMaskRemaining) - cDecodeOneRemaining; // Apply c-byte. - return state & ~cDecodeLeadBit; // Mask off the lead bits of a 4-byte sequence. - } - - // Decode UTF-8, safe - template<> - inline uint32 Decode(uint32 state, uint32 unit) - { - if (unit <= 0xF4) // Discard out-of-range values immediately. - { - if (state == 0) // First byte. - { - if (unit < 0x80) - { - return unit; // Single-byte. - } - if (unit < 0xC2) - { - return cDecodeErrorBit; // Invalid continuation byte (or illegal 0xC0/0xC1). - } - uint32 remaining = (unit >> 4) - 0xC; - remaining += (remaining == 0); - return unit + (remaining << cDecodeShiftRemaining); // Multi-byte. - } - if ((unit & 0xC0) == 0x80) - { - const uint32 remaining = (state & cDecodeMaskRemaining) - cDecodeOneRemaining; - state = (state << 8) + unit; - if (remaining != 0) - { - return state | remaining; // Intermediate byte of a multi-byte sequence. - } - return DecodeValidate8(state); // Final byte of a multi-byte sequence. - } - } - return cDecodeErrorBit | state; - } - - // Decode UTF-16, unsafe. - template<> - inline uint32 Decode(uint32 state, uint32 unit) - { - const bool bLead = (unit >= cLeadSurrogateFirst) && (unit <= cLeadSurrogateLast); - const uint32 initial = unit + (bLead << cDecodeShiftRemaining); - const uint32 pair = 0x10000 + ((state & 0x3FF) << 10) + (unit & 0x3FF); - return state == 0 ? initial : pair; - } - - // Decode UTF-16, safe. 
- template<> - inline uint32 Decode(uint32 state, uint32 unit) - { - const bool bTrail = (unit >= cTrailSurrogateFirst) && (unit <= cTrailSurrogateLast); - if (state != 0 && !bTrail) - { - return cDecodeErrorBit + (state & 0xFFFF); // Lead surrogate without trail surrogate - } - uint32 result = Decode(state, unit); - bool bValid = (result & 0xFFFFFFFEU) != 0xFFFEU; - return bValid ? result : result + cDecodeErrorBit + cDecodeInvalidBit; - } - - // Decode UTF-32, unsafe. - template<> - inline uint32 Decode([[maybe_unused]] uint32 state, uint32 unit) - { - return unit; - } - - // Decode UTF-32, safe. - template<> - inline uint32 Decode([[maybe_unused]] uint32 state, uint32 unit) - { - if (unit > cCodepointMax) - { - return cDecodeErrorBit; - } - if (unit >= cLeadSurrogateFirst && unit <= cTrailSurrogateLast) - { - return cDecodeErrorBit | cDecodeSurrogateBit; - } - if ((unit & 0xFFFEU) == 0xFFFEU) - { - return cDecodeErrorBit | cDecodeInvalidBit; - } - return unit; - } - - // Decode ASCII, unsafe. - template<> - inline uint32 Decode([[maybe_unused]] uint32 state, uint32 unit) - { - return unit; - } - - // Decode ASCII, safe. - template<> - inline uint32 Decode([[maybe_unused]] uint32 state, uint32 unit) - { - if (unit > 0x7F) - { - return cDecodeErrorBit; - } - return unit; - } - - // Decode Latin1, unsafe. - template<> - inline uint32 Decode([[maybe_unused]] uint32 state, uint32 unit) - { - return unit; - } - - // Decode Latin1, safe. - template<> - inline uint32 Decode([[maybe_unused]] uint32 state, uint32 unit) - { - if ((unit >= 0x80 && unit <= 0x9F) || (unit > 0xFF)) - { - return cDecodeErrorBit; - } - return unit; - } - - // Decode Windows CP-1252, unsafe. 
- template<> - inline uint32 Decode([[maybe_unused]] uint32 state, uint32 unit) - { - static const uint16 cp1252[] = - { - 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, - 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, - }; - return (unit < 0x80 || unit > 0x9F) ? unit : cp1252[unit - 0x80]; - } - - // Decode Windows CP-1252, safe. - template<> - inline uint32 Decode(uint32 state, uint32 unit) - { - if (unit > 0xFF) - { - return cDecodeErrorBit; - } - uint32 result = Decode(state, unit); - if (!(unit < 0x80 || unit > 0x9F) && (result == unit)) - { - return cDecodeErrorBit; // Not defined in codepage 1252. - } - return result; - } - - // SBase: - // Utility to apply empty-base-optimization on type T. - // Will fall back to a member if T is a reference type. - template - struct SBase - : T - { - SBase(T base) - : T(base) {} - T& GetBase() { return *this; } - const T& GetBase() const { return *this; } - }; - template - struct SBase - { - T& base; - SBase(T& b) - : base(b) {} - T& GetBase() { return base; } - const T& GetBase() const { return base; } - }; - - // SDecoder: - // Functor to decode UCS code-points from an input range. - // Recovery functor will be invoked as a fall-back if decoding fails. - // This allows ensuring all the output is valid (even if the input isn't). - // Note: The destructor will automatically flush any remaining (erroneous) state, you can also call Finalize(). 
- template - struct SDecoder - : SBase - , SBase - { - uint32 state; - SDecoder(Sink sink, Recovery recovery = Recovery()) - : SBase(sink) - , SBase(recovery) - , state(0) {} - SDecoder() { Finalize(); } - Recovery& recovery() { return SBase::GetBase(); } - Sink& sink() { return SBase::GetBase(); } - void operator()(uint32 unit) - { - state = Detail::Decode(state, unit); - if (state <= 0x1FFFFF) - { - sink()(state); - state = 0; - } - else if (state & Detail::cDecodeErrorBit) - { - recovery()(sink(), state, unit); - state = 0; - } - } - void Finalize() - { - if (state) - { - recovery()(sink(), state, 0); - state = 0; - } - } - }; - - // SDecoder: - // Functor to decode to UCS code-points from an input range. - // No attempt to discover or recover from encoding errors is made, can only safely be used with known-valid input. - template - struct SDecoder - : SBase - { - uint32 state; - SDecoder(Sink sink) - : SBase(sink) - , state(0) {} - Sink& sink() { return SBase::GetBase(); } - void operator()(uint32 unit) - { - state = Detail::Decode(state, unit); - if (state <= 0x1FFFFF) - { - sink()(state); - state = 0; - } - } - void Finalize() {} - }; - - // SEncoder: - // Generic Unicode encoder functor. - // Encoding must be one an encoding type for which output is supported. - // The Sink type must have HintSequence member for UTF-8 and UTF-16 (although it may be a no-op). - // In general, you feed operator() with UCS code-points and it will emit code-units. - template - struct SEncoder - { - static const bool value = false; - }; - - // SEncoder: - // Specialization of ASCII encoder functor. - // Note: Any out-of-range character is mapped to question mark. - template - struct SEncoder - : SBase - { - static const bool value = true; - typedef uint8 value_type; - SEncoder(Sink sink) - : SBase(sink) {} - void operator()(uint32 cp) - { - cp = cp < 0x80 ? 
cp : (uint32)'?'; - SBase::GetBase()(value_type(cp)); - } - }; - - // SEncoder: - // Specialization of UTF-8 encoder functor. - template - struct SEncoder - : SBase - { - static const bool value = true; - typedef uint8 value_type; - SEncoder(Sink sink) - : SBase(sink) {} - Sink& sink() { return SBase::GetBase(); } - void operator()(uint32 cp) - { - if (cp < 0x80) - { - // Single byte sequence. - sink()(value_type(cp)); - } - else - { - // Expand 21-bit value to 32-bit. - uint32 bits = - (cp & 0x00003F) + - ((cp & 0x000FC0) << 2) + - ((cp & 0x03F000) << 4) + - ((cp & 0x1C0000) << 6); - - // Type of sequence. - const bool bSeq4 = (cp >= 0x10000); - const bool bSeq3 = (cp >= 0x800); - - // Mask lead-bytes and continuation-bytes. - uint32 mask = 0xEFE0C080; - mask ^= (bSeq3 << 14); - mask += (bSeq4 ? 0xA00000 : 0); - bits |= mask; - - // Length of the sequence. - const uint32 length = (uint32)bSeq4 + (uint32)bSeq3 + 1; - sink().HintSequence(length); - - // Sink the multi-byte sequence. - if (bSeq4) - { - sink()(value_type(bits >> 24)); - } - if (bSeq3) - { - sink()(value_type(bits >> 16)); - } - sink()(value_type(bits >> 8)); - sink()(value_type(bits)); - } - } - }; - - // SEncoder: - // Specialization of UTF-16 encoder functor. - template - struct SEncoder - : SBase - { - static const bool value = true; - typedef uint16 value_type; - SEncoder(Sink sink) - : SBase(sink) {} - Sink& sink() { return SBase::GetBase(); } - void operator()(uint32 cp) - { - if (cp < 0x10000) - { - // Single unit - sink()(value_type(cp)); - } - else - { - // We will generate two-element sequence - sink().HintSequence(2); - - // Surrogate pair - cp -= 0x10000; - uint32 lead = ((cp >> 10) & 0x3FF) + Detail::cLeadSurrogateFirst; - uint32 trail = (cp & 0x3FF) + Detail::cTrailSurrogateFirst; - sink()(value_type(lead)); - sink()(value_type(trail)); - } - } - }; - - // SEncoder: - // Specialization of UTF-32 encoder functor. 
- // Note: This is a no-op, but we want to be able to express UTF-32 just like the other encodings. - template - struct SEncoder - : SBase - { - static const bool value = true; - typedef uint32 value_type; - SEncoder(Sink sink) - : SBase(sink) {} - void operator()(uint32 cp) - { - SBase::GetBase()(value_type(cp)); - } - }; - - // SDecoder, void>: - // Specialization for unsafe no-op trans-coding. - // Since the conversion is a no-op, no need to keep any state or do any computation. - // Note: For a decoding with a fallback, this is not possible since we can't guarantee the input is valid. - template - struct SDecoder, void> - { - Sink sink; - SDecoder(Sink s) - : sink(s) {} - void operator()(uint32 unit) - { - sink(unit); - } - void Finalize() {} - }; - - // SRecoveryDiscard: - // Recovery handler that, on encoding error, discards the offending sequence. - template - struct SRecoveryDiscard - { - SRecoveryDiscard() {} - void operator()([[maybe_unused]] Sink& sink, [[maybe_unused]] uint32 error, [[maybe_unused]] uint32 unit) {} - }; - - // SRecoveryReplace: - // Recovery handler that, on encoding error, replaces the sequence with replacement-character (U+FFFD). - // Note: This implementation matches a whole invalid sequence, it could be changed to emit for every code-unit. - template - struct SRecoveryReplace - { - SRecoveryReplace() {} - void operator()(Sink& sink, uint32 error, uint32 unit) { sink(cReplacementCharacter); } - }; - - // SRecoveryFallback: - // Recovery handler that, on encoding error, falls back to another encoding. - // The fallback encoding must be stateless (ie: ASCII, Latin1 or Win1252). - // This type assumes an 8-bit primary encoding since the only viable fallback encodings are 8-bit. 
- template - struct SRecoveryFallback - : NextFallback - { - SRecoveryFallback() - : NextFallback() {} - void operator()(Sink& sink, uint32 error, uint32 unit) - { - SDecoder fallback(sink, *static_cast(this)); - uint8 byte1(error >> 16); - uint8 byte2(error >> 8); - uint8 byte3(error); - uint8 byte4(unit); - if (byte1) - { - fallback(byte1); - } - if (byte1 | byte2) - { - fallback(byte2); - } - if (byte1 | byte2 | byte3) - { - fallback(byte3); - } - fallback(byte4); - } - }; - - // SRecoveryFallbackHelper: - // Helper to pick a SRecoveryFallback instantiation based on RecoveryMethod. - template - struct SRecoveryFallbackHelper - { - // A compilation error here means RecoveryMethod value was unexpected here - COMPILE_TIME_ASSERT( - RecoveryMethod == eErrorRecovery_FallbackLatin1ThenDiscard || - RecoveryMethod == eErrorRecovery_FallbackLatin1ThenReplace || - RecoveryMethod == eErrorRecovery_FallbackWin1252ThenDiscard || - RecoveryMethod == eErrorRecovery_FallbackWin1252ThenReplace); - typedef SEncoder SinkType; - static const EEncoding FallbackEncoding = - RecoveryMethod == eErrorRecovery_FallbackLatin1ThenDiscard || - RecoveryMethod == eErrorRecovery_FallbackLatin1ThenReplace - ? eEncoding_Latin1 : eEncoding_Win1252; - template - struct Pick - { - typedef SRecoveryDiscard type; - }; - template - struct Pick - { - typedef SRecoveryReplace type; - }; - typedef typename Pick::type NextFallback; - typedef SRecoveryFallback RecoveryType; - typedef SDecoder FullType; - }; - - // STranscoderSelect: - // Derives a chained decoder/encoder pair that performs code-unit -> code-unit transform. - // The RecoveryMethod template parameter determines the behavior during encoding. - // This is the basic way to perform trans-coding, and is the type instantiated by the higher-level functions. 
- template - struct STranscoderSelect; - template - struct STranscoderSelect - : SDecoder, void> - { - typedef SDecoder, void> TranscoderType; - STranscoderSelect(Sink sink) - : TranscoderType(sink) {} - }; - template - struct STranscoderSelect - : SDecoder, SRecoveryDiscard > > - { - typedef SRecoveryDiscard > RecoveryType; - typedef SDecoder, RecoveryType> TranscoderType; - STranscoderSelect(Sink sink) - : TranscoderType(sink) {} - }; - template - struct STranscoderSelect - : SDecoder, SRecoveryReplace > > - { - typedef SRecoveryReplace > RecoveryType; - typedef SDecoder, RecoveryType> TranscoderType; - STranscoderSelect(Sink sink) - : TranscoderType(sink) {} - }; - template - struct STranscoderSelect - : SRecoveryFallbackHelper::FullType - { - static const EErrorRecovery RecoveryMethod = eErrorRecovery_FallbackLatin1ThenDiscard; - typedef typename SRecoveryFallbackHelper::RecoveryType RecoveryType; - typedef typename SRecoveryFallbackHelper::FullType TranscoderType; - STranscoderSelect(Sink sink) - : TranscoderType(sink) {} - }; - template - struct STranscoderSelect - : SRecoveryFallbackHelper::FullType - { - static const EErrorRecovery RecoveryMethod = eErrorRecovery_FallbackLatin1ThenReplace; - typedef typename SRecoveryFallbackHelper::RecoveryType RecoveryType; - typedef typename SRecoveryFallbackHelper::FullType TranscoderType; - STranscoderSelect(Sink sink) - : TranscoderType(sink) {} - }; - template - struct STranscoderSelect - : SRecoveryFallbackHelper::FullType - { - static const EErrorRecovery RecoveryMethod = eErrorRecovery_FallbackWin1252ThenDiscard; - typedef typename SRecoveryFallbackHelper::RecoveryType RecoveryType; - typedef typename SRecoveryFallbackHelper::FullType TranscoderType; - STranscoderSelect(Sink sink) - : TranscoderType(sink) {} - }; - template - struct STranscoderSelect - : SRecoveryFallbackHelper::FullType - { - static const EErrorRecovery RecoveryMethod = eErrorRecovery_FallbackWin1252ThenReplace; - typedef typename 
SRecoveryFallbackHelper::RecoveryType RecoveryType; - typedef typename SRecoveryFallbackHelper::FullType TranscoderType; - STranscoderSelect(Sink sink) - : TranscoderType(sink) {} - }; - - // SIsSafeEncoding: - // Check if the given recovery mode is safe. - // This is used for SFINAE checks in higher-level functions. - template - struct SIsSafeEncoding - { - static const bool value = - R == eErrorRecovery_Discard || - R == eErrorRecovery_Replace || - R == eErrorRecovery_FallbackLatin1ThenDiscard || - R == eErrorRecovery_FallbackLatin1ThenReplace || - R == eErrorRecovery_FallbackWin1252ThenDiscard || - R == eErrorRecovery_FallbackWin1252ThenReplace; - }; - - // SIsCopyableEncoding: - // Check if data in one encoding can be copied directly to another encoding. - // This is the basis for block-copy and string-assign optimizations in un-safe conversion functions. - // Note: There are more valid combinations, they are left out since those can't occur with the output encodings supported. - // Note: Only used for un-safe functions since it doesn't account for potential invalid sequences (they would be copied over). - template - struct SIsCopyableEncoding - { - static const bool value = - InputEncoding == eEncoding_ASCII || // ASCII and Latin1 values don't change in any encoding. - (InputEncoding == eEncoding_Latin1 && OutputEncoding != eEncoding_ASCII); // Except Latin1 -> ASCII is lossy. - }; - template - struct SIsCopyableEncoding - { - static const bool value = true; // If the input and output encodings are the same, then it's copyable. - }; - } -} diff --git a/Code/Legacy/CryCommon/UnicodeFunctions.h b/Code/Legacy/CryCommon/UnicodeFunctions.h deleted file mode 100644 index 48debe9706..0000000000 --- a/Code/Legacy/CryCommon/UnicodeFunctions.h +++ /dev/null @@ -1,1265 +0,0 @@ -/* - * Copyright (c) Contributors to the Open 3D Engine Project. - * For complete copyright and license terms please see the LICENSE at the root of this distribution. 
- * - * SPDX-License-Identifier: Apache-2.0 OR MIT - * - */ - - -// Generic Unicode string functions. -// -// Implements the following functions: -// Analyze: Reports all information on the input string, (length for all encodings, validity- and non-ASCII flags). -// Validate: Checks if the input string is valid encoded. -// Length: Reports the encoded length of some known-valid input, as-if it was encoded in the given output encoding. -// LengthSafe: Reports the encoded length of some input, as-if it was encoded in the given output encoding/recovery. -// Convert: Converts input from a known-valid string type/encoding to another string type/encoding. -// ConvertSafe: Converts and recovers encoding errors from one string type/encoding to another string type/encoding. -// Append: Appends input from a known-valid string type/encoding to another string type/encoding. -// AppendSafe: Appends and recovers encoding errors from one string type/encoding to another string type/encoding. -// -// Note: Ideally the safe functions should be used only once when accepting input from the user or from a file. -// Afterwards, the content is known-safe and the unsafe functions can be used for optimal performance. -// Using ConvertSafe once with a reasonable fall-back (depending on the where the input is from) should be the goal. -// -// Each function has several overloads: -// - One variant to handle a string object / buffer / pointer (1 arg), and one to handle iterators (2 args). -// - One variant with automatic encoding (picks UTF encoding depending on character size), and one for specific encoding. -// - One variant that returns a new string, and one that takes an existing string to overwrite (Convert(Safe) only). -// Each function takes one default argument that employs SFINAE to pick the correct overload depending on the arguments. - - -#pragma once -#include "UnicodeBinding.h" -namespace Unicode -{ - // Results of analysis of an input range of code-units (in any encoding). 
- // This is returned by calling Analyze() function. - struct SAnalysisResult - { - // The type to use for counting units. - // Can be changed to uint64_t for dealing with 4GB+ of string data. - typedef uint32 size_type; - - size_type inputUnits; // The number of input units analyzed. - size_type outputUnits8; // The number of output units when encoded with UTF-8. - size_type outputUnits16; // The number of output units when encoded with UTF-16. - size_type outputUnits32; // The number of output units when encoded with UTF-32 (aka number of UCS code-points). - size_type cpNonAscii; // The number of non-ASCII UCS code-points encountered. - size_type cpInvalid; // The number of invalid UCS code-point encountered (or 0xFFFFFFFF if not available). - - // Default constructor, initialize everything to zero. - SAnalysisResult() - : inputUnits(0) - , outputUnits8(0) - , outputUnits16(0) - , outputUnits32(0) - , cpInvalid(0) - , cpNonAscii(0) {} - - // Check if the input range was empty. - bool IsEmpty() const { return inputUnits == 0; } - - // Check if the input range only contained ASCII characters. - bool IsAscii() const { return cpNonAscii == 0; } - - // Check if the input range was valid (has no encoding errors). - // Note: This returns false if an unsafe decoder was used for analysis. - bool IsValid() const { return cpInvalid == 0; } - - // Get the length of the input range, in source code-units. - size_type LengthInSourceUnits() const { return inputUnits; } - - // Get the length of the input range, in UCS code-points. - size_type LengthInUCS() const { return outputUnits32; } - - // Get the length of the input range when encoded with the given encoding, in code-units. - // Note: If the encoding is not supported for output, the function returns 0. 
- size_type LengthInEncodingUnits(EEncoding encoding) const - { - switch (encoding) - { - case eEncoding_ASCII: - case eEncoding_UTF32: - return outputUnits32; - case eEncoding_UTF16: - return outputUnits16; - case eEncoding_UTF8: - return outputUnits8; - default: - return 0; - } - } - - // Get the length of the input range when encoded with the given encoding, in bytes. - // Note: If the encoding is not supported for output, the function returns 0. - size_type LengthInEncodingBytes(EEncoding encoding) const - { - size_type units = LengthInEncodingUnits(encoding); - switch (encoding) - { - case eEncoding_UTF32: - return units << 2; - case eEncoding_UTF16: - return units << 1; - default: - return units; - } - } - }; - - namespace Detail - { - // SDummySink: - // A sink implementation that does nothing. - struct SDummySink - { - void operator()([[maybe_unused]] uint32 unit) {} - void HintSequence([[maybe_unused]] uint32 length) {} - }; - - // SCountingSink: - // A sink that counts the number of units of output. - struct SCountingSink - { - size_t result; - - SCountingSink() - : result(0) {} - void operator()([[maybe_unused]] uint32 unit) { ++result; } - void HintSequence([[maybe_unused]] uint32 length) {} - }; - - // SAnalysisSink: - // A sink that updates analysis statistics. - struct SAnalysisSink - { - SAnalysisResult& result; - - SAnalysisSink(SAnalysisResult& _result) - : result(_result) {} - void operator()(uint32 cp) - { - const bool isCat2 = cp >= 0x80; - const bool isCat3 = cp >= 0x800; - const bool isCat4 = cp >= 0x10000; - result.outputUnits32 += 1; - result.outputUnits16 += (1 + isCat4); - result.outputUnits8 += (1 + isCat4 + isCat3 + isCat2); - result.cpNonAscii += isCat2; - } - void HintSequence([[maybe_unused]] uint32 length) {} - }; - - // SAnalysisRecovery: - // A recovery helper for analysis that counts invalid sequences. 
- struct SAnalysisRecovery - { - SAnalysisRecovery() {} - void operator()(SAnalysisSink& sink, [[maybe_unused]] uint32 error, [[maybe_unused]] uint32 unit) - { - sink.result.cpInvalid += 1; - } - }; - - // SValidationRecovery: - // A recovery helper for validation, it tracks if there is any invalid sequence. - struct SValidationRecovery - { - bool isValid; - - SValidationRecovery() - : isValid(true) {} - void operator()([[maybe_unused]] SDummySink& sink, [[maybe_unused]] uint32 error, [[maybe_unused]] uint32 unit) - { - isValid = false; - } - }; - - // SAnalyzer: - // Helper to perform analysis, counts the input for a given encoding. - template - struct SAnalyzer - { - SDecoder decoder; - - SAnalyzer(SAnalysisResult& result) - : decoder(result) {} - void operator()(uint32 item) - { - decoder.sink().result.inputUnits += 1; - decoder(item); - } - }; - - // Analyze(target, source): - // Analyze string and store analysis result. - // This is the generic function called by other Analyze overloads. - template - inline void Analyze(SAnalysisResult& target, const InputStringType& source) - { - // Bind methods. - const EBind bindMethod = SBindObject::value; - integral_constant tag; - - // Analyze using helper. - SAnalyzer analyzer(target); - Feed(source, analyzer, tag); - } - - // Validate(source): - // Tests that the string is valid encoding. - // This is the generic function called by other Validate overloads. - template - inline bool Validate(const InputStringType& source) - { - // Bind methods. - const EBind bindMethod = SBindObject::value; - integral_constant tag; - - // Validate using helper. - SDummySink sink; - SDecoder validator(sink); - Feed(source, validator, tag); - return validator.recovery().isValid; - } - - // Length(str): - // Find length of a string (in code-units) after trans-coding from InputEncoding to OutputEncoding. - // This is the generic function called by the other Length overloads. 
- template - inline size_t Length(const InputStringType& source) - { - // If this assert hits, consider using LengthSafe. - assert((Detail::Validate(source)) && "Length was used with non-safe input"); - - // Bind methods. - const EBind bindMethod = SBindObject::value; - integral_constant tag; - - // All copyable encodings have the property that the number of input encoding units equals the output units. - // In addition, this also holds for UTF-32 (always 1) -> ASCII (always 1), even though it's lossy. - const bool isCopyable = SIsCopyableEncoding::value; - const bool isCountable = isCopyable || (InputEncoding == eEncoding_UTF32 && OutputEncoding == eEncoding_ASCII); - - if (isCountable) - { - // Optimization: The number of input units is equal to the number of output units. - return EncodedLength(source, tag); - } - else - { - // We need to perform the conversion. - SCountingSink sink; - STranscoderSelect transcoder(sink); - Feed(source, transcoder, tag); - return sink.result; - } - } - - // LengthSafe(str): - // Find length of a string (in code-units) after trans-coding from InputEncoding to OutputEncoding. - // Note: The Recovery type used during conversion may influence the result, so this needs to match if you use the length information. - // This is the generic function called by the other LengthSafe overloads. - template - inline size_t LengthSafe(const InputStringType& source) - { - // SRequire a safe recovery method. - COMPILE_TIME_ASSERT(SIsSafeEncoding::value); - - // Bind methods. - const EBind bindMethod = SBindObject::value; - integral_constant tag; - - // We can't optimize here, since we cannot assume the input is validly encoded - SCountingSink sink; - STranscoderSelect transcoder(sink); - Feed(source, transcoder, tag); - return sink.result; - } - - // SBlockCopy: - // Helper for block-copying entire string at once (as an optimization) - // This optimization will effectively try to memcpy or assign the whole string at once. 
- // Note: We need to do some partial specialization here to find out if the optimization is valid, so we can't use a function in C++98. - template - struct SBlockCopy - { - static const EBind bindMethod = SBindObject::value; - typedef integral_constant TagType; - size_t operator()(OutputStringType& target, const InputStringType& source) - { - // Optimization: Use block copying for these types. - TagType tag; - const size_t length = EncodedLength(source, tag); - SinkType sink(target, length); - if (sink.CanWrite()) - { - const void* const dataPtr = EncodedPointer(source, tag); - sink(dataPtr, length); - } - return length; - } - }; - - // SBlockCopy: - // Specialization that will use direct string assignment. - // Note: This optimization is not selected when appending, this could be a future optimization if this is common. - // The reason is that the += operator is not present on all supported types (e.g., std::vector). - template - struct SBlockCopy - { - size_t operator()(SameStringType& target, const SameStringType& source) - { - // Optimization: Use copy assignment. - target = source; - return source.size(); - } - }; - - // SBlockCopy: - // Fall-back specialization for Enable == false. - // Note: This specialization has to exist for the linker, but should never be called (and optimized away). - template - struct SBlockCopy - { - size_t operator()([[maybe_unused]] OutputStringType& target, [[maybe_unused]] const InputStringType& source) - { - assert(false && "Should never be called"); - return 0; - } - }; - - // Convert(target, source): - // Trans-code a string from InputEncoding to OutputEncoding. - // This is the generic function that is called by Convert and Append overloads. - // Returns the number of code-units required for full output (excluding any terminators) - template - inline size_t Convert(OutputStringType& target, const InputStringType& source) - { - // If this assert hits, consider using ConvertSafe.
- assert((Detail::Validate(source)) && "Convert was used with non-safe input"); - - // Bind methods. - const EBind inputBindMethod = SBindObject::value; - const EBind outputBindMethod = SBindOutput::value; - integral_constant tag; - typedef SWriteSink SinkType; - - // Check if we can optimize this. - // Block copy is only used when the encodings are copyable AND the bound types allow block copying. - const bool isCopyable = SIsCopyableEncoding::value; - const bool isBlocks = SIsBlockCopyable, SBindOutput >::value; - const bool useBlockCopy = isCopyable && isBlocks; - size_t length; - if (useBlockCopy) - { - // Use optimized path. - SBlockCopy blockCopy; - length = blockCopy(target, source); - } - else - { - // We need to perform the conversion code-unit by code-unit. - // The length is computed first so the sink can be constructed with the required size. - length = Detail::Length(source); - SinkType sink(target, length); - if (sink.CanWrite()) - { - STranscoderSelect transcoder(sink); - Feed(source, transcoder, tag); - } - } - return length; - } - - // ConvertSafe(target, source): - // Safely trans-code a string from InputEncoding to OutputEncoding using the specified Recovery to handle encoding errors. - // This is the generic function called by ConvertSafe and AppendSafe overloads. - template - inline size_t ConvertSafe(OutputStringType& target, const InputStringType& source) - { - // Require a safe recovery method (enforced at compile time by the assert below). - COMPILE_TIME_ASSERT(SIsSafeEncoding::value); - - // Bind methods. - const EBind inputBindMethod = SBindObject::value; - const EBind outputBindMethod = SBindOutput::value; - integral_constant tag; - typedef SWriteSink SinkType; - - // We can't optimize with block-copy here, since we cannot assume the input is validly encoded. - const size_t length = Detail::LengthSafe(source); - SinkType sink(target, length); - if (sink.CanWrite()) - { - STranscoderSelect transcoder(sink); - Feed(source, transcoder, tag); - } - return length; - } - - // SReqAutoObj: - // Require that T is usable as input object, with automatic encoding. - // This is used as a SFINAE argument for overload resolution of the main functions. 
- template - struct SReqAutoObj - : SRequire< - SBindObject::value != eBind_Impossible&& - SEncoder::value&& - SIsSafeEncoding::value - > {}; - - // SReqAutoIts: - // Require that T is usable as input iterator, with automatic encoding. - // All three conditions below (SBindIterator is not eBind_Impossible, SEncoder, SIsSafeEncoding) must hold. - // This is used as a SFINAE argument for overload resolution of the main functions. - template - struct SReqAutoIts - : SRequire< - SBindIterator::value != eBind_Impossible&& - SEncoder::value&& - SIsSafeEncoding::value - > {}; - - // SReqAnyObj: - // Require that T is usable as input object, with user-specified encoding. - // This is used as a SFINAE argument for overload resolution of the main functions. - template - struct SReqAnyObj - : SRequire< - SBindObject::value != eBind_Impossible&& - SEncoder::value&& - SIsSafeEncoding::value - > {}; - - // SReqAnyIts: - // Require that T is usable as input iterator, with user-specified encoding. - // This is used as a SFINAE argument for overload resolution of the main functions. - template - struct SReqAnyIts - : SRequire< - SBindIterator::value != eBind_Impossible&& - SEncoder::value&& - SIsSafeEncoding::value - > {}; - - // SReqAutoObjOut: - // Require that I is usable as input object, and O as output object, with automatic encoding. - // Compared to SReqAutoObj, this additionally requires that SBindOutput is not eBind_Impossible. - // This is used as a SFINAE argument for overload resolution of the main functions. - template - struct SReqAutoObjOut - : SRequire< - SBindObject::value != eBind_Impossible&& - SBindOutput, O>::type, true>::value != eBind_Impossible&& - SEncoder::value&& - SIsSafeEncoding::value - > {}; - - // SReqAutoItsOut: - // Require that I is usable as input iterator, and O as output object, with automatic encoding. - // This is used as a SFINAE argument for overload resolution of the main functions. 
- template - struct SReqAutoItsOut - : SRequire< - SBindIterator::value != eBind_Impossible&& - SBindOutput, O>::type, true>::value != eBind_Impossible&& - SEncoder::value&& - SIsSafeEncoding::value - > {}; - - // SReqAnyObjOut: - // Require that I is usable as input object, and O as output object, with user-specified encoding. - // This is used as a SFINAE argument for overload resolution of the main functions. - template - struct SReqAnyObjOut - : SRequire< - SBindObject::value != eBind_Impossible&& - SBindOutput, O>::type, false>::value != eBind_Impossible&& - SEncoder::value&& - SIsSafeEncoding::value - > {}; - - // SReqAnyItsOut: - // Require that I is usable as input iterator, and O as output object, with user-specified encoding. - // This is used as a SFINAE argument for overload resolution of the main functions. - template - struct SReqAnyItsOut - : SRequire< - SBindIterator::value != eBind_Impossible&& - SBindOutput, O>::type, false>::value != eBind_Impossible&& - SEncoder::value&& - SIsSafeEncoding::value - > {}; - } - - // SAnalysisResult Analyze(str): - // Analyze the given string with the given encoding, providing information on validity and encoding length. - // The result is filled in by Detail::Analyze and returned by value. - template - inline SAnalysisResult Analyze(const InputStringType& str, - typename Detail::SReqAnyObj::type* = 0) - { - SAnalysisResult result; - Detail::Analyze(result, str); - return result; - } - - // SAnalysisResult Analyze(str): - // Analyze the (assumed) Unicode string input, providing information on validity and encoding length. - // The Unicode encoding is picked automatically depending on the input type. 
- template - inline SAnalysisResult Analyze(const InputStringType& str, - typename Detail::SReqAutoObj::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - SAnalysisResult result; - Detail::Analyze(result, str); - return result; - } - - // SAnalysisResult Analyze(begin, end): - // Analyze the given range with the given encoding, providing information on validity and encoding length. - // The begin/end pair is packed into a Detail::SPackedIterators object and forwarded to Detail::Analyze. - template - inline SAnalysisResult Analyze(InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyIts::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - SAnalysisResult result; - Detail::Analyze(result, its); - return result; - } - - // SAnalysisResult Analyze(begin, end): - // Analyze the given (assumed) Unicode range, providing information on validity and encoding length. - // The Unicode encoding is picked automatically depending on the input type. - template - inline SAnalysisResult Analyze(InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoIts::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - SAnalysisResult result; - Detail::Analyze(result, its); - return result; - } - - // bool Validate(str): - // Checks if the given string is valid in the given encoding. - // Forwards directly to Detail::Validate and returns its result. - template - inline bool Validate(const InputStringType& str, - typename Detail::SReqAnyObj::type* = 0) - { - return Detail::Validate(str); - } - - // bool Validate(str): - // Checks if the given string is a valid Unicode string. - // The Unicode encoding is picked automatically depending on the input type. 
- template - inline bool Validate(const InputStringType& str, - typename Detail::SReqAutoObj::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - return Detail::Validate(str); - } - - // bool Validate(begin, end): - // Checks if the given range is valid in the given encoding. - // The begin/end pair is packed into a Detail::SPackedIterators object and forwarded to Detail::Validate. - template - inline bool Validate(InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyIts::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - return Detail::Validate(its); - } - - // bool Validate(begin, end): - // Checks if the given range is valid Unicode. - // The Unicode encoding is picked automatically depending on the input type. - template - inline bool Validate(InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoIts::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - return Detail::Validate(its); - } - - // size_t Length(str): - // Get the length (in OutputEncoding) of the given known-valid string with the given InputEncoding. - // Note: Length assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use LengthSafe. - template - inline size_t Length(const InputStringType& str, - typename Detail::SReqAnyObj::type* = 0) - { - return Detail::Length(str); - } - - // size_t Length(str): - // Get the length (in OutputEncoding) of the given known-valid Unicode string. - // The Unicode encoding is picked automatically depending on the input type. - // Note: Length assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use LengthSafe. 
- template - inline size_t Length(const InputStringType& str, - typename Detail::SReqAutoObj::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - return Detail::Length(str); - } - - // size_t Length(begin, end): - // Get the length (in OutputEncoding) of the known-valid range with the given InputEncoding. - // Note: Length assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use LengthSafe. - template - inline size_t Length(InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyIts::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - return Detail::Length(its); - } - - // size_t Length(begin, end): - // Get the length (in OutputEncoding) of the known-valid Unicode range. - // The Unicode encoding is picked automatically depending on the input type. - // Note: Length assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use LengthSafe. - template - inline size_t Length(InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoIts::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - return Detail::Length(its); - } - - // size_t LengthSafe(str): - // Get the length (in OutputEncoding) of the given string with the given InputEncoding. - // Note: LengthSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Length. - template - inline size_t LengthSafe(const InputStringType& str, - typename Detail::SReqAnyObj::type* = 0) - { - return Detail::LengthSafe(str); - } - - // size_t LengthSafe(str): - // Get the length (in OutputEncoding) of the given Unicode string. - // The Unicode encoding is picked automatically depending on the input type. 
- // Note: LengthSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Length. - template - inline size_t LengthSafe(const InputStringType& str, - typename Detail::SReqAutoObj::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - return Detail::LengthSafe(str); - } - - // size_t LengthSafe(begin, end): - // Get the length (in OutputEncoding) of the range with the given InputEncoding. - // Note: LengthSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Length. - template - inline size_t LengthSafe(InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyIts::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - return Detail::LengthSafe(its); - } - - // size_t LengthSafe(begin, end): - // Get the length (in OutputEncoding) of the Unicode range. - // The Unicode encoding is picked automatically depending on the input type. - // Note: LengthSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Length. - template - inline size_t LengthSafe(InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoIts::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - return Detail::LengthSafe(its); - } - - // OutputStringType &Convert(result, str): - // Converts the given string in the given input encoding and stores into the result string with the given output encoding. - // Note: Convert assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use ConvertSafe. 
- template - inline OutputStringType& Convert(OutputStringType& result, const InputStringType& str, - typename Detail::SReqAnyObjOut::type* = 0) - { - Detail::Convert(result, str); - return result; - } - - // OutputStringType &Convert(result, str): - // Converts the (assumed) Unicode string input and stores into the result Unicode string. - // The Unicode encodings are picked automatically depending on the input type and output type. - // Returns a reference to the result argument. - // Note: Convert assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use ConvertSafe. - template - inline OutputStringType& Convert(OutputStringType& result, const InputStringType& str, - typename Detail::SReqAutoObjOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - Detail::Convert(result, str); - return result; - } - - // OutputStringType &Convert(result, begin, end): - // Converts the given range in the given input encoding and stores into the result string with the given output encoding. - // Returns a reference to the result argument. - // Note: Convert assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use ConvertSafe. - template - inline OutputStringType& Convert(OutputStringType& result, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyItsOut::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - Detail::Convert(result, its); - return result; - } - - // OutputStringType &Convert(result, begin, end): - // Converts the (assumed) Unicode range and stores into the result Unicode string. - // The Unicode encodings are picked automatically depending on the range type and output type. - // Note: Convert assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use ConvertSafe. 
- template - inline OutputStringType& Convert(OutputStringType& result, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoItsOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - Detail::Convert(result, its); - return result; - } - - // size_t Convert(buffer, length, str): - // Converts the given string in the given input encoding and stores into the result buffer with the given output encoding. - // The buffer pointer and length are wrapped in a Detail::SPackedBuffer before conversion. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: Convert assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use ConvertSafe. - template - inline size_t Convert(OutputCharType* buffer, size_t length, const InputStringType& str, - typename Detail::SReqAnyObjOut::type* = 0) - { - typedef Detail::SPackedBuffer OutputStringType; - OutputStringType result(buffer, length); - return Detail::Convert(result, str) + 1; - } - - // size_t Convert(buffer, length, str): - // Converts the (assumed) Unicode string input and stores into the result Unicode buffer. - // The Unicode encodings are picked automatically depending on the input type and buffer type. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: Convert assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use ConvertSafe. 
- template - inline size_t Convert(OutputCharType* buffer, size_t length, const InputStringType& str, - typename Detail::SReqAutoObjOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedBuffer OutputStringType; - OutputStringType result(buffer, length); - return Detail::Convert(result, str) + 1; - } - - // size_t Convert(buffer, length, begin, end): - // Converts the given range in the given input encoding and stores into the result buffer with the given output encoding. - // Returns the required length of the output buffer, in code-units, including the null-terminator - // (Detail::Convert reports the length without terminator, hence the + 1). - // Note: Convert assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use ConvertSafe. - template - inline size_t Convert(OutputCharType* buffer, size_t length, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyItsOut::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - typedef Detail::SPackedBuffer OutputStringType; - const InputStringType its(begin, end); - OutputStringType result(buffer, length); - return Detail::Convert(result, its) + 1; - } - - // size_t Convert(buffer, length, begin, end): - // Converts the (assumed) Unicode range and stores into the result Unicode buffer. - // The Unicode encodings are picked automatically depending on the range type and output type. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: Convert assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use ConvertSafe. 
- template - inline size_t Convert(OutputCharType* buffer, size_t length, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoItsOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - typedef Detail::SPackedBuffer OutputStringType; - const InputStringType its(begin, end); - OutputStringType result(buffer, length); - return Detail::Convert(result, its) + 1; - } - - // OutputStringType Convert(str): - // Converts the given string in the given input encoding to a new string of the given type and output encoding. - // A default-constructed OutputStringType is filled by Detail::Convert and returned by value. - // Note: Convert assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use ConvertSafe. - template - inline OutputStringType Convert(const InputStringType& str, - typename Detail::SReqAnyObjOut::type* = 0) - { - OutputStringType result; - Detail::Convert(result, str); - return result; - } - - // OutputStringType Convert(str): - // Converts the (assumed) Unicode string input to a new Unicode string of the given type. - // The Unicode encodings are picked automatically depending on the input type and output type. - // Note: Convert assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use ConvertSafe. - template - inline OutputStringType Convert(const InputStringType& str, - typename Detail::SReqAutoObjOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - OutputStringType result; - Detail::Convert(result, str); - return result; - } - - // OutputStringType Convert(begin, end): - // Converts the given range in the given input encoding to a new string of the given type and output encoding. - // Note: Convert assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use ConvertSafe. 
- template - inline OutputStringType Convert(InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyItsOut::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - OutputStringType result; - Detail::Convert(result, its); - return result; - } - - // OutputStringType Convert(begin, end): - // Converts the (assumed) Unicode range to a new Unicode string of the given type. - // The Unicode encodings are picked automatically depending on the range type and output type. - // Note: Convert assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use ConvertSafe. - template - inline OutputStringType Convert(InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoItsOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - OutputStringType result; - Detail::Convert(result, its); - return result; - } - - // OutputStringType &ConvertSafe(result, str): - // Converts the given string in the given input encoding and stores into the result string with the given output encoding. - // Returns a reference to the result argument. - // Note: ConvertSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Convert. - template - inline OutputStringType& ConvertSafe(OutputStringType& result, const InputStringType& str, - typename Detail::SReqAnyObjOut::type* = 0) - { - Detail::ConvertSafe(result, str); - return result; - } - - // OutputStringType &ConvertSafe(result, str): - // Converts the (assumed) Unicode string input and stores into the result Unicode string. - // The Unicode encodings are picked automatically depending on the input type and output type. - // Note: ConvertSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Convert. 
- template - inline OutputStringType& ConvertSafe(OutputStringType& result, const InputStringType& str, - typename Detail::SReqAutoObjOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - Detail::ConvertSafe(result, str); - return result; - } - - // OutputStringType &ConvertSafe(result, begin, end): - // Converts the given range in the given input encoding and stores into the result string with the given output encoding. - // The begin/end pair is packed into a Detail::SPackedIterators object and forwarded to Detail::ConvertSafe. - // Note: ConvertSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Convert. - template - inline OutputStringType& ConvertSafe(OutputStringType& result, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyItsOut::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - Detail::ConvertSafe(result, its); - return result; - } - - // OutputStringType &ConvertSafe(result, begin, end): - // Converts the (assumed) Unicode range and stores into the result Unicode string. - // The Unicode encodings are picked automatically depending on the range type and output type. - // Note: ConvertSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Convert. - template - inline OutputStringType& ConvertSafe(OutputStringType& result, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoItsOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - Detail::ConvertSafe(result, its); - return result; - } - - // size_t ConvertSafe(buffer, length, str): - // Converts the given string in the given input encoding and stores into the result buffer with the given output encoding. 
- // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: ConvertSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Convert. - template - inline size_t ConvertSafe(OutputCharType* buffer, size_t length, const InputStringType& str, - typename Detail::SReqAnyObjOut::type* = 0) - { - typedef Detail::SPackedBuffer OutputStringType; - OutputStringType result(buffer, length); - return Detail::ConvertSafe(result, str) + 1; - } - - // size_t ConvertSafe(buffer, length, str): - // Converts the (assumed) Unicode string input and stores into the result Unicode buffer. - // The Unicode encodings are picked automatically depending on the input type and buffer type. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: ConvertSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Convert. - template - inline size_t ConvertSafe(OutputCharType* buffer, size_t length, const InputStringType& str, - typename Detail::SReqAutoObjOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedBuffer OutputStringType; - OutputStringType result(buffer, length); - return Detail::ConvertSafe(result, str) + 1; - } - - // size_t ConvertSafe(buffer, length, begin, end): - // Converts the given range in the given input encoding and stores into the result buffer with the given output encoding. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: ConvertSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Convert. 
- template - inline size_t ConvertSafe(OutputCharType* buffer, size_t length, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyItsOut::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - typedef Detail::SPackedBuffer OutputStringType; - const InputStringType its(begin, end); - OutputStringType result(buffer, length); - return Detail::ConvertSafe(result, its) + 1; - } - - // size_t ConvertSafe(buffer, length, begin, end): - // Converts the (assumed) Unicode range and stores into the result Unicode buffer. - // The Unicode encodings are picked automatically depending on the range type and output type. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: ConvertSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Convert. - template - inline size_t ConvertSafe(OutputCharType* buffer, size_t length, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoItsOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - typedef Detail::SPackedBuffer OutputStringType; - const InputStringType its(begin, end); - OutputStringType result(buffer, length); - // Forward to Detail::ConvertSafe (not Detail::Convert), like the sibling ConvertSafe overloads, so that encoding errors go through recovery instead of the non-safe assert path. - // NOTE(review): the explicit template-argument list is not visible here; confirm it passes Recovery as the other ConvertSafe overloads do. - return Detail::ConvertSafe(result, its) + 1; - } - - // OutputStringType ConvertSafe(str): - // Converts the given string in the given input encoding to a new string of the given type and output encoding. - // Note: ConvertSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Convert. 
- template - inline OutputStringType ConvertSafe(const InputStringType& str, - typename Detail::SReqAnyObjOut::type* = 0) - { - OutputStringType result; - Detail::ConvertSafe(result, str); - return result; - } - - // OutputStringType ConvertSafe(str): - // Converts the (assumed) Unicode string input to a new Unicode string of the given type. - // The Unicode encodings are picked automatically depending on the input type and output type. - // A default-constructed OutputStringType is filled by Detail::ConvertSafe and returned by value. - // Note: ConvertSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Convert. - template - inline OutputStringType ConvertSafe(const InputStringType& str, - typename Detail::SReqAutoObjOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - OutputStringType result; - Detail::ConvertSafe(result, str); - return result; - } - - // OutputStringType ConvertSafe(begin, end): - // Converts the given range in the given input encoding to a new string of the given type and output encoding. - // Note: ConvertSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Convert. - template - inline OutputStringType ConvertSafe(InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyItsOut::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - OutputStringType result; - Detail::ConvertSafe(result, its); - return result; - } - - // OutputStringType ConvertSafe(begin, end): - // Converts the (assumed) Unicode range to a new Unicode string of the given type. - // The Unicode encodings are picked automatically depending on the range type and output type. - // Note: ConvertSafe uses the specified Recovery parameter to fix encoding errors; if the input is known-valid, use Convert. 
- template - inline OutputStringType ConvertSafe(InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoItsOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - OutputStringType result; - Detail::ConvertSafe(result, its); - return result; - } - - // OutputStringType &Append(result, str): - // Appends the given string in the given input encoding and stores at the end of the result string with the given output encoding. - // Implemented by forwarding to Detail::Convert; returns a reference to the result argument. - // Note: Append assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use AppendSafe. - template - inline OutputStringType& Append(OutputStringType& result, const InputStringType& str, - typename Detail::SReqAnyObjOut::type* = 0) - { - Detail::Convert(result, str); - return result; - } - - // OutputStringType &Append(result, str): - // Appends the (assumed) Unicode string input and stores at the end of the result Unicode string. - // The Unicode encodings are picked automatically depending on the input type and output type. - // Note: Append assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use AppendSafe. - template - inline OutputStringType& Append(OutputStringType& result, const InputStringType& str, - typename Detail::SReqAutoObjOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - Detail::Convert(result, str); - return result; - } - - // OutputStringType &Append(result, begin, end): - // Appends the given range in the given input encoding and stores at the end of the result string with the given output encoding. - // Note: Append assumes the input is validly encoded; if this is not guaranteed (i.e., user input), use AppendSafe. 
- template - inline OutputStringType& Append(OutputStringType& result, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyItsOut::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - Detail::Convert(result, its); - return result; - } - - // OutputStringType &Append(result, begin, end): - // Appends the (assumed) Unicode range and stores at the end of the result Unicode string. - // The Unicode encodings are picked automatically depending on the range type and output type. - // Note: Append assumes the input is valid encoded, if this is not guaranteed (ie, user-input), use AppendSafe. - template - inline OutputStringType& Append(OutputStringType& result, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoItsOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - Detail::Convert(result, its); - return result; - } - - // size_t Append(buffer, length, str): - // Appends the given string in the given input encoding to the result buffer with the given output encoding. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: Append assumes the input is valid encoded, if this is not guaranteed (ie, user-input), use AppendSafe. - template - inline size_t Append(OutputCharType* buffer, size_t length, const InputStringType& str, - typename Detail::SReqAnyObjOut::type* = 0) - { - typedef Detail::SPackedBuffer OutputStringType; - OutputStringType result(buffer, length); - return Detail::Convert(result, str) + 1; - } - - // size_t Append(buffer, length, str): - // Appends the (assumed) Unicode string input to the result Unicode buffer. - // The Unicode encodings are picked automatically depending on the buffer type and output type. 
- // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: Append assumes the input is valid encoded, if this is not guaranteed (ie, user-input), use AppendSafe. - template - inline size_t Append(OutputCharType* buffer, size_t length, const InputStringType& str, - typename Detail::SReqAutoObjOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedBuffer OutputStringType; - OutputStringType result(buffer, length); - return Detail::Convert(result, str) + 1; - } - - // size_t Append(buffer, length, begin, end): - // Appends the given range in the given input encoding to the result buffer with the given output encoding. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: Append assumes the input is valid encoded, if this is not guaranteed (ie, user-input), use AppendSafe. - template - inline size_t Append(OutputCharType* buffer, size_t length, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyItsOut::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - typedef Detail::SPackedBuffer OutputStringType; - const InputStringType its(begin, end); - OutputStringType result(buffer, length); - return Detail::Convert(result, its) + 1; - } - - // size_t Append(buffer, length, begin, end): - // Appends the (assumed) Unicode range to the result Unicode buffer. - // The Unicode encodings are picked automatically depending on the range type and output type. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: Append assumes the input is valid encoded, if this is not guaranteed (ie, user-input), use AppendSafe. 
- template - inline size_t Append(OutputCharType* buffer, size_t length, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoItsOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - typedef Detail::SPackedBuffer OutputStringType; - const InputStringType its(begin, end); - OutputStringType result(buffer, length); - return Detail::Convert(result, its) + 1; - } - - // OutputStringType &AppendSafe(result, str): - // Appends the given string in the given input encoding and stores at the end of the result string with the given output encoding. - // Note: AppendSafe uses the specified Recovery parameter to fix encoding errors, if the input is known-valid, use Append. - template - inline OutputStringType& AppendSafe(OutputStringType& result, const InputStringType& str, - typename Detail::SReqAnyObjOut::type* = 0) - { - Detail::ConvertSafe(result, str); - return result; - } - - // OutputStringType &AppendSafe(result, str): - // Appends the (assumed) Unicode string input and stores at the end of the result Unicode string. - // The Unicode encodings are picked automatically depending on the input type and output type. - // Note: AppendSafe uses the specified Recovery parameter to fix encoding errors, if the input is known-valid, use Append. - template - inline OutputStringType& AppendSafe(OutputStringType& result, const InputStringType& str, - typename Detail::SReqAutoObjOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - Detail::ConvertSafe(result, str); - return result; - } - - // OutputStringType &AppendSafe(result, begin, end): - // Appends the given range in the given input encoding and stores at the end of the result string with the given output encoding. 
- // Note: AppendSafe uses the specified Recovery parameter to fix encoding errors, if the input is known-valid, use Append. - template - inline OutputStringType& AppendSafe(OutputStringType& result, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyItsOut::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - Detail::ConvertSafe(result, its); - return result; - } - - // OutputStringType &AppendSafe(result, begin, end): - // Appends the (assumed) Unicode range and stores at the end of the result Unicode string. - // The Unicode encodings are picked automatically depending on the range type and output type. - // Note: AppendSafe uses the specified Recovery parameter to fix encoding errors, if the input is known-valid, use Append. - template - inline OutputStringType& AppendSafe(OutputStringType& result, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoItsOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - const InputStringType its(begin, end); - Detail::ConvertSafe(result, its); - return result; - } - - // size_t AppendSafe(buffer, length, str): - // Appends the given string in the given input encoding to the result buffer with the given output encoding. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: AppendSafe uses the specified Recovery parameter to fix encoding errors, if the input is known-valid, use Append. 
- template - inline size_t AppendSafe(OutputCharType* buffer, size_t length, const InputStringType& str, - typename Detail::SReqAnyObjOut::type* = 0) - { - typedef Detail::SPackedBuffer OutputStringType; - OutputStringType result(buffer, length); - return Detail::ConvertSafe(result, str) + 1; - } - - // size_t AppendSafe(buffer, length, str): - // Appends the (assumed) Unicode string input to the result Unicode buffer. - // The Unicode encodings are picked automatically depending on the buffer type and output type. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: AppendSafe uses the specified Recovery parameter to fix encoding errors, if the input is known-valid, use Append. - template - inline size_t AppendSafe(OutputCharType* buffer, size_t length, const InputStringType& str, - typename Detail::SReqAutoObjOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedBuffer OutputStringType; - OutputStringType result(buffer, length); - return Detail::ConvertSafe(result, str) + 1; - } - - // size_t AppendSafe(buffer, length, begin, end): - // Appends the given range in the given input encoding to the result buffer with the given output encoding. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: AppendSafe uses the specified Recovery parameter to fix encoding errors, if the input is known-valid, use Append. 
- template - inline size_t AppendSafe(OutputCharType* buffer, size_t length, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAnyItsOut::type* = 0) - { - typedef Detail::SPackedIterators InputStringType; - typedef Detail::SPackedBuffer OutputStringType; - const InputStringType its(begin, end); - OutputStringType result(buffer, length); - return Detail::ConvertSafe(result, its) + 1; - } - - // size_t AppendSafe(buffer, length, begin, end): - // Appends the (assumed) Unicode range to the result Unicode buffer. - // The Unicode encodings are picked automatically depending on the range type and output type. - // Returns the required length of the output buffer, in code-units, including the null-terminator. - // Note: AppendSafe uses the specified Recovery parameter to fix encoding errors, if the input is known-valid, use Append. - template - inline size_t AppendSafe(OutputCharType* buffer, size_t length, InputIteratorType begin, InputIteratorType end, - typename Detail::SReqAutoItsOut::type* = 0) - { - const EEncoding InputEncoding = Detail::SInferEncoding::value; - const EEncoding OutputEncoding = Detail::SInferEncoding::value; - typedef Detail::SPackedIterators InputStringType; - typedef Detail::SPackedBuffer OutputStringType; - const InputStringType its(begin, end); - OutputStringType result(buffer, length); - return Detail::Convert(result, its) + 1; - } -} diff --git a/Code/Legacy/CryCommon/UnicodeIterator.h b/Code/Legacy/CryCommon/UnicodeIterator.h deleted file mode 100644 index d939eaa721..0000000000 --- a/Code/Legacy/CryCommon/UnicodeIterator.h +++ /dev/null @@ -1,615 +0,0 @@ -/* - * Copyright (c) Contributors to the Open 3D Engine Project. - * For complete copyright and license terms please see the LICENSE at the root of this distribution. - * - * SPDX-License-Identifier: Apache-2.0 OR MIT - * - */ - - -// Description : Encoded Unicode sequence iteration. 
-// -// For lower level accessing of encoded text, an STL compatible iterator wrapper is provided. -// This iterator will decode the underlying sequence, abstracting it to a sequence of UCS code-points. -// Using the iterator wrapper, you can find where in an encoded string code-points (or encoding errors) are located. -// Note: The iterator is an input-only iterator, you cannot write to the underlying sequence. - - -#pragma once -#include "UnicodeBinding.h" -namespace Unicode -{ - namespace Detail - { - // MoveNext(it, checker, tag): - // Moves the iterator to the next UCS code-point in the encoded sequence. - // Non-specialized version (for 1:1 code-unit to code-point). - template - inline void MoveNext(BaseIterator& it, const BoundsChecker& checker, const integral_constant) - { - COMPILE_TIME_ASSERT( - Encoding == eEncoding_ASCII || - Encoding == eEncoding_UTF32 || - Encoding == eEncoding_Latin1 || - Encoding == eEncoding_Win1252); - assert(!checker.IsEnd(it) && "Attempt to iterate past the end of the sequence"); - - // All of these encodings use a single code-unit for each code-point. - ++it; - } - - // MoveNext(it, checker, tag): - // Moves the iterator to the next UCS code-point in the encoded sequence. - // Specialized for UTF-8. - template - inline void MoveNext(BaseIterator& it, const BoundsChecker& checker, integral_constant) - { - assert(!checker.IsEnd(it) && "Attempt to iterate past the end of the sequence"); - - // UTF-8: just need to skip up to 3 continuation bytes. - for (int i = 0; i < 4; ++i) - { - ++it; - if (checker.IsEnd(it)) // :WARN: always returns false if "safe" bool is false! - { - break; - } - uint32 val = static_cast(*it); - if ((val & 0xC0) != 0x80) - { - break; - } - } - } - - // MoveNext(it, checker, tag): - // Moves the iterator to the next UCS code-point in the encoded sequence. - // Specialized for UTF-16. 
- template - inline void MoveNext(BaseIterator& it, const BoundsChecker& checker, integral_constant) - { - assert(!checker.IsEnd(it) && "Attempt to iterate past the end of the sequence"); - - // UTF-16: just need to skip one lead surrogate. - ++it; - uint32 val = static_cast(*it); - if (val >= cLeadSurrogateFirst && val <= cLeadSurrogateLast) - { - if (!checker.IsEnd(it)) - { - ++it; - } - } - } - - // MovePrev(it, checker, tag): - // Moves the iterator to the previous UCS code-point in the encoded sequence. - // Non-specialized version (for 1:1 code-unit to code-point). - template - inline void MovePrev(BaseIterator& it, const BoundsChecker& checker, const integral_constant) - { - COMPILE_TIME_ASSERT( - Encoding == eEncoding_ASCII || - Encoding == eEncoding_UTF32 || - Encoding == eEncoding_Latin1 || - Encoding == eEncoding_Win1252); - assert(!checker.IsBegin(it) && "Attempt to iterate past the beginning of the sequence"); - - // All of these encodings use a single code-unit for each code-point. - --it; - } - - // MovePrev(it, checker, tag): - // Moves the iterator to the previous UCS code-point in the encoded sequence. - // Specialized for UTF-8. - template - inline void MovePrev(BaseIterator& it, const BoundsChecker& checker, integral_constant) - { - assert(!checker.IsBegin(it) && "Attempt to iterate past the beginning of the sequence"); - - // UTF-8: just need to skip up to 3 continuation bytes. - for (int i = 0; i < 4; ++i) - { - --it; - if (checker.IsBegin(it)) - { - break; - } - uint32 val = static_cast(*it); - if ((val & 0xC0) != 0x80) - { - break; - } - } - } - - // MovePrev(it, checker, tag): - // Moves the iterator to the previous UCS code-point in the encoded sequence. - // Specialized for UTF-16. - template - inline void MovePrev(BaseIterator& it, const BoundsChecker& checker, integral_constant) - { - assert(!checker.IsBegin(it) && "Attempt to iterate past the beginning of the sequence"); - - // UTF-16: just need to skip one lead surrogate. 
- --it; - uint32 val = static_cast(*it); - if (val >= cLeadSurrogateFirst && val <= cLeadSurrogateLast) - { - if (!checker.IsBegin(it)) - { - --it; - } - } - } - - // SBaseIterators: - // Utility to access base iterators properties from CIterator. - // This is the bounds-checked specialization, the range information is kept to defend against malformed sequences. - template - struct SBaseIterators - { - typedef BaseIterator type; - type begin, end; - type it; - - SBaseIterators(const BaseIterator& _begin, const BaseIterator& _end) - : begin(_begin) - , end(_end) - , it(_begin) {} - - SBaseIterators(const SBaseIterators& other) - : begin(other.begin) - , end(other.end) - , it(other.it) {} - - SBaseIterators& operator =(const SBaseIterators& other) - { - begin = other.begin; - end = other.end; - it = other.it; - return *this; - } - - bool IsBegin(const BaseIterator& _it) const - { - return begin == _it; - } - - bool IsEnd(const BaseIterator& _it) const - { - return end == _it; - } - - bool IsEqual(const SBaseIterators& other) const - { - return it == other.it - && begin == other.begin - && end == other.end; - } - - // Note: Only called inside assert. - // O(N) version; works with any forward-iterator (or better) - bool IsInRange(const BaseIterator& _it, std::forward_iterator_tag) const - { - for (BaseIterator i = begin; i != end; ++i) - { - if (_it == i) - { - return true; - } - } - return false; - } - - // Note: Only called inside assert. - // O(1) version; requires random-access-iterator. - bool IsInRange(const BaseIterator& _it, std::random_access_iterator_tag) const - { - return (begin <= _it && _it < end); - } - - // Note: Only called inside assert. - // Dispatches to the O(1) version if a random-access iterator is used (common case). - bool IsInRange(const BaseIterator& _it) const - { - return IsInRange(_it, typename std::iterator_traits::iterator_category()); - } - }; - - // SBaseIterators: - // Utility to access base iterators properties from CIterator. 
- // This is the un-checked specialization for known-safe sequences. - template - struct SBaseIterators - { - typedef BaseIterator type; - type it; - - explicit SBaseIterators(const BaseIterator& begin) - : it(begin) {} - - SBaseIterators(const BaseIterator& begin, const BaseIterator& end) - : it(begin) {} - - SBaseIterators(const SBaseIterators& other) - : it(other.it) {} - - SBaseIterators& operator =(const SBaseIterators& other) - { - it = other.it; - return *this; - } - - bool IsBegin(const BaseIterator&) const - { - return false; - } - - bool IsEnd(const BaseIterator&) const - { - return false; - } - - bool IsEqual(const SBaseIterators& other) const - { - return it == other.it; - } - - bool IsInRange(const BaseIterator&) const - { - return true; - } - }; - - // SIteratorSink: - // Helper to store the last code-point and error bit that was decoded. - // This is the safe specialization for potentially malformed sequences. - template - struct SIteratorSink - { - static const uint32 cEmpty = 0xFFFFFFFFU; - uint32 value; - bool error; - - void Clear() - { - value = cEmpty; - error = false; - } - - bool IsEmpty() const - { - return value == cEmpty; - } - - bool IsError() const - { - return error; - } - - const uint32& GetValue() const - { - return value; - } - - void MarkDecodingError() - { - value = cReplacementCharacter; - error = true; - } - - template - void Decode(const SBaseIterators& its, integral_constant) - { - typedef SDecoder DecoderType; - DecoderType decoder(*this, *this); - Clear(); - for (BaseIterator it = its.it; IsEmpty(); ++it) - { - uint32 val = static_cast(*it); - decoder(val); - if (its.IsEnd(it)) - { - break; - } - } - if (IsEmpty()) - { - // If we still have neither a new value or an error flag, just treat as error. - // This can happen if we reached the end of the sequence, but it ends in an incomplete code-sequence. 
- MarkDecodingError(); - } - } - - template - void DecodeIfEmpty(const SBaseIterators& its, integral_constant tag) - { - if (IsEmpty()) - { - Decode(its, tag); - } - } - - void operator()(uint32 unit) - { - value = unit; - } - - void operator()(SIteratorSink&, uint32, uint32) - { - MarkDecodingError(); - } - }; - - // SIteratorSink: - // Helper to store the last code-point that was decoded. - // This is the un-safe specialization for known-valid sequences. - // Note: No error-state is tracked since we won't handle that regardless for un-safe CIterator. - template<> - struct SIteratorSink - { - static const uint32 cEmpty = 0xFFFFFFFFU; - uint32 value; - - void Clear() - { - value = cEmpty; - } - - bool IsEmpty() const - { - return value == cEmpty; - } - - bool IsError() const - { - return false; - } - - const uint32& GetValue() const - { - return value; - } - - template - void Decode(const SBaseIterators& its, integral_constant) - { - typedef SDecoder DecoderType; - DecoderType decoder(*this); - for (BaseIterator it = its.it; IsEmpty(); ++it) - { - uint32 val = static_cast(*it); - decoder(val); - } - } - - template - void DecodeIfEmpty(const SBaseIterators& its, integral_constant tag) - { - if (IsEmpty()) - { - Decode(its, tag); - } - } - - void operator()(uint32 unit) - { - value = unit; - } - }; - } - - // CIterator: - // Helper class that can iterate over an encoded text sequence and read the underlying UCS code-points. - // If the Safe flag is set, bounds checking is performed inside multi-unit sequences to guard against decoding errors. - // This requires the user to know where the sequence ends (use the constructor taking two parameters). - // Note: The BaseIterator must be forward-iterator or better when Safe flag is set. - // If the Safe flag is not set, you must guarantee the sequence is validly encoded, and allows the use of the single argument constructor. 
- // In the case of unsafe iterator used for C-style string pointer, look for a U+0000 dereferenced value to end the iteration. - // Regardless of the Safe flag, the user must ensure that the iterator is never moved past the beginning or end of the range (just like any other STL iterator). - // Example of typical usage: - // string utf8 = "foo"; // UTF-8 - // for (Unicode::CIterator it(utf8.begin(), utf8.end()); it != utf8.end(); ++it) - // { - // uint32 codepoint = *it; // 32-bit UCS code-point - // } - // Example unsafe usage: (for known-valid encoded C-style strings): - // const char *pValid = "foo"; // UTF-8 - // for (Unicode::CIterator it = pValid; *it != 0; ++it) - // { - // uint32 codepoint = *it; // 32-bit UCS code-point - // } - template::value> - class CIterator - { - // The iterator value in the encoded sequence. - // Optionally provides bounds-checking. - Detail::SBaseIterators its; - - // The cached UCS code-point at the current position. - // Mutable because dereferencing is conceptually const, but does cache some state in this case. - mutable Detail::SIteratorSink sink; - - public: - // Types for compatibility with STL bidirectional iterator requirements. - typedef const uint32 value_type; - typedef const uint32& reference; - typedef const uint32* pointer; - typedef const ptrdiff_t difference_type; - typedef std::bidirectional_iterator_tag iterator_category; - - // Construct an iterator for the given range. - // The initial position of the iterator as at the beginning of the range. - CIterator(const BaseIterator& begin, const BaseIterator& end) - : its(begin, end) - { - sink.Clear(); - } - - // Construct an iterator from a single iterator (typically C-style string pointer). - // This can only be used for unsafe iterators. - template - CIterator(const IteratorType& it, typename Detail::SRequire::value, IteratorType>::type* = 0) - : its(static_cast(it)) - { - sink.Clear(); - } - - // Copy-construct an iterator. 
- CIterator(const CIterator& other) - : its(other.its) - , sink(other.sink) {} - - // Copy-assign an iterator. - CIterator& operator =(const CIterator& other) - { - its = other.its; - sink = other.sink; - return *this; - } - - // Test if the iterator points at an encoding error in the underlying encoded sequence. - // If so, the function returns false. - // When using an un-safe iterator, this function always returns true, if a sequence can contain encoding errors, you must use the safe variant. - // Note: This requires the underlying iterator to be dereferenced, so you cannot use it only while the iterator is inside the valid range. - bool IsAtValidCodepoint() const - { - assert(!its.IsEnd(its.it) && "Attempt to dereference the past-the-end iterator"); - Detail::integral_constant tag; - sink.DecodeIfEmpty(its, tag); - return !sink.IsError(); - } - - // Gets the current position in the underlying encoded sequence. - // If the iterator points to an invalidly encoded sequence (ie, IsError() returns true), the direction of iteration is significant. - // In that case the returned position is approximated; to work around this: move all iterators of which the position is compared in the same direction. - const BaseIterator& GetPosition() const - { - return its.it; - } - - // Sets the current position in the underlying encoded sequence. - // You may not set the position outside the range for which this iterator was constructed. - void SetPosition(const BaseIterator& it) - { - assert(its.IsInRange(it) && "Attempt to set the underlying iterator outside of the supported range"); - its.it = it; - } - - // Test if this iterator is equal to another iterator instance. - // Note: In the presence of an invalidly encoded sequence (ie, IsError() returns true), the direction of iteration is significant. 
- // To work around this, you can either: - // 1) Move all iterators that will be compared in the same direction; or - // 2) Compare the dereferenced iterator value(s) instead (if applicable). - bool operator ==(const CIterator& other) const - { - return its.IsEqual(other.its); - } - - // Test if this iterator is equal to another base iterator. - // Note: If the provided iterator does not point to the the first code-unit of an UCS code-point, the behavior is undefined. - bool operator ==(const BaseIterator& other) const - { - return its.it == other; - } - - // Test if this iterator is equal to another iterator instance. - // Note: In the presence of an invalidly encoded sequence (ie, IsError() returns true), the direction of iteration is significant. - // To work around this, you can either: - // 1) Move all iterators that will be compared in the same direction; or - // 2) Compare the dereferenced iterator value(s) instead (if applicable). - bool operator !=(const CIterator& other) const - { - return !its.IsEqual(other.its); - } - - // Test if this iterator is equal to another base iterator. - // Note: If the provided iterator does not point to the the first code-unit of an UCS code-point, the behavior is undefined. - bool operator !=(const BaseIterator& other) const - { - return its.it != other; - } - - // Get the decoded UCS code-point at the current position in the sequence. - // If the iterator points to an invalidly encoded sequence (ie, IsError() returns true) the function returns U+FFFD (replacement character). - reference operator *() const - { - assert(!its.IsEnd(its.it) && "Attempt to dereference the past-the-end iterator"); - Detail::integral_constant tag; - sink.DecodeIfEmpty(its, tag); - return sink.GetValue(); - } - - // Advance the iterator to the next UCS code-point. - // Note: You must make sure the iterator is not at the end of the sequence, even in Safe mode. 
- // However, in Safe mode, the iterator will never move past the end of the sequence in the presence of encoding errors. - CIterator& operator ++() - { - Detail::integral_constant tag; - Detail::MoveNext(its.it, its, tag); - sink.Clear(); - return *this; - } - - // Go back to the previous UCS code-point. - // Note: You must make sure the iterator is not at the beginning of the sequence, even in Safe mode. - // However, in Safe mode, the iterators will never move past the beginning of the sequence in the presence of encoding errors. - CIterator& operator --() - { - Detail::integral_constant tag; - Detail::MovePrev(its.it, its, tag); - sink.Clear(); - return *this; - } - - // Advance the iterator to the next UCS code-point, return a copy of the iterator position before advancing. - // Note: You must make sure the iterator is not at the end of the sequence, even in Safe mode. - // However, in Safe mode, the iterator will never move past the end of the sequence in the presence of encoding errors. - CIterator operator ++(int) - { - CIterator result = *this; - ++*this; - return result; - } - - // Go back to the previous UCS code-point, return a copy of the iterator position before going back. - // Note: You must make sure the iterator is not at the beginning of the sequence, even in Safe mode. - // However, in Safe mode, the iterators will never move past the beginning of the sequence in the presence of encoding errors. - CIterator operator --(int) - { - CIterator result = *this; - --*this; - return result; - } - }; - - namespace Detail - { - // SIteratorSpecializer: - // Specializes the CIterator template to use for a given string type. - // Note: The reason we use this is because MSVC doesn't want to deduce this on the MakeIterator declaration. - template - struct SIteratorSpecializer - { - typedef CIterator type; - }; - } - - // MakeIterator(const StringType &str): - // Helper function to make an UCS code-point iterator given an Unicode string. 
- // Example usage: - // string utf8 = "foo"; // UTF-8 - // auto it = Unicode::MakeIterator(utf8); - // while (it != utf8.end()) - // { - // uint32 codepoint = *it; // 32-bit UCS code-point - // } - // Or, in a for-loop: - // for (auto it = Unicode::MakeIterator(utf8); it != utf8.end(); ++it) {} - template - inline typename Detail::SIteratorSpecializer::type MakeIterator(const StringType& str) - { - return typename Detail::SIteratorSpecializer::type(str.begin(), str.end()); - } -} diff --git a/Code/Legacy/CryCommon/crycommon_files.cmake b/Code/Legacy/CryCommon/crycommon_files.cmake index 63c5b49513..c78a6a4df0 100644 --- a/Code/Legacy/CryCommon/crycommon_files.cmake +++ b/Code/Legacy/CryCommon/crycommon_files.cmake @@ -116,10 +116,6 @@ set(FILES TimeValue_info.h TypeInfo_decl.h TypeInfo_impl.h - UnicodeBinding.h - UnicodeEncoding.h - UnicodeFunctions.h - UnicodeIterator.h VectorMap.h VectorSet.h VertexFormats.h