package.deps.is_utf8.src.is_utf8.cpp Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of utf-8-validate Show documentation
Show all versions of utf-8-validate Show documentation
Check if a buffer contains valid UTF-8
The newest version!
#ifndef IS_UTF8_H
#define IS_UTF8_H
#include
#ifndef IS_UTF8_COMPILER_CHECK_H
#define IS_UTF8_COMPILER_CHECK_H
// Compiler sanity check: rejects non-C++ compilers and pre-C++11 language
// modes, and derives the IS_UTF8_CPLUSPLUSxx feature-level macros.
#ifndef __cplusplus
#error we require a C++ compiler
#endif
// IS_UTF8_CPLUSPLUS is the language version used by the checks below. It can
// be predefined by the build; otherwise it is taken from _MSVC_LANG when that
// macro is defined and the compiler is not clang (with _MSC_VER == 1900 mapped
// to C++11), and from __cplusplus everywhere else.
#ifndef IS_UTF8_CPLUSPLUS
#if defined(_MSVC_LANG) && !defined(__clang__)
#define IS_UTF8_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG)
#else
#define IS_UTF8_CPLUSPLUS __cplusplus
#endif
#endif
// C++ 17 or later: IS_UTF8_CPLUSPLUS17 is defined (unless already set).
#if !defined(IS_UTF8_CPLUSPLUS17) && (IS_UTF8_CPLUSPLUS >= 201703L)
#define IS_UTF8_CPLUSPLUS17 1
#endif
// C++ 14 or later: IS_UTF8_CPLUSPLUS14 is defined (unless already set).
#if !defined(IS_UTF8_CPLUSPLUS14) && (IS_UTF8_CPLUSPLUS >= 201402L)
#define IS_UTF8_CPLUSPLUS14 1
#endif
// C++ 11 or later: IS_UTF8_CPLUSPLUS11 is defined (unless already set).
#if !defined(IS_UTF8_CPLUSPLUS11) && (IS_UTF8_CPLUSPLUS >= 201103L)
#define IS_UTF8_CPLUSPLUS11 1
#endif
// C++11 is the minimum supported language level.
#ifndef IS_UTF8_CPLUSPLUS11
#error we require a compiler compliant with the C++11 standard
#endif
#endif // IS_UTF8_COMPILER_CHECK_H
#ifndef IS_UTF8_COMMON_DEFS_H
#define IS_UTF8_COMMON_DEFS_H
#include
#ifndef IS_UTF8_PORTABILITY_H
#define IS_UTF8_PORTABILITY_H
#include
#include
#include
#include
#include
#ifndef _WIN32
// strcasecmp, strncasecmp
#include
#endif
// Visual Studio detection. Whenever _MSC_VER is defined,
// IS_UTF8_VISUAL_STUDIO is set, plus exactly one of
// IS_UTF8_CLANG_VISUAL_STUDIO or IS_UTF8_REGULAR_VISUAL_STUDIO.
#ifdef _MSC_VER
#define IS_UTF8_VISUAL_STUDIO 1
/**
* We want to differentiate carefully between
* clang under visual studio and regular visual
* studio.
*
* Under clang for Windows, we enable:
* * target pragmas so that part and only part of the
* code gets compiled for advanced instructions.
*
*/
#ifdef __clang__
// clang under visual studio
#define IS_UTF8_CLANG_VISUAL_STUDIO 1
#else
// just regular visual studio (best guess)
#define IS_UTF8_REGULAR_VISUAL_STUDIO 1
#endif // __clang__
#endif // _MSC_VER
#ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
// https://en.wikipedia.org/wiki/C_alternative_tokens
// This header should have no effect, except maybe
// under Visual Studio.
#include
#endif
// Classify the target architecture. At most one of IS_UTF8_IS_X86_64,
// IS_UTF8_IS_ARM64 and IS_UTF8_IS_32BITS is defined. On 64-bit POWER none of
// them is defined and a build message is emitted instead.
#if defined(__x86_64__) || defined(_M_AMD64)
#define IS_UTF8_IS_X86_64 1
#elif defined(__aarch64__) || defined(_M_ARM64)
#define IS_UTF8_IS_ARM64 1
#elif defined(__PPC64__) || defined(_M_PPC64)
//#define IS_UTF8_IS_PPC64 1
// Kept on a single line: a backslash continuation inside the string literal
// would glue the two halves of the message together without a space.
#pragma message("The library does not yet support SIMD acceleration under POWER processors.")
#else
// Everything that is not recognised above lands here, so this macro is also
// set for any other architecture the checks do not know about.
#define IS_UTF8_IS_32BITS 1
// We do not support 32-bit platforms, but it can be
// handy to identify them.
#if defined(_M_IX86) || defined(__i386__)
#define IS_UTF8_IS_X86_32BITS 1
#elif defined(__arm__) || defined(_M_ARM)
#define IS_UTF8_IS_ARM_32BITS 1
#elif defined(__PPC__) || defined(_M_PPC)
#define IS_UTF8_IS_PPC_32BITS 1
#endif
#endif // defined(__x86_64__) || defined(_M_AMD64)
// Two-level stringification: IS_UTF8_STRINGIFY(a) macro-expands `a` first and
// then turns the result into a string literal.
#define IS_UTF8_STRINGIFY_IMPLEMENTATION_(a) #a
#define IS_UTF8_STRINGIFY(a) IS_UTF8_STRINGIFY_IMPLEMENTATION_(a)
// We are going to use runtime dispatch.
// IS_UTF8_TARGET_REGION(T) / IS_UTF8_UNTARGET_REGION bracket code that must be
// compiled for the instruction-set target string T. They are only given a
// real definition on x86-64 with clang or GCC.
#ifdef IS_UTF8_IS_X86_64
#ifdef __clang__
// clang does not have GCC push pop
// warning: clang attribute push can't be used within a namespace in clang up
// til 8.0 so IS_UTF8_TARGET_REGION and IS_UTF8_UNTARGET_REGION must be
// *outside* of a namespace.
#define IS_UTF8_TARGET_REGION(T) \
_Pragma(IS_UTF8_STRINGIFY( \
clang attribute push(__attribute__((target(T))), apply_to = function)))
#define IS_UTF8_UNTARGET_REGION _Pragma("clang attribute pop")
#elif defined(__GNUC__)
// GCC is easier
#define IS_UTF8_TARGET_REGION(T) \
_Pragma("GCC push_options") _Pragma(IS_UTF8_STRINGIFY(GCC target(T)))
#define IS_UTF8_UNTARGET_REGION _Pragma("GCC pop_options")
#endif // clang then gcc
#endif // x86
// Default target region macros don't do anything.
#ifndef IS_UTF8_TARGET_REGION
#define IS_UTF8_TARGET_REGION(T)
#define IS_UTF8_UNTARGET_REGION
#endif
// IS_UTF8_GCC11ORMORE is defined only for genuine GCC (clang also defines
// __GNUC__, hence the explicit exclusion) at major version 11 or later.
#if defined(__GNUC__) && !defined(__clang__)
#if __GNUC__ >= 11
#define IS_UTF8_GCC11ORMORE 1
#endif // __GNUC__ >= 11
#endif // defined(__GNUC__) && !defined(__clang__)
#endif // IS_UTF8_PORTABILITY_H
#ifndef IS_UTF8_AVX512_H_
#define IS_UTF8_AVX512_H_
/*
It is possible to override the AVX-512 settings through the compiler flags
(for example with cmake's -DCMAKE_CXX_FLAGS).
Every macro has the form `IS_UTF8_HAS_AVX512{feature}`, where {feature} is
the code name of an extension. Each one is left untouched when it is already
defined; otherwise it is set to 1 when the compiler's matching
`__AVX512{feature}__` macro is defined and equal to 1.
The supported features are listed below.
*/
// AVX-512 Foundation
#ifndef IS_UTF8_HAS_AVX512F
#if defined(__AVX512F__) && __AVX512F__ == 1
#define IS_UTF8_HAS_AVX512F 1
#endif
#endif
// AVX-512 DQ
#ifndef IS_UTF8_HAS_AVX512DQ
#if defined(__AVX512DQ__) && __AVX512DQ__ == 1
#define IS_UTF8_HAS_AVX512DQ 1
#endif
#endif
// AVX-512 IFMA
#ifndef IS_UTF8_HAS_AVX512IFMA
#if defined(__AVX512IFMA__) && __AVX512IFMA__ == 1
#define IS_UTF8_HAS_AVX512IFMA 1
#endif
#endif
// AVX-512 CD
#ifndef IS_UTF8_HAS_AVX512CD
#if defined(__AVX512CD__) && __AVX512CD__ == 1
#define IS_UTF8_HAS_AVX512CD 1
#endif
#endif
// AVX-512 BW
#ifndef IS_UTF8_HAS_AVX512BW
#if defined(__AVX512BW__) && __AVX512BW__ == 1
#define IS_UTF8_HAS_AVX512BW 1
#endif
#endif
// AVX-512 VL
#ifndef IS_UTF8_HAS_AVX512VL
#if defined(__AVX512VL__) && __AVX512VL__ == 1
#define IS_UTF8_HAS_AVX512VL 1
#endif
#endif
// AVX-512 VBMI
#ifndef IS_UTF8_HAS_AVX512VBMI
#if defined(__AVX512VBMI__) && __AVX512VBMI__ == 1
#define IS_UTF8_HAS_AVX512VBMI 1
#endif
#endif
// AVX-512 VBMI2
#ifndef IS_UTF8_HAS_AVX512VBMI2
#if defined(__AVX512VBMI2__) && __AVX512VBMI2__ == 1
#define IS_UTF8_HAS_AVX512VBMI2 1
#endif
#endif
// AVX-512 VNNI
#ifndef IS_UTF8_HAS_AVX512VNNI
#if defined(__AVX512VNNI__) && __AVX512VNNI__ == 1
#define IS_UTF8_HAS_AVX512VNNI 1
#endif
#endif
// AVX-512 BITALG
#ifndef IS_UTF8_HAS_AVX512BITALG
#if defined(__AVX512BITALG__) && __AVX512BITALG__ == 1
#define IS_UTF8_HAS_AVX512BITALG 1
#endif
#endif
// AVX-512 VPOPCNTDQ
#ifndef IS_UTF8_HAS_AVX512VPOPCNTDQ
#if defined(__AVX512VPOPCNTDQ__) && __AVX512VPOPCNTDQ__ == 1
#define IS_UTF8_HAS_AVX512VPOPCNTDQ 1
#endif
#endif
#endif // IS_UTF8_AVX512_H_
// Debug-block markers for LLVM-MCA.
//
// IS_UTF8_BEGIN_DEBUG_BLOCK(name) / IS_UTF8_END_DEBUG_BLOCK(name) emit
// assembler comments that delimit a named region; IS_UTF8_DEBUG_BLOCK(name,
// block) wraps `block` between the two markers.
//
// The wrapper must call the IS_UTF8_-prefixed markers (the unprefixed names
// do not exist), and on compilers without the markers it must still execute
// `block` so that program behaviour does not depend on the compiler.
#if defined(__GNUC__)
// Marks a block with a name so that MCA analysis can see it.
#define IS_UTF8_BEGIN_DEBUG_BLOCK(name)                                        \
  __asm volatile("# LLVM-MCA-BEGIN " #name);
#define IS_UTF8_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name);
#define IS_UTF8_DEBUG_BLOCK(name, block)                                       \
  IS_UTF8_BEGIN_DEBUG_BLOCK(name);                                             \
  block;                                                                       \
  IS_UTF8_END_DEBUG_BLOCK(name);
#else
#define IS_UTF8_BEGIN_DEBUG_BLOCK(name)
#define IS_UTF8_END_DEBUG_BLOCK(name)
#define IS_UTF8_DEBUG_BLOCK(name, block) block;
#endif
// Align to N-byte boundary.
// All three macros rely on the mask ((n)-1), so they only give meaningful
// results when n is a power of two.
// NOTE(review): ~((n)-1) is computed in the type of n; if n is an unsigned
// type narrower than a, the upper bits of a are masked off — confirm callers
// pass a suitably wide (or signed) n.
// Rounds a up to the next multiple of n.
#define IS_UTF8_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
// Rounds a down to the previous multiple of n.
#define IS_UTF8_ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
// True when the address held by ptr is a multiple of n.
#define IS_UTF8_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
// Compiler abstraction layer for inlining hints, branch hints and warning
// control. Two variants exist: one for regular Visual Studio and one for
// GCC-compatible compilers (including clang under Visual Studio).
#if defined(IS_UTF8_REGULAR_VISUAL_STUDIO)
// ---- Regular Visual Studio ----
#define is_utf8_really_inline __forceinline
#define is_utf8_never_inline __declspec(noinline)
// No equivalent attributes are used here, so these expand to nothing.
#define is_utf8_unused
#define is_utf8_warn_unused
// Branch hints are no-ops here; callers may predefine them.
#ifndef is_utf8_likely
#define is_utf8_likely(x) x
#endif
#ifndef is_utf8_unlikely
#define is_utf8_unlikely(x) x
#endif
#define IS_UTF8_PUSH_DISABLE_WARNINGS __pragma(warning(push))
#define IS_UTF8_PUSH_DISABLE_ALL_WARNINGS __pragma(warning(push, 0))
#define IS_UTF8_DISABLE_VS_WARNING(WARNING_NUMBER)                             \
  __pragma(warning(disable : WARNING_NUMBER))
// Get rid of Intellisense-only warnings (Code Analysis)
// Though __has_include is C++17, it is supported in Visual Studio 2017 or
// better (_MSC_VER>=1910).
// The header supplies ALL_CPPCORECHECK_WARNINGS; it is only pulled in when
// the toolchain actually ships it.
#ifdef __has_include
#if __has_include(<CppCoreCheck\Warnings.h>)
#include <CppCoreCheck\Warnings.h>
#define IS_UTF8_DISABLE_UNDESIRED_WARNINGS                                     \
  IS_UTF8_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
#endif
#endif
#ifndef IS_UTF8_DISABLE_UNDESIRED_WARNINGS
#define IS_UTF8_DISABLE_UNDESIRED_WARNINGS
#endif
#define IS_UTF8_DISABLE_DEPRECATED_WARNING IS_UTF8_DISABLE_VS_WARNING(4996)
#define IS_UTF8_DISABLE_STRICT_OVERFLOW_WARNING
#define IS_UTF8_POP_DISABLE_WARNINGS __pragma(warning(pop))
#else // IS_UTF8_REGULAR_VISUAL_STUDIO
// ---- GCC-compatible compilers ----
#define is_utf8_really_inline inline __attribute__((always_inline))
#define is_utf8_never_inline inline __attribute__((noinline))
#define is_utf8_unused __attribute__((unused))
#define is_utf8_warn_unused __attribute__((warn_unused_result))
// Branch hints; callers may predefine them.
#ifndef is_utf8_likely
#define is_utf8_likely(x) __builtin_expect(!!(x), 1)
#endif
#ifndef is_utf8_unlikely
#define is_utf8_unlikely(x) __builtin_expect(!!(x), 0)
#endif
#define IS_UTF8_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push")
// gcc doesn't seem to disable all warnings with all and extra, add warnings
// here as necessary
// (IS_UTF8_DISABLE_GCC_WARNING is defined further down; that is fine because
// macros are only expanded at the point of use.)
#define IS_UTF8_PUSH_DISABLE_ALL_WARNINGS                                      \
  IS_UTF8_PUSH_DISABLE_WARNINGS                                                \
  IS_UTF8_DISABLE_GCC_WARNING(-Weffc++)                                        \
  IS_UTF8_DISABLE_GCC_WARNING(-Wall)                                           \
  IS_UTF8_DISABLE_GCC_WARNING(-Wconversion)                                    \
  IS_UTF8_DISABLE_GCC_WARNING(-Wextra)                                         \
  IS_UTF8_DISABLE_GCC_WARNING(-Wattributes)                                    \
  IS_UTF8_DISABLE_GCC_WARNING(-Wimplicit-fallthrough)                          \
  IS_UTF8_DISABLE_GCC_WARNING(-Wnon-virtual-dtor)                              \
  IS_UTF8_DISABLE_GCC_WARNING(-Wreturn-type)                                   \
  IS_UTF8_DISABLE_GCC_WARNING(-Wshadow)                                        \
  IS_UTF8_DISABLE_GCC_WARNING(-Wunused-parameter)                              \
  IS_UTF8_DISABLE_GCC_WARNING(-Wunused-variable)
#define IS_UTF8_PRAGMA(P) _Pragma(#P)
#define IS_UTF8_DISABLE_GCC_WARNING(WARNING)                                   \
  IS_UTF8_PRAGMA(GCC diagnostic ignored #WARNING)
#if defined(IS_UTF8_CLANG_VISUAL_STUDIO)
#define IS_UTF8_DISABLE_UNDESIRED_WARNINGS                                     \
  IS_UTF8_DISABLE_GCC_WARNING(-Wmicrosoft-include)
#else
#define IS_UTF8_DISABLE_UNDESIRED_WARNINGS
#endif
#define IS_UTF8_DISABLE_DEPRECATED_WARNING                                     \
  IS_UTF8_DISABLE_GCC_WARNING(-Wdeprecated-declarations)
#define IS_UTF8_DISABLE_STRICT_OVERFLOW_WARNING                                \
  IS_UTF8_DISABLE_GCC_WARNING(-Wstrict-overflow)
#define IS_UTF8_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop")
#endif // IS_UTF8_REGULAR_VISUAL_STUDIO
// IS_UTF8_DLLIMPORTEXPORT decorates symbols shared across a DLL boundary.
// Outside Visual Studio it expands to nothing.
#if defined(IS_UTF8_VISUAL_STUDIO)
/**
* It does not matter here whether you are using
* the regular visual studio or clang under visual
* studio.
*/
// A non-zero IS_UTF8_USING_LIBRARY selects dllimport; when the macro is zero
// or undefined the preprocessor evaluates it as 0 and dllexport is selected.
#if IS_UTF8_USING_LIBRARY
#define IS_UTF8_DLLIMPORTEXPORT __declspec(dllimport)
#else
#define IS_UTF8_DLLIMPORTEXPORT __declspec(dllexport)
#endif
#else
#define IS_UTF8_DLLIMPORTEXPORT
#endif
/// If EXPR is an error, returns it.
/// EXPR is evaluated exactly once; when the result converts to true it is
/// returned from the enclosing function, otherwise execution continues.
/// The expansion is a brace-enclosed block that declares a local named
/// `_err`, so EXPR must not itself refer to a variable with that name.
#define IS_UTF8_TRY(EXPR) \
{ \
auto _err = (EXPR); \
if (_err) { \
return _err; \
} \
}
#endif // IS_UTF8_COMMON_DEFS_H
#include
namespace is_utf8_internals {
// Text encodings, each identified by a distinct bit so that values can be
// combined. The comment on each enumerator gives the byte-order mark (BOM)
// associated with that encoding.
enum encoding_type {
UTF8 = 1, // BOM 0xef 0xbb 0xbf
UTF16_LE = 2, // BOM 0xff 0xfe
UTF16_BE = 4, // BOM 0xfe 0xff
UTF32_LE = 8, // BOM 0xff 0xfe 0x00 0x00
UTF32_BE = 16, // BOM 0x00 0x00 0xfe 0xff
unspecified = 0
};
// Byte order.
enum endianness { LITTLE, BIG };
// Returns a textual form of the given encoding (defined elsewhere).
std::string to_string(encoding_type bom);
// Note that BOM for UTF8 is discouraged.
namespace BOM {
/**
* Checks for a BOM. If there is none, returns unspecified.
* @param byte the string to process
* @param length the length of the string (the original documentation says
*        "in words"; presumably bytes for these byte-pointer overloads —
*        TODO confirm against the definition)
* @return the corresponding encoding
*/
encoding_type check_bom(const uint8_t *byte, size_t length);
encoding_type check_bom(const char *byte, size_t length);
/**
* Returns the size, in bytes, of the BOM for a given encoding type.
* Note that UTF8 BOM are discouraged.
* @param bom the encoding type
* @return the size in bytes of the corresponding BOM
*/
size_t bom_byte_size(encoding_type bom);
} // namespace BOM
} // namespace is_utf8_internals
#ifndef ERROR_H
#define ERROR_H
namespace is_utf8_internals {
// Outcome of a validation/transcoding operation. SUCCESS is 0; the remaining
// enumerators take consecutive values after it.
enum error_code {
SUCCESS = 0,
HEADER_BITS, // Any byte must have fewer than 5 header bits.
TOO_SHORT, // The leading byte must be followed by N-1 continuation bytes,
// where N is the UTF-8 character length. This is also the error
// when the input is truncated.
TOO_LONG, // The leading byte must not be a continuation byte.
OVERLONG, // The decoded character must be above U+7F for two-byte characters,
// U+7FF for three-byte characters, and U+FFFF for four-byte
// characters.
TOO_LARGE, // The decoded character must be less than or equal to U+10FFFF OR
// less than or equal to U+7F for ASCII.
SURROGATE, // The decoded character must not be in U+D800...DFFF (UTF-8 or
// UTF-32) OR a high surrogate must be followed by a low surrogate
// and a low surrogate must be preceded by a high surrogate
// (UTF-16)
OTHER // Not related to validation/transcoding.
};
// Pairs an error code with a position/count. Both constructors are only
// declared here; their definitions live elsewhere.
struct result {
error_code error;
size_t
count; // In case of error, indicates the position of the error. In case
// of success, indicates the number of words validated/written.
is_utf8_really_inline result();
is_utf8_really_inline result(error_code, size_t);
};
} // namespace is_utf8_internals
#endif
IS_UTF8_PUSH_DISABLE_WARNINGS
IS_UTF8_DISABLE_UNDESIRED_WARNINGS
#ifndef IS_UTF8_IMPLEMENTATION_H
#define IS_UTF8_IMPLEMENTATION_H
#include
#if !defined(IS_UTF8_NO_THREADS)
#include
#endif
#include
#include
/* From
https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
Highly modified.
Copyright (c) 2016- Facebook, Inc (Adam Paszke)
Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
Copyright (c) 2011-2013 NYU (Clement Farabet)
Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
Samy Bengio, Johnny Mariethoz)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
America and IDIAP Research Institute nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef IS_UTF8_INTERNAL_ISADETECTION_H
#define IS_UTF8_INTERNAL_ISADETECTION_H
#include
#include
#if defined(_MSC_VER)
#include
#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
#include
#endif
namespace is_utf8_internals {
namespace internal {
// Bit flags naming the instruction-set extensions the library knows about.
// Every non-zero enumerator occupies a single distinct bit, so values can be
// OR-ed together into a uint32_t mask (as detect_supported_architectures
// does). Bit 0x2 is not assigned.
enum instruction_set {
DEFAULT = 0x0,
NEON = 0x1,
AVX2 = 0x4,
SSE42 = 0x8,
PCLMULQDQ = 0x10,
BMI1 = 0x20,
BMI2 = 0x40,
ALTIVEC = 0x80,
AVX512F = 0x100,
AVX512DQ = 0x200,
AVX512IFMA = 0x400,
AVX512PF = 0x800,
AVX512ER = 0x1000,
AVX512CD = 0x2000,
AVX512BW = 0x4000,
AVX512VL = 0x8000,
AVX512VBMI2 = 0x10000
};
#if defined(__PPC64__)
// 64-bit POWER build: no runtime probing is performed, the ALTIVEC flag is
// reported unconditionally.
static inline uint32_t detect_supported_architectures() {
  const uint32_t supported = instruction_set::ALTIVEC;
  return supported;
}
#elif defined(__aarch64__) || defined(_M_ARM64)
// ARM64 build: no runtime probing is performed, the NEON flag is reported
// unconditionally.
static inline uint32_t detect_supported_architectures() {
  const uint32_t supported = instruction_set::NEON;
  return supported;
}
#elif defined(__x86_64__) || defined(_M_AMD64) // x64
namespace {
// Bit masks for the CPUID feature flags consulted by
// detect_supported_architectures. Each constant has exactly one bit set.
namespace cpuid_bit {
// Can be found on Intel ISA Reference for CPUID
// EAX = 0x01: flags reported in ECX
constexpr uint32_t pclmulqdq = 0x00000002u; ///< @private bit 1 of ECX for EAX=0x1
constexpr uint32_t sse42 = 0x00100000u; ///< @private bit 20 of ECX for EAX=0x1
// EAX = 0x07 (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf)
// See: "Table 3-8. Information Returned by CPUID Instruction"
namespace ebx {
constexpr uint32_t bmi1 = 0x00000008u;     // bit 3
constexpr uint32_t avx2 = 0x00000020u;     // bit 5
constexpr uint32_t bmi2 = 0x00000100u;     // bit 8
constexpr uint32_t avx512f = 0x00010000u;  // bit 16
constexpr uint32_t avx512dq = 0x00020000u; // bit 17
constexpr uint32_t avx512cd = 0x10000000u; // bit 28
constexpr uint32_t avx512bw = 0x40000000u; // bit 30
constexpr uint32_t avx512vl = 0x80000000u; // bit 31
} // namespace ebx
namespace ecx {
constexpr uint32_t avx512vbmi2 = 0x00000040u; // bit 6
} // namespace ecx
} // namespace cpuid_bit
} // namespace
/**
 * Executes the CPUID instruction.
 *
 * On entry *eax holds the leaf to query and *ecx the sub-leaf. On return the
 * four pointees hold the EAX/EBX/ECX/EDX values produced by the instruction.
 *
 * @param eax in: leaf number; out: EAX result
 * @param ebx out: EBX result
 * @param ecx in: sub-leaf number; out: ECX result
 * @param edx out: EDX result
 */
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
                         uint32_t *edx) {
#if defined(_MSC_VER)
  int cpu_info[4];
  // __cpuidex forwards the caller's sub-leaf in ECX; plain __cpuid has no
  // parameter for it, so the value the caller placed in *ecx would be lost.
  __cpuidex(cpu_info, static_cast<int>(*eax), static_cast<int>(*ecx));
  *eax = static_cast<uint32_t>(cpu_info[0]);
  *ebx = static_cast<uint32_t>(cpu_info[1]);
  *ecx = static_cast<uint32_t>(cpu_info[2]);
  *edx = static_cast<uint32_t>(cpu_info[3]);
#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
  // NOTE(review): __get_cpuid takes no sub-leaf argument and its return value
  // is ignored here, so the outputs keep their previous contents if the call
  // reports failure — confirm this is acceptable for callers.
  uint32_t level = *eax;
  __get_cpuid(level, eax, ebx, ecx, edx);
#else
  // EAX and ECX are read-write operands (leaf / sub-leaf in, results out);
  // EBX and EDX are outputs only.
  uint32_t a = *eax, b, c = *ecx, d;
  asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
#endif
}
/**
 * Queries CPUID and reports which instruction-set extensions the processor
 * advertises.
 *
 * @return a bitwise OR of instruction_set flags (SSE42, PCLMULQDQ, AVX2,
 *         BMI1, BMI2 and the AVX512* flags checked below); 0 when none of
 *         them is advertised.
 *
 * NOTE(review): only the CPUID feature bits are inspected. Whether the
 * operating system has enabled the AVX/AVX-512 register state is not checked
 * here — confirm callers do not need that guarantee.
 */
static inline uint32_t detect_supported_architectures() {
  uint32_t eax = 0x0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  uint32_t host_isa = 0x0;

  // Leaf 0 reports the highest supported basic leaf in EAX. Querying a leaf
  // above that maximum is not guaranteed to return zeros, so each leaf is
  // only read after checking that it exists.
  cpuid(&eax, &ebx, &ecx, &edx);
  const uint32_t max_basic_leaf = eax;
  if (max_basic_leaf < 0x1) {
    return host_isa;
  }

  // Leaf 1: the flags of interest are reported in ECX.
  eax = 0x1;
  ecx = 0x0;
  cpuid(&eax, &ebx, &ecx, &edx);
  if (ecx & cpuid_bit::sse42) {
    host_isa |= instruction_set::SSE42;
  }
  if (ecx & cpuid_bit::pclmulqdq) {
    host_isa |= instruction_set::PCLMULQDQ;
  }

  if (max_basic_leaf < 0x7) {
    return host_isa;
  }

  // Leaf 7, sub-leaf 0: flags are reported in EBX, plus AVX512VBMI2 in ECX.
  eax = 0x7;
  ecx = 0x0; // Sub-leaf = 0
  cpuid(&eax, &ebx, &ecx, &edx);
  if (ebx & cpuid_bit::ebx::avx2) {
    host_isa |= instruction_set::AVX2;
  }
  if (ebx & cpuid_bit::ebx::bmi1) {
    host_isa |= instruction_set::BMI1;
  }
  if (ebx & cpuid_bit::ebx::bmi2) {
    host_isa |= instruction_set::BMI2;
  }
  if (ebx & cpuid_bit::ebx::avx512f) {
    host_isa |= instruction_set::AVX512F;
  }
  if (ebx & cpuid_bit::ebx::avx512bw) {
    host_isa |= instruction_set::AVX512BW;
  }
  if (ebx & cpuid_bit::ebx::avx512cd) {
    host_isa |= instruction_set::AVX512CD;
  }
  if (ebx & cpuid_bit::ebx::avx512dq) {
    host_isa |= instruction_set::AVX512DQ;
  }
  if (ebx & cpuid_bit::ebx::avx512vl) {
    host_isa |= instruction_set::AVX512VL;
  }
  if (ecx & cpuid_bit::ecx::avx512vbmi2) {
    host_isa |= instruction_set::AVX512VBMI2;
  }
  return host_isa;
}
#else // fallback
// Fallback for architectures without a dedicated probe: reports DEFAULT,
// i.e. no optional instruction set.
static inline uint32_t detect_supported_architectures() {
  const uint32_t supported = instruction_set::DEFAULT;
  return supported;
}
#endif // end SIMD extension detection code
} // namespace internal
} // namespace is_utf8_internals
#endif // IS_UTF8_INTERNAL_ISADETECTION_H
namespace is_utf8_internals {
/**
* Validate the UTF-8 string. This function may be best when you expect
* the input to be almost always valid. Otherwise, consider using
* validate_utf8_with_errors (not declared in this part of the file).
*
* This is the free-function entry point; only its declaration appears here.
* NOTE(review): presumably it forwards to the active implementation's
* validate_utf8 — confirm against the definition.
*
* @param buf the UTF-8 string to validate.
* @param len the length of the string in bytes.
* @return true if and only if the string is valid UTF-8.
*/
bool validate_utf8(const char *buf, size_t len) noexcept;
/**
* Abstract base class of every UTF-8 validation kernel. It stores a name, a
* description and the mask of instruction sets the kernel requires; derived
* classes supply validate_utf8. Construction and destruction are protected,
* so instances only exist as part of a derived class.
*/
class implementation {
public:
/** The name given at construction. */
virtual const std::string &name() const { return _name; }
/** The description given at construction. */
virtual const std::string &description() const { return _description; }
/**
* Declared here, defined elsewhere.
* NOTE(review): presumably compares required_instruction_sets() with what
* the host CPU reports — confirm against the definition.
*/
bool supported_by_runtime_system() const;
/** The instruction-set mask given at construction. */
virtual uint32_t required_instruction_sets() const {
return _required_instruction_sets;
}
/**
* Validate the UTF-8 string.
*
* Overridden by each implementation.
*
* @param buf the UTF-8 string to validate.
* @param len the length of the string in bytes.
* @return true if and only if the string is valid UTF-8.
*/
is_utf8_warn_unused virtual bool validate_utf8(const char *buf,
size_t len) const noexcept = 0;
protected:
/** @private Construct an implementation with the given name and description.
* For subclasses.
* The two strings are taken by value and copied into the members. */
is_utf8_really_inline implementation(std::string name,
std::string description,
uint32_t required_instruction_sets)
: _name(name), _description(description),
_required_instruction_sets(required_instruction_sets) {}
// Protected: objects cannot be destroyed through a pointer to this base
// class from outside the hierarchy.
virtual ~implementation() = default;
private:
/**
* The name of this implementation.
*/
const std::string _name;
/**
* The description of this implementation.
*/
const std::string _description;
/**
* Instruction sets required for this implementation.
*/
const uint32_t _required_instruction_sets;
};
/** @private */
namespace internal {
/**
 * Read-only view over the implementations known to the library. The object
 * itself carries no data members; size(), begin(), end() and
 * detect_best_supported() are defined elsewhere.
 */
class available_implementation_list {
public:
  /** Creates the list object. */
  is_utf8_really_inline available_implementation_list() {}

  /** How many implementations the list contains. */
  size_t size() const noexcept;

  /** Pointer to the first entry (usable with range-based for). */
  const implementation *const *begin() const noexcept;

  /** Pointer one past the last entry. */
  const implementation *const *end() const noexcept;

  /**
   * Looks up an implementation by its exact (case-sensitive) name.
   *
   * Entries are scanned in order from begin() to end() and the first one
   * whose name() equals `name` is returned.
   *
   * @param name the implementation to find, e.g. "westmere", "haswell",
   *             "arm64"
   * @return the matching implementation, or nullptr when no entry has that
   *         name.
   */
  const implementation *operator[](const std::string &name) const noexcept {
    const implementation *const *cursor = begin();
    const implementation *const *const stop = end();
    for (; cursor != stop; ++cursor) {
      const implementation *candidate = *cursor;
      if (candidate->name() == name) {
        return candidate;
      }
    }
    return nullptr;
  }

  /**
   * Picks the most advanced implementation that the current host supports.
   * Intended for selecting the implementation at startup.
   *
   * @return the chosen implementation. Documented to never be nullptr: when
   *         nothing is supported, an implementation that reports an
   *         unsupported architecture is returned instead.
   */
  const implementation *detect_best_supported() const noexcept;
};
/**
 * Thin pointer wrapper. When IS_UTF8_NO_THREADS is defined it stores a plain
 * T*; otherwise it stores a std::atomic<T *> and every read goes through
 * load().
 *
 * The template parameter list and the std::atomic template argument are
 * spelled out explicitly; without them the class does not compile.
 *
 * @tparam T the pointee type.
 */
template <typename T> class atomic_ptr {
public:
  /** Wraps the given raw pointer (implicit conversion is intentional). */
  atomic_ptr(T *_ptr) : ptr{_ptr} {}

#if defined(IS_UTF8_NO_THREADS)
  operator const T *() const { return ptr; }
  const T &operator*() const { return *ptr; }
  const T *operator->() const { return ptr; }

  operator T *() { return ptr; }
  T &operator*() { return *ptr; }
  T *operator->() { return ptr; }

  /** Replaces the stored pointer. */
  atomic_ptr &operator=(T *_ptr) {
    ptr = _ptr;
    return *this;
  }
#else
  operator const T *() const { return ptr.load(); }
  const T &operator*() const { return *ptr.load(); }
  const T *operator->() const { return ptr.load(); }

  operator T *() { return ptr.load(); }
  T &operator*() { return *ptr.load(); }
  T *operator->() { return ptr.load(); }

  /** Replaces the stored pointer with an atomic store. */
  atomic_ptr &operator=(T *_ptr) {
    ptr = _ptr;
    return *this;
  }
#endif

private:
#if defined(IS_UTF8_NO_THREADS)
  T *ptr;
#else
  std::atomic<T *> ptr;
#endif
};
} // namespace internal
/**
 * The list of available implementations compiled into this library.
 */
extern IS_UTF8_DLLIMPORTEXPORT const internal::available_implementation_list &
get_available_implementations();
/**
 * The active implementation.
 *
 * Documented as being automatically initialized on first use to the most
 * advanced implementation supported by this hardware.
 *
 * The wrapper holds a pointer to a const implementation, matching the
 * `const implementation *` values produced by available_implementation_list.
 */
extern IS_UTF8_DLLIMPORTEXPORT internal::atomic_ptr<const implementation> &
get_active_implementation();
} // namespace is_utf8_internals
#endif // IS_UTF8_IMPLEMENTATION_H
// Implementation-internal files (must be included before the implementations
// themselves, to keep amalgamation working--otherwise, the first time a file is
// included, it might be put inside the #ifdef
// IS_UTF8_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other
// implementations can't compile unless that implementation is turned on).
IS_UTF8_POP_DISABLE_WARNINGS
#endif // IS_UTF8_H
#include
#include
// Useful for debugging purposes
namespace is_utf8_internals {
namespace {
/**
 * Renders an integer as a string of '0'/'1' characters, most significant bit
 * first.
 *
 * The result always has sizeof(T) * CHAR_BIT characters. The value is
 * converted to an unsigned 64-bit quantity before its bits are read, so
 * signed types (including negative values) are rendered from their two's
 * complement bit pattern instead of producing an empty string.
 *
 * @tparam T an integer type no wider than unsigned long long.
 * @param b the value to render.
 * @return the binary representation of b.
 */
template <typename T> std::string toBinaryString(T b) {
  static_assert(sizeof(T) <= sizeof(unsigned long long),
                "toBinaryString supports types of at most 64 bits");
  const size_t bit_count = sizeof(T) * CHAR_BIT;
  // Conversion to an unsigned type is modular, so the low bit_count bits are
  // exactly the bits of b.
  const unsigned long long bits = static_cast<unsigned long long>(b);
  std::string binary;
  binary.reserve(bit_count);
  for (size_t i = bit_count; i-- > 0;) {
    binary += ((bits >> i) & 1ULL) ? '1' : '0';
  }
  return binary;
}
} // namespace
} // namespace is_utf8_internals
// Implementations
// The best choice should always come first!
#ifndef IS_UTF8_ARM64_H
#define IS_UTF8_ARM64_H
// Ordering guard: this section must be seen before the fallback
// implementation's header.
#ifdef IS_UTF8_FALLBACK_H
#error "arm64.h must be included before fallback.h"
#endif
// IS_UTF8_IMPLEMENTATION_ARM64 enables the ARM64 kernel. Unless the build
// predefines it, it follows IS_UTF8_IS_ARM64 (an undefined IS_UTF8_IS_ARM64
// evaluates to 0 inside #if).
#ifndef IS_UTF8_IMPLEMENTATION_ARM64
#define IS_UTF8_IMPLEMENTATION_ARM64 (IS_UTF8_IS_ARM64)
#endif
// IS_UTF8_CAN_ALWAYS_RUN_ARM64 is 1 only when the kernel is enabled and the
// target itself is ARM64; otherwise 0.
#if IS_UTF8_IMPLEMENTATION_ARM64 &&IS_UTF8_IS_ARM64
#define IS_UTF8_CAN_ALWAYS_RUN_ARM64 1
#else
#define IS_UTF8_CAN_ALWAYS_RUN_ARM64 0
#endif
#if IS_UTF8_IMPLEMENTATION_ARM64
namespace is_utf8_internals {
/**
* Implementation for NEON (ARMv8).
*/
namespace arm64 {} // namespace arm64
} // namespace is_utf8_internals
#ifndef IS_UTF8_ARM64_IMPLEMENTATION_H
#define IS_UTF8_ARM64_IMPLEMENTATION_H
namespace is_utf8_internals {
namespace arm64 {
/**
* The ARM64 kernel. It registers itself with the base class under the name
* "arm64", the description "ARM NEON" and the NEON instruction-set flag.
*/
class implementation final : public is_utf8_internals::implementation {
public:
is_utf8_really_inline implementation()
: is_utf8_internals::implementation("arm64", "ARM NEON",
internal::instruction_set::NEON) {}
/**
* Overrides the base-class validator; only declared here, the definition
* lives elsewhere.
*
* @param buf the UTF-8 string to validate.
* @param len the length of the string in bytes.
* @return true if and only if the string is valid UTF-8 (per the base-class
*         contract).
*/
is_utf8_warn_unused bool validate_utf8(const char *buf,
size_t len) const noexcept final;
};
} // namespace arm64
} // namespace is_utf8_internals
#endif // IS_UTF8_ARM64_IMPLEMENTATION_H
// redefining IS_UTF8_IMPLEMENTATION to "arm64"
// #define IS_UTF8_IMPLEMENTATION arm64
// Declarations
#ifndef IS_UTF8_ARM64_INTRINSICS_H
#define IS_UTF8_ARM64_INTRINSICS_H
// This should be the correct header whether
// you use visual studio or other compilers.
#include
#endif // IS_UTF8_ARM64_INTRINSICS_H
#ifndef IS_UTF8_ARM64_BITMANIPULATION_H
#define IS_UTF8_ARM64_BITMANIPULATION_H
namespace is_utf8_internals {
namespace arm64 {
namespace {} // unnamed namespace
} // namespace arm64
} // namespace is_utf8_internals
#endif // IS_UTF8_ARM64_BITMANIPULATION_H
#ifndef IS_UTF8_ARM64_SIMD_H
#define IS_UTF8_ARM64_SIMD_H
#include
namespace is_utf8_internals {
namespace arm64 {
namespace {
namespace simd {
#ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
namespace {
// Start of private section with Visual Studio workaround
/**
 * Builds a uint8x16_t from sixteen individual byte values; x1 goes to lane 0
 * and x16 to lane 15.
 *
 * Visual Studio does not accept the brace syntax uint8x16_t x = {1,2,3...},
 * hence this helper. According to the original authors, taking a
 * std::initializer_list produced inefficient code, and with this approach
 * GNU GCC emits an ldr when all arguments are compile-time constants, the
 * same as the brace syntax would.
 *
 * Only use this with compile-time constants: it is not efficient otherwise.
 */
is_utf8_really_inline uint8x16_t make_uint8x16_t(
    uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6,
    uint8_t x7, uint8_t x8, uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12,
    uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) {
  // Filling a temporary array and loading it with vld1q_u8 was found to
  // generate worse code, so the lanes are written one by one. Visual Studio
  // also rejects direct lane indexing (reg[0] = x1).
  uint8x16_t reg{};
  reg = vsetq_lane_u8(x1, reg, 0);
  reg = vsetq_lane_u8(x2, reg, 1);
  reg = vsetq_lane_u8(x3, reg, 2);
  reg = vsetq_lane_u8(x4, reg, 3);
  reg = vsetq_lane_u8(x5, reg, 4);
  reg = vsetq_lane_u8(x6, reg, 5);
  reg = vsetq_lane_u8(x7, reg, 6);
  reg = vsetq_lane_u8(x8, reg, 7);
  reg = vsetq_lane_u8(x9, reg, 8);
  reg = vsetq_lane_u8(x10, reg, 9);
  reg = vsetq_lane_u8(x11, reg, 10);
  reg = vsetq_lane_u8(x12, reg, 11);
  reg = vsetq_lane_u8(x13, reg, 12);
  reg = vsetq_lane_u8(x14, reg, 13);
  reg = vsetq_lane_u8(x15, reg, 14);
  reg = vsetq_lane_u8(x16, reg, 15);
  return reg;
}
// Signed counterpart of make_uint8x16_t: builds an int8x16_t from sixteen
// individual values, x1 in lane 0 through x16 in lane 15.
is_utf8_really_inline int8x16_t
make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6,
               int8_t x7, int8_t x8, int8_t x9, int8_t x10, int8_t x11,
               int8_t x12, int8_t x13, int8_t x14, int8_t x15, int8_t x16) {
  // Filling a temporary array and loading it with vld1q_s8 was found to
  // generate worse code, so the lanes are written one by one. Visual Studio
  // also rejects direct lane indexing (reg[0] = x1).
  int8x16_t reg{};
  reg = vsetq_lane_s8(x1, reg, 0);
  reg = vsetq_lane_s8(x2, reg, 1);
  reg = vsetq_lane_s8(x3, reg, 2);
  reg = vsetq_lane_s8(x4, reg, 3);
  reg = vsetq_lane_s8(x5, reg, 4);
  reg = vsetq_lane_s8(x6, reg, 5);
  reg = vsetq_lane_s8(x7, reg, 6);
  reg = vsetq_lane_s8(x8, reg, 7);
  reg = vsetq_lane_s8(x9, reg, 8);
  reg = vsetq_lane_s8(x10, reg, 9);
  reg = vsetq_lane_s8(x11, reg, 10);
  reg = vsetq_lane_s8(x12, reg, 11);
  reg = vsetq_lane_s8(x13, reg, 12);
  reg = vsetq_lane_s8(x14, reg, 13);
  reg = vsetq_lane_s8(x15, reg, 14);
  reg = vsetq_lane_s8(x16, reg, 15);
  return reg;
}
// Builds a 64-bit uint8x8_t from eight individual byte values, x1 in lane 0
// through x8 in lane 7, writing one lane at a time.
is_utf8_really_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2,
                                               uint8_t x3, uint8_t x4,
                                               uint8_t x5, uint8_t x6,
                                               uint8_t x7, uint8_t x8) {
  uint8x8_t reg{};
  reg = vset_lane_u8(x1, reg, 0);
  reg = vset_lane_u8(x2, reg, 1);
  reg = vset_lane_u8(x3, reg, 2);
  reg = vset_lane_u8(x4, reg, 3);
  reg = vset_lane_u8(x5, reg, 4);
  reg = vset_lane_u8(x6, reg, 5);
  reg = vset_lane_u8(x7, reg, 6);
  reg = vset_lane_u8(x8, reg, 7);
  return reg;
}
// Builds a uint16x8_t from eight individual 16-bit values, x1 in lane 0
// through x8 in lane 7, writing one lane at a time.
is_utf8_really_inline uint16x8_t make_uint16x8_t(uint16_t x1, uint16_t x2,
                                                 uint16_t x3, uint16_t x4,
                                                 uint16_t x5, uint16_t x6,
                                                 uint16_t x7, uint16_t x8) {
  uint16x8_t reg{};
  reg = vsetq_lane_u16(x1, reg, 0);
  reg = vsetq_lane_u16(x2, reg, 1);
  reg = vsetq_lane_u16(x3, reg, 2);
  reg = vsetq_lane_u16(x4, reg, 3);
  reg = vsetq_lane_u16(x5, reg, 4);
  reg = vsetq_lane_u16(x6, reg, 5);
  reg = vsetq_lane_u16(x7, reg, 6);
  reg = vsetq_lane_u16(x8, reg, 7);
  return reg;
}
// Builds an int16x8_t from eight individual signed 16-bit values, x1 in
// lane 0 through x8 in lane 7, writing one lane at a time.
//
// The accumulator is declared as int16x8_t so that it matches both
// vsetq_lane_s16 and the return type; a uint16x8_t accumulator is a type
// mismatch on any compiler that distinguishes the NEON vector types.
is_utf8_really_inline int16x8_t make_int16x8_t(int16_t x1, int16_t x2,
                                               int16_t x3, int16_t x4,
                                               int16_t x5, int16_t x6,
                                               int16_t x7, int16_t x8) {
  int16x8_t x{};
  x = vsetq_lane_s16(x1, x, 0);
  x = vsetq_lane_s16(x2, x, 1);
  x = vsetq_lane_s16(x3, x, 2);
  x = vsetq_lane_s16(x4, x, 3);
  x = vsetq_lane_s16(x5, x, 4);
  x = vsetq_lane_s16(x6, x, 5);
  x = vsetq_lane_s16(x7, x, 6);
  x = vsetq_lane_s16(x8, x, 7);
  return x;
}
// End of private section with Visual Studio workaround
} // namespace
#endif // IS_UTF8_REGULAR_VISUAL_STUDIO
template struct simd8;
//
// Base class of simd8 and simd8, both of which use uint8x16_t
// internally.
//
template > struct base_u8 {
uint8x16_t value;
static const int SIZE = sizeof(value);
// Conversion from/to SIMD register
is_utf8_really_inline base_u8(const uint8x16_t _value) : value(_value) {}
is_utf8_really_inline operator const uint8x16_t &() const {
return this->value;
}
is_utf8_really_inline operator uint8x16_t &() { return this->value; }
is_utf8_really_inline T first() const { return vgetq_lane_u8(*this, 0); }
is_utf8_really_inline T last() const { return vgetq_lane_u8(*this, 15); }
// Bit operations
is_utf8_really_inline simd8 operator|(const simd8 other) const {
return vorrq_u8(*this, other);
}
is_utf8_really_inline simd8 operator&(const simd8 other) const {
return vandq_u8(*this, other);
}
is_utf8_really_inline simd8 operator^(const simd8 other) const {
return veorq_u8(*this, other);
}
is_utf8_really_inline simd8 bit_andnot(const simd8 other) const {
return vbicq_u8(*this, other);
}
is_utf8_really_inline simd8 operator~() const { return *this ^ 0xFFu; }
is_utf8_really_inline simd8 &operator|=(const simd8 other) {
auto this_cast = static_cast *>(this);
*this_cast = *this_cast | other;
return *this_cast;
}
is_utf8_really_inline simd8 &operator&=(const simd8 other) {
auto this_cast = static_cast *>(this);
*this_cast = *this_cast & other;
return *this_cast;
}
is_utf8_really_inline simd8 &operator^=(const simd8 other) {
auto this_cast = static_cast *>(this);
*this_cast = *this_cast ^ other;
return *this_cast;
}
friend is_utf8_really_inline Mask operator==(const simd8 lhs,
const simd8 rhs) {
return vceqq_u8(lhs, rhs);
}
template
is_utf8_really_inline simd8 prev(const simd8 prev_chunk) const {
return vextq_u8(prev_chunk, *this, 16 - N);
}
};
// SIMD byte mask type (returned by things like eq and gt)
template <> struct simd8 : base_u8 {
typedef uint16_t bitmask_t;
typedef uint32_t bitmask2_t;
static is_utf8_really_inline simd8 splat(bool _value) {
return vmovq_n_u8(uint8_t(-(!!_value)));
}
is_utf8_really_inline simd8(const uint8x16_t _value)
: base_u8(_value) {}
// False constructor
is_utf8_really_inline simd8() : simd8(vdupq_n_u8(0)) {}
// Splat constructor
is_utf8_really_inline simd8(bool _value) : simd8(splat(_value)) {}
is_utf8_really_inline void store(uint8_t dst[16]) const {
return vst1q_u8(dst, *this);
}
// We return uint32_t instead of uint16_t because that seems to be more
// efficient for most purposes (cutting it down to uint16_t costs performance
// in some compilers).
is_utf8_really_inline uint32_t to_bitmask() const {
#ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
const uint8x16_t bit_mask =
make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01,
0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
#else
const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
#endif
auto minput = *this & bit_mask;
uint8x16_t tmp = vpaddq_u8(minput, minput);
tmp = vpaddq_u8(tmp, tmp);
tmp = vpaddq_u8(tmp, tmp);
return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
}
// Returns 4-bit out of each byte, alternating between the high 4 bits and low
// bits result it is 64 bit. This method is expected to be faster than none()
// and is equivalent when the vector register is the result of a comparison,
// with byte values 0xff and 0x00.
is_utf8_really_inline uint64_t to_bitmask64() const {
return vget_lane_u64(
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0);
}
is_utf8_really_inline bool any() const { return vmaxvq_u8(*this) != 0; }
is_utf8_really_inline bool none() const { return vmaxvq_u8(*this) == 0; }
is_utf8_really_inline bool all() const { return vminvq_u8(*this) == 0xFF; }
};
// Unsigned bytes
template <> struct simd8 : base_u8 {
static is_utf8_really_inline simd8 splat(uint8_t _value) {
return vmovq_n_u8(_value);
}
static is_utf8_really_inline simd8 zero() { return vdupq_n_u8(0); }
static is_utf8_really_inline simd8 load(const uint8_t *values) {
return vld1q_u8(values);
}
is_utf8_really_inline simd8(const uint8x16_t _value)
: base_u8(_value) {}
// Zero constructor
is_utf8_really_inline simd8() : simd8(zero()) {}
// Array constructor
is_utf8_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {}
// Splat constructor
is_utf8_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
// Member-by-member initialization
#ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
is_utf8_really_inline
simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5,
uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10,
uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15)
: simd8(make_uint8x16_t(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
v12, v13, v14, v15)) {}
#else
is_utf8_really_inline
simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5,
uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10,
uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15)
: simd8(uint8x16_t{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
v13, v14, v15}) {}
#endif
// Repeat 16 values as many times as necessary (usually for lookup tables)
is_utf8_really_inline static simd8
repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4,
uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9,
uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14,
uint8_t v15) {
return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
v13, v14, v15);
}
// Store to array
is_utf8_really_inline void store(uint8_t dst[16]) const {
return vst1q_u8(dst, *this);
}
// Saturated math
is_utf8_really_inline simd8
saturating_add(const simd8 other) const {
return vqaddq_u8(*this, other);
}
is_utf8_really_inline simd8
saturating_sub(const simd8 other) const {
return vqsubq_u8(*this, other);
}
// Addition/subtraction are the same for signed and unsigned
is_utf8_really_inline simd8
operator+(const simd8 other) const {
return vaddq_u8(*this, other);
}
is_utf8_really_inline simd8
operator-(const simd8 other) const {
return vsubq_u8(*this, other);
}
is_utf8_really_inline simd8 &operator+=(const simd8 other) {
*this = *this + other;
return *this;
}
is_utf8_really_inline simd8 &operator-=(const simd8 other) {
*this = *this - other;
return *this;
}
// Order-specific operations
is_utf8_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); }
is_utf8_really_inline uint8_t min_val() const { return vminvq_u8(*this); }
is_utf8_really_inline simd8
max_val(const simd8 other) const {
return vmaxq_u8(*this, other);
}
is_utf8_really_inline simd8
min_val(const simd8 other) const {
return vminq_u8(*this, other);
}
is_utf8_really_inline simd8
operator<=(const simd8 other) const {
return vcleq_u8(*this, other);
}
is_utf8_really_inline simd8
operator>=(const simd8 other) const {
return vcgeq_u8(*this, other);
}
is_utf8_really_inline simd8
operator<(const simd8 other) const {
return vcltq_u8(*this, other);
}
is_utf8_really_inline simd8
operator>(const simd8 other) const {
return vcgtq_u8(*this, other);
}
// Same as >, but instead of guaranteeing all 1's == true, false = 0 and true
// = nonzero. For ARM, returns all 1's.
is_utf8_really_inline simd8
gt_bits(const simd8 other) const {
return simd8(*this > other);
}
// Same as <, but instead of guaranteeing all 1's == true, false = 0 and true
// = nonzero. For ARM, returns all 1's.
is_utf8_really_inline simd8
lt_bits(const simd8 other) const {
return simd8(*this < other);
}
// Bit-specific operations
is_utf8_really_inline simd8 any_bits_set(simd8 bits) const {
return vtstq_u8(*this, bits);
}
is_utf8_really_inline bool is_ascii() const {
return this->max_val() < 0b10000000u;
}
is_utf8_really_inline bool any_bits_set_anywhere() const {
return this->max_val() != 0;
}
is_utf8_really_inline bool any_bits_set_anywhere(simd8 bits) const {
return (*this & bits).any_bits_set_anywhere();
}
template is_utf8_really_inline simd8 shr() const {
return vshrq_n_u8(*this, N);
}
template is_utf8_really_inline simd8 shl() const {
return vshlq_n_u8(*this, N);
}
// Perform a lookup assuming the value is between 0 and 16 (undefined behavior
// for out of range values)
template
is_utf8_really_inline simd8 lookup_16(simd8 lookup_table) const {
return lookup_table.apply_lookup_16_to(*this);
}
template
is_utf8_really_inline simd8
lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4,
L replace5, L replace6, L replace7, L replace8, L replace9,
L replace10, L replace11, L replace12, L replace13, L replace14,
L replace15) const {
return lookup_16(simd8::repeat_16(
replace0, replace1, replace2, replace3, replace4, replace5, replace6,
replace7, replace8, replace9, replace10, replace11, replace12,
replace13, replace14, replace15));
}
template
is_utf8_really_inline simd8
apply_lookup_16_to(const simd8 original) const {
return vqtbl1q_u8(*this, simd8(original));
}
};
// Signed bytes
template <> struct simd8 {
int8x16_t value;
static is_utf8_really_inline simd8 splat(int8_t _value) {
return vmovq_n_s8(_value);
}
static is_utf8_really_inline simd8 zero() { return vdupq_n_s8(0); }
static is_utf8_really_inline simd8 load(const int8_t values[16]) {
return vld1q_s8(values);
}
template
is_utf8_really_inline void store_ascii_as_utf16(char16_t *p) const {
uint16x8_t first = vmovl_u8(vget_low_u8(vreinterpretq_u8_s8(this->value)));
uint16x8_t second = vmovl_high_u8(vreinterpretq_u8_s8(this->value));
if (big_endian) {
#ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
const uint8x16_t swap =
make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
#else
const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6,
9, 8, 11, 10, 13, 12, 15, 14};
#endif
first =
vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(first), swap));
second =
vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(second), swap));
}
vst1q_u16(reinterpret_cast(p), first);
vst1q_u16(reinterpret_cast(p + 8), second);
}
is_utf8_really_inline void store_ascii_as_utf32(char32_t *p) const {
vst1q_u32(reinterpret_cast(p),
vmovl_u16(vget_low_u16(
vmovl_u8(vget_low_u8(vreinterpretq_u8_s8(this->value))))));
vst1q_u32(reinterpret_cast(p + 4),
vmovl_high_u16(
vmovl_u8(vget_low_u8(vreinterpretq_u8_s8(this->value)))));
vst1q_u32(reinterpret_cast(p + 8),
vmovl_u16(vget_low_u16(
vmovl_high_u8(vreinterpretq_u8_s8(this->value)))));
vst1q_u32(reinterpret_cast(p + 12),
vmovl_high_u16(vmovl_high_u8(vreinterpretq_u8_s8(this->value))));
}
// Conversion from/to SIMD register
is_utf8_really_inline simd8(const int8x16_t _value) : value{_value} {}
is_utf8_really_inline operator const int8x16_t &() const {
return this->value;
}
is_utf8_really_inline operator const uint8x16_t() const {
return vreinterpretq_u8_s8(this->value);
}
is_utf8_really_inline operator int8x16_t &() { return this->value; }
// Zero constructor
is_utf8_really_inline simd8() : simd8(zero()) {}
// Splat constructor
is_utf8_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
// Array constructor
is_utf8_really_inline simd8(const int8_t *values) : simd8(load(values)) {}
// Member-by-member initialization
#ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
is_utf8_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3,
int8_t v4, int8_t v5, int8_t v6, int8_t v7,
int8_t v8, int8_t v9, int8_t v10, int8_t v11,
int8_t v12, int8_t v13, int8_t v14, int8_t v15)
: simd8(make_int8x16_t(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
v12, v13, v14, v15)) {}
#else
is_utf8_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3,
int8_t v4, int8_t v5, int8_t v6, int8_t v7,
int8_t v8, int8_t v9, int8_t v10, int8_t v11,
int8_t v12, int8_t v13, int8_t v14, int8_t v15)
: simd8(int8x16_t{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
v13, v14, v15}) {}
#endif
// Repeat 16 values as many times as necessary (usually for lookup tables)
is_utf8_really_inline static simd8
repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5,
int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11,
int8_t v12, int8_t v13, int8_t v14, int8_t v15) {
return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
v13, v14, v15);
}
// Store to array
is_utf8_really_inline void store(int8_t dst[16]) const {
return vst1q_s8(dst, value);
}
// Explicit conversion to/from unsigned
//
// Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same
// type. In theory, we could check this occurrence with std::same_as and
// std::enabled_if but it is C++14 and relatively ugly and hard to read.
#ifndef IS_UTF8_REGULAR_VISUAL_STUDIO
is_utf8_really_inline explicit simd8(const uint8x16_t other)
: simd8(vreinterpretq_s8_u8(other)) {}
#endif
is_utf8_really_inline operator simd8() const {
return vreinterpretq_u8_s8(this->value);
}
is_utf8_really_inline simd8
operator|(const simd8 other) const {
return vorrq_s8(value, other.value);
}
is_utf8_really_inline simd8
operator&(const simd8 other) const {
return vandq_s8(value, other.value);
}
is_utf8_really_inline simd8
operator^(const simd8 other) const {
return veorq_s8(value, other.value);
}
is_utf8_really_inline simd8
bit_andnot(const simd8 other) const {
return vbicq_s8(value, other.value);
}
// Math
is_utf8_really_inline simd8
operator+(const simd8 other) const {
return vaddq_s8(value, other.value);
}
is_utf8_really_inline simd8
operator-(const simd8 other) const {
return vsubq_s8(value, other.value);
}
is_utf8_really_inline simd8 &operator+=(const simd8 other) {
*this = *this + other;
return *this;
}
is_utf8_really_inline simd8 &operator-=(const simd8 other) {
*this = *this - other;
return *this;
}
is_utf8_really_inline int8_t max_val() const { return vmaxvq_s8(value); }
is_utf8_really_inline int8_t min_val() const { return vminvq_s8(value); }
is_utf8_really_inline bool is_ascii() const { return this->min_val() >= 0; }
// Order-sensitive comparisons
is_utf8_really_inline simd8 max_val(const simd8 other) const {
return vmaxq_s8(value, other.value);
}
is_utf8_really_inline simd8 min_val(const simd8 other) const {
return vminq_s8(value, other.value);
}
is_utf8_really_inline simd8 operator>(const simd8 other) const {
return vcgtq_s8(value, other.value);
}
is_utf8_really_inline simd8 operator<(const simd8 other) const {
return vcltq_s8(value, other.value);
}
is_utf8_really_inline simd8
operator==(const simd8 other) const {
return vceqq_s8(value, other.value);
}
template
is_utf8_really_inline simd8
prev(const simd8 prev_chunk) const {
return vextq_s8(prev_chunk, *this, 16 - N);
}
// Perform a lookup assuming no value is larger than 16
template
is_utf8_really_inline simd8 lookup_16(simd8 lookup_table) const {
return lookup_table.apply_lookup_16_to(*this);
}
template
is_utf8_really_inline simd8
lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4,
L replace5, L replace6, L replace7, L replace8, L replace9,
L replace10, L replace11, L replace12, L replace13, L replace14,
L replace15) const {
return lookup_16(simd8::repeat_16(
replace0, replace1, replace2, replace3, replace4, replace5, replace6,
replace7, replace8, replace9, replace10, replace11, replace12,
replace13, replace14, replace15));
}
template
is_utf8_really_inline simd8
apply_lookup_16_to(const simd8 original) {
return vqtbl1q_s8(*this, simd8(original));
}
};
template struct simd8x64 {
static constexpr int NUM_CHUNKS = 64 / sizeof(simd8);
static_assert(NUM_CHUNKS == 4,
"ARM kernel should use four registers per 64-byte block.");
simd8 chunks[NUM_CHUNKS];
simd8x64(const simd8x64 &o) = delete; // no copy allowed
simd8x64 &
operator=(const simd8 other) = delete; // no assignment allowed
simd8x64() = delete; // no default constructor allowed
is_utf8_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1,
const simd8 chunk2, const simd8 chunk3)
: chunks{chunk0, chunk1, chunk2, chunk3} {}
is_utf8_really_inline simd8x64(const T *ptr)
: chunks{simd8::load(ptr),
simd8::load(ptr + sizeof(simd8) / sizeof(T)),
simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)),
simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {}
is_utf8_really_inline void store(T *ptr) const {
this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T));
this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T));
this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T));
this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T));
}
is_utf8_really_inline simd8x64 &operator|=(const simd8x64 &other) {
this->chunks[0] |= other.chunks[0];
this->chunks[1] |= other.chunks[1];
this->chunks[2] |= other.chunks[2];
this->chunks[3] |= other.chunks[3];
return *this;
}
is_utf8_really_inline simd8 reduce_or() const {
return (this->chunks[0] | this->chunks[1]) |
(this->chunks[2] | this->chunks[3]);
}
is_utf8_really_inline bool is_ascii() const { return reduce_or().is_ascii(); }
template
is_utf8_really_inline void store_ascii_as_utf16(char16_t *ptr) const {
this->chunks[0].template store_ascii_as_utf16(ptr +
sizeof(simd8) * 0);
this->chunks[1].template store_ascii_as_utf16(ptr +
sizeof(simd8) * 1);
this->chunks[2].template store_ascii_as_utf16(ptr +
sizeof(simd8) * 2);
this->chunks[3].template store_ascii_as_utf16(ptr +
sizeof(simd8) * 3);
}
is_utf8_really_inline void store_ascii_as_utf32(char32_t *ptr) const {
this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0);
this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1);
this->chunks[2].store_ascii_as_utf32(ptr + sizeof(simd8) * 2);
this->chunks[3].store_ascii_as_utf32(ptr + sizeof(simd8) * 3);
}
is_utf8_really_inline uint64_t to_bitmask() const {
#ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
const uint8x16_t bit_mask =
make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01,
0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
#else
const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
#endif
// Add each of the elements next to each other, successively, to stuff each
// 8 byte mask into one.
uint8x16_t sum0 =
vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[0]), bit_mask),
vandq_u8(uint8x16_t(this->chunks[1]), bit_mask));
uint8x16_t sum1 =
vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[2]), bit_mask),
vandq_u8(uint8x16_t(this->chunks[3]), bit_mask));
sum0 = vpaddq_u8(sum0, sum1);
sum0 = vpaddq_u8(sum0, sum0);
return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
}
is_utf8_really_inline uint64_t eq(const T m) const {
const simd8 mask = simd8::splat(m);
return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask,
this->chunks[2] == mask, this->chunks[3] == mask)
.to_bitmask();
}
is_utf8_really_inline uint64_t lteq(const T m) const {
const simd8 mask = simd8::splat(m);
return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask,
this->chunks[2] <= mask, this->chunks[3] <= mask)
.to_bitmask();
}
is_utf8_really_inline uint64_t in_range(const T low, const T high) const {
const simd8 mask_low = simd8::splat(low);
const simd8 mask_high = simd8::splat(high);
return simd8x64(
(this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
(this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
(this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
(this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low))
.to_bitmask();
}
is_utf8_really_inline uint64_t not_in_range(const T low, const T high) const {
const simd8 mask_low = simd8::splat(low);
const simd8 mask_high = simd8::splat(high);
return simd8x64(
(this->chunks[0] > mask_high) | (this->chunks[0] < mask_low),
(this->chunks[1] > mask_high) | (this->chunks[1] < mask_low),
(this->chunks[2] > mask_high) | (this->chunks[2] < mask_low),
(this->chunks[3] > mask_high) | (this->chunks[3] < mask_low))
.to_bitmask();
}
is_utf8_really_inline uint64_t lt(const T m) const {
const simd8 mask = simd8::splat(m);
return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask,
this->chunks[2] < mask, this->chunks[3] < mask)
.to_bitmask();
}
is_utf8_really_inline uint64_t gt(const T m) const {
const simd8 mask = simd8::splat(m);
return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask,
this->chunks[2] > mask, this->chunks[3] > mask)
.to_bitmask();
}
is_utf8_really_inline uint64_t gteq(const T m) const {
const simd8 mask = simd8::splat(m);
return simd8x64(this->chunks[0] >= mask, this->chunks[1] >= mask,
this->chunks[2] >= mask, this->chunks[3] >= mask)
.to_bitmask();
}
is_utf8_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
const simd8 mask = simd8::splat(m);
return simd8x64(simd8(uint8x16_t(this->chunks[0])) >= mask,
simd8(uint8x16_t(this->chunks[1])) >= mask,
simd8(uint8x16_t(this->chunks[2])) >= mask,
simd8(uint8x16_t(this->chunks[3])) >= mask)
.to_bitmask();
}
}; // struct simd8x64
// Forward declaration of the 16-bit-lane vector wrapper so base_u16 can refer
// to simd16<T> before the specializations are defined.
// (Template parameter list restored; without it the line parses as an explicit
// instantiation.)
template <typename T> struct simd16;
template > struct base_u16 {
uint16x8_t value;
static const int SIZE = sizeof(value);
// Conversion from/to SIMD register
// Compiler-generated default constructor; `value` has no default member
// initializer, so it is not explicitly initialized here.
is_utf8_really_inline base_u16() = default;
// Wraps an existing NEON register of eight 16-bit lanes.
is_utf8_really_inline base_u16(const uint16x8_t _value)
    : value(_value) {
}
// Read-only implicit conversion to the underlying NEON register, so the
// wrapper can be passed straight to intrinsics.
is_utf8_really_inline operator const uint16x8_t &() const { return value; }
// Mutable implicit conversion to the underlying NEON register.
is_utf8_really_inline operator uint16x8_t &() {
  return value;
}
// Bit operations
// Lane-wise bitwise OR of this vector with `other`.
// (Template arguments restored: the return and parameter types are simd16<T>.)
is_utf8_really_inline simd16<T> operator|(const simd16<T> other) const {
  return vorrq_u16(*this, other);
}
// Lane-wise bitwise AND of this vector with `other`.
// (Template arguments restored: the return and parameter types are simd16<T>.)
is_utf8_really_inline simd16<T> operator&(const simd16<T> other) const {
  return vandq_u16(*this, other);
}
// Lane-wise bitwise XOR of this vector with `other`.
// (Template arguments restored: the return and parameter types are simd16<T>.)
is_utf8_really_inline simd16<T> operator^(const simd16<T> other) const {
  return veorq_u16(*this, other);
}
// Returns `*this & ~other`: vbicq_u16 clears, in this vector, every bit that
// is set in `other`.
// (Template arguments restored: the return and parameter types are simd16<T>.)
is_utf8_really_inline simd16<T> bit_andnot(const simd16<T> other) const {
  return vbicq_u16(*this, other);
}
// Bitwise complement of every 16-bit lane.
// The previous form, `*this ^ 0xFFu`, depends on how simd16<T> converts the
// scalar: if 0xFFu is splatted as a 16-bit value (0x00FF), only the low byte
// of each lane would be inverted. vmvnq_u16 inverts all 16 bits of every lane
// regardless of T.
// (Template arguments restored: the return type is simd16<T>.)
is_utf8_really_inline simd16<T> operator~() const { return vmvnq_u16(*this); }
is_utf8_really_inline simd16 &operator|=(const simd16 other) {
auto this_cast = static_cast