2#ifndef __ASE_UNICODE_HH__ 
    3#define __ASE_UNICODE_HH__ 
   16size_t                  utf8len                 (
const char *str);
 
   23constexpr uint32_t      unicode_last_codepoint  = 0x10FFFF;
 
   37    (u >=   0xE000 && u <=   0xFFFF) ||   
 
   38    (u >=  0x10000 && u <=  0x14FFF) ||   
 
   39    (u >=  0x16000 && u <=  0x18FFF) ||   
 
   40    (u >=  0x1B000 && u <=  0x1BFFF) ||   
 
   41    (u >=  0x1D000 && u <=  0x1FFFF) ||   
 
   42    (u >=  0x20000 && u <=  0x2FFFF) ||   
 
   43    (u >=  0xE0000 && u <=  0xE0FFF) ||   
 
   44    (u >=  0xF0000 && u <=  0xFFFFF) ||   
 
   45    (u >= 0x100000 && u <= 0x10FFFF);     
 
   46  return __builtin_expect (assigned, 
true);
 
 
   53  const bool valid = u <= 0x10FFFF && (u & 0x1FF800) != 0xD800;
 
   54  return __builtin_expect (valid, 
true);
 
 
   61  const bool noncharacter = (u >= 0xFDD0 && u <= 0xFDEF) || (u & 0xFFFE) == 0xFFFE;
 
   62  return __builtin_expect (noncharacter, 
false);
 
 
   76  const bool control = ( u <= 0x1F) || (u >= 0x7F && u <= 0x9f);
 
   77  return __builtin_expect (control, 
false);
 
 
   84  const bool priv = (u >= 0xE000 && u <= 0xF8FF) || (u >= 0xF0000 && u <= 0xFFFFD) || (u >= 0x100000 && u <= 0x10FFFD);
 
   85  return __builtin_expect (priv, 
false);
 
 
The Anklang C++ API namespace.
 
bool string_is_ncname(const String &input)
 
std::string utf8encode(const uint32_t *codepoints, size_t n_codepoints)
Convert codepoints into a UTF-8 string, using the shortest possible encoding.
 
constexpr bool unicode_is_private(uint32_t u)
Return whether u is in one of the 3 private use areas of Unicode.
 
size_t utf8len(const char *str)
Count valid UTF-8 sequences; invalid sequences are counted as Latin-1 characters.
 
constexpr bool unicode_is_noncharacter(uint32_t u)
Return whether u is one of the 66 Unicode noncharacters.
 
size_t utf8_to_unicode(const char *str, uint32_t *codepoints)
 
constexpr bool unicode_is_character(uint32_t u)
Return whether u is not one of the 66 Unicode noncharacters.
 
std::string decodefs(const std::string &utf8str)
Decode UTF-8 string back into file system path representation, extracting surrogate code points as by...
 
constexpr bool unicode_is_valid(uint32_t u)
Return whether u is an allowed Unicode codepoint within 0x10FFFF and not part of a UTF-16 surrogate p...
 
std::string displayfs(const std::string &utf8str)
Convert UTF-8 encoded file system path into human readable display format, the conversion is lossy bu...
 
std::string String
Convenience alias for std::string.
 
constexpr bool unicode_is_assigned(uint32_t u)
Return whether u matches any of the assigned Unicode planes.
 
constexpr bool unicode_is_control_code(uint32_t u)
Return whether u is one of the 65 Unicode control codes.
 
std::string encodefs(const std::string &fschars)
Encode a file system path consisting of bytes into UTF-8, using surrogate code points to store non UT...
 
String string_to_ncname(const String &input, uint32_t substitute)
 
std::vector< uint32_t > utf8decode(const std::string &utf8str)
Convert valid UTF-8 sequences to Unicode codepoints, invalid sequences are treated as Latin-1 charact...