file unicode.hh
Namespaces
- namespace Ase
- The Anklang C++ API namespace.
Functions
- auto decodefs(const std::string& utf8str) → std::string
- Decode a UTF-8 string back into its file system path representation, extracting surrogate code points as bytes.
- auto displayfs(const std::string& utf8str) → std::string
- Convert a UTF-8 encoded file system path into a human-readable display format; the conversion is lossy but readable.
- auto encodefs(const std::string& fschars) → std::string
- Encode a file system path consisting of bytes into UTF-8, using surrogate code points to store non-UTF-8 bytes.
- auto string_is_ncname(const String& input) → bool
- auto string_to_ncname(const String& input, uint32_t substitute) → String
- auto unicode_is_assigned(uint32_t u) → bool constexpr
- Return whether u matches any of the assigned Unicode planes.
- auto unicode_is_character(uint32_t u) → bool constexpr
- Return whether u is not one of the 66 Unicode noncharacters.
- auto unicode_is_control_code(uint32_t u) → bool constexpr
- Return whether u is one of the 65 Unicode control codes.
- auto unicode_is_noncharacter(uint32_t u) → bool constexpr
- Return whether u is one of the 66 Unicode noncharacters.
- auto unicode_is_private(uint32_t u) → bool constexpr
- Return whether u is in one of the 3 private use areas of Unicode.
- auto unicode_is_valid(uint32_t u) → bool constexpr
- Return whether u is an allowed Unicode codepoint within 0x10FFFF and not part of a UTF-16 surrogate pair.
- auto utf8_to_unicode(const std::string& str, std::vector<uint32_t>& codepoints) → size_t
- auto utf8_to_unicode(const char* str, uint32_t* codepoints) → size_t
- auto utf8decode(const std::string& utf8str) → std::vector<uint32_t>
- Convert valid UTF-8 sequences to Unicode codepoints; invalid sequences are treated as Latin-1 characters.
- auto utf8encode(const std::vector<uint32_t>& codepoints) → std::string
- Convert codepoints into a UTF-8 string, using the shortest possible encoding.
- auto utf8encode(const uint32_t* codepoints, size_t n_codepoints) → std::string
- Convert codepoints into a UTF-8 string, using the shortest possible encoding.
- auto utf8len(const std::string& str) → size_t
- Count valid UTF-8 sequences; invalid sequences are counted as Latin-1 characters.
- auto utf8len(const char* str) → size_t
- Count valid UTF-8 sequences; invalid sequences are counted as Latin-1 characters.