Anklang 0.3.0-460-gc4ef46ba
ASE — Anklang Sound Engine (C++)

« « « Anklang Documentation
Loading...
Searching...
No Matches
unicode.hh
Go to the documentation of this file.
1 // This Source Code Form is licensed MPL-2.0: http://mozilla.org/MPL/2.0
2#ifndef __ASE_UNICODE_HH__
3#define __ASE_UNICODE_HH__
4
5#include <ase/cxxaux.hh>
6
7namespace Ase {
8
10std::string utf8encode (const uint32_t *codepoints, size_t n_codepoints);
11String string_to_ncname (const String &input, uint32_t substitute = 0);
12bool string_is_ncname (const String &input);
13size_t utf8_to_unicode (const std::string &str, std::vector<uint32_t> &codepoints);
14size_t utf8_to_unicode (const char *str, uint32_t *codepoints);
15size_t utf8len (const std::string &str);
16size_t utf8len (const char *str);
17constexpr inline bool unicode_is_valid (uint32_t u);
18constexpr inline bool unicode_is_assigned (uint32_t u);
19constexpr inline bool unicode_is_noncharacter (uint32_t u);
20constexpr inline bool unicode_is_character (uint32_t u);
21constexpr inline bool unicode_is_control_code (uint32_t u);
22constexpr inline bool unicode_is_private (uint32_t u);
23constexpr uint32_t unicode_last_codepoint = 0x10FFFF;
24std::string encodefs (const std::string &fschars);
25std::string decodefs (const std::string &utf8str);
26std::string displayfs (const std::string &utf8str);
28
29
30// == Implementations ==
/// Return whether `u` matches any of the assigned Unicode planes.
/// This is a coarse block-range test: the UTF-16 surrogate range
/// 0xD800..0xDFFF and every value above 0x10FFFF yield false.
constexpr inline bool
unicode_is_assigned (uint32_t u)
{
  const bool assigned =
    (/*u >= 0x00 &&*/ u <= 0xD7FF) ||     // BMP - Basic Multilingual Plane (below surrogates at 0xD800)
    (u >= 0xE000 && u <= 0xFFFF) ||       // BMP - Basic Multilingual Plane (above surrogates at 0xDFFF)
    (u >= 0x10000 && u <= 0x14FFF) ||     // SMP - Supplementary Multilingual Plane
    (u >= 0x16000 && u <= 0x18FFF) ||     // SMP - Supplementary Multilingual Plane
    (u >= 0x1B000 && u <= 0x1BFFF) ||     // SMP - Supplementary Multilingual Plane
    (u >= 0x1D000 && u <= 0x1FFFF) ||     // SMP - Supplementary Multilingual Plane
    (u >= 0x20000 && u <= 0x2FFFF) ||     // SIP - Supplementary Ideographic Plane
    (u >= 0xE0000 && u <= 0xE0FFF) ||     // SSP - Supplementary Special-purpose Plane
    (u >= 0xF0000 && u <= 0xFFFFF) ||     // SPUA-A - Supplementary Private Use Area Plane
    (u >= 0x100000 && u <= 0x10FFFF);     // SPUA-B - Supplementary Private Use Area Plane
  // Branch hint only: the result is expected to be true in the common case.
  return __builtin_expect (assigned, true);
}
48
/// Return whether `u` is an allowed Unicode codepoint, i.e. within 0x10FFFF
/// and not part of the UTF-16 surrogate range 0xD800..0xDFFF.
constexpr inline bool
unicode_is_valid (uint32_t u)
{
  // The mask 0x1FF800 clears the low 11 bits, so all 2048 surrogate values
  // 0xD800..0xDFFF collapse onto 0xD800 and are rejected with one comparison.
  const bool valid = u <= 0x10FFFF && (u & 0x1FF800) != 0xD800;
  // Branch hint only: the result is expected to be true in the common case.
  return __builtin_expect (valid, true);
}
56
/// Return whether `u` is one of the 66 Unicode noncharacters.
/// These are 0xFDD0..0xFDEF plus the last two codepoints (..FFFE, ..FFFF) of each plane.
constexpr inline bool
unicode_is_noncharacter (uint32_t u)
{
  // The low-16-bit mask test is not range limited: it also matches values
  // above 0x10FFFF that end in FFFE/FFFF.
  const bool noncharacter = (u >= 0xFDD0 && u <= 0xFDEF) || (u & 0xFFFE) == 0xFFFE;
  // Branch hint only: the result is expected to be false in the common case.
  return __builtin_expect (noncharacter, false);
}
64
66constexpr inline bool
68{
69 return __builtin_expect (!unicode_is_noncharacter (u), true);
70}
71
/// Return whether `u` is one of the 65 Unicode control codes,
/// i.e. within 0x00..0x1F or 0x7F..0x9F.
constexpr inline bool
unicode_is_control_code (uint32_t u)
{
  const bool control = (/*u >= 0x00 &&*/ u <= 0x1F) || (u >= 0x7F && u <= 0x9f);
  // Branch hint only: the result is expected to be false in the common case.
  return __builtin_expect (control, false);
}
79
/// Return whether `u` is in one of the 3 private use areas of Unicode:
/// 0xE000..0xF8FF, 0xF0000..0xFFFFD or 0x100000..0x10FFFD.
constexpr inline bool
unicode_is_private (uint32_t u)
{
  // The two supplementary ranges stop at ..FFFD, leaving out the last two
  // codepoints (..FFFE, ..FFFF) of each plane.
  const bool priv = (u >= 0xE000 && u <= 0xF8FF) || (u >= 0xF0000 && u <= 0xFFFFD) || (u >= 0x100000 && u <= 0x10FFFD);
  // Branch hint only: the result is expected to be false in the common case.
  return __builtin_expect (priv, false);
}
87
88} // Ase
89
90#endif // __ASE_UNICODE_HH__
The Anklang C++ API namespace.
Definition api.hh:9
bool string_is_ncname(const String &input)
Definition unicode.cc:325
std::string utf8encode(const uint32_t *codepoints, size_t n_codepoints)
Convert codepoints into an UTF-8 string, using the shortest possible encoding.
Definition unicode.cc:249
constexpr bool unicode_is_private(uint32_t u)
Return whether u is in one of the 3 private use areas of Unicode.
Definition unicode.hh:82
size_t utf8len(const char *str)
Count valid UTF-8 sequences, invalid sequences are counted as Latin-1 characters.
Definition unicode.cc:184
constexpr bool unicode_is_noncharacter(uint32_t u)
Return whether u is one of the 66 Unicode noncharacters.
Definition unicode.hh:59
size_t utf8_to_unicode(const char *str, uint32_t *codepoints)
Definition unicode.cc:221
constexpr bool unicode_is_character(uint32_t u)
Return whether u is not one of the 66 Unicode noncharacters.
Definition unicode.hh:67
std::string decodefs(const std::string &utf8str)
Decode UTF-8 string back into file system path representation, extracting surrogate code points as by...
Definition unicode.cc:131
constexpr bool unicode_is_valid(uint32_t u)
Return whether u is an allowed Unicode codepoint within 0x10FFFF and not part of a UTF-16 surrogate p...
Definition unicode.hh:51
std::string displayfs(const std::string &utf8str)
Convert UTF-8 encoded file system path into human readable display format, the conversion is lossy bu...
Definition unicode.cc:150
std::string String
Convenience alias for std::string.
Definition cxxaux.hh:35
constexpr bool unicode_is_assigned(uint32_t u)
Return whether u matches any of the assigned Unicode planes.
Definition unicode.hh:33
constexpr bool unicode_is_control_code(uint32_t u)
Return whether u is one of the 65 Unicode control codes.
Definition unicode.hh:74
std::string encodefs(const std::string &fschars)
Encode a file system path consisting of bytes into UTF-8, using surrogate code points to store non UT...
Definition unicode.cc:112
String string_to_ncname(const String &input, uint32_t substitute)
Definition unicode.cc:339
std::vector< uint32_t > utf8decode(const std::string &utf8str)
Convert valid UTF-8 sequences to Unicode codepoints, invalid sequences are treated as Latin-1 charact...
Definition unicode.cc:208
typedef uint32_t