Anklang 0.3.0-460-gc4ef46ba
ASE — Anklang Sound Engine (C++)

« « « Anklang Documentation
Loading...
Searching...
No Matches
strings.cc
Go to the documentation of this file.
1 // This Source Code Form is licensed MPL-2.0: http://mozilla.org/MPL/2.0
2#include "strings.hh"
3#include "internal.hh"
4#include <cmath>
5#include <stdio.h>
6#include <stdlib.h>
7#include <libintl.h>
8#include <iconv.h>
9#include <errno.h>
10#include <algorithm>
11
12#include <glib.h> // g_unichar_*
13
14namespace Ase {
15
16namespace Unicode {
17extern inline unichar tolower (unichar uc) { return g_unichar_tolower (uc); }
18extern inline unichar toupper (unichar uc) { return g_unichar_toupper (uc); }
19extern inline unichar totitle (unichar uc) { return g_unichar_totitle (uc); }
20} // Unicode
21
22// === String ===
23static inline bool
24c_isalnum (uint8 c)
25{
26 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');
27}
28
32 uint64 count)
33{
34 if (count == 1)
35 return s;
36 else if (count & 1)
37 {
38 String tmp = string_multiply (s, count - 1);
39 tmp += s;
40 return tmp;
41 }
42 else if (count)
43 {
44 String tmp = string_multiply (s, count / 2);
45 tmp += tmp;
46 return tmp;
47 }
48 else
49 return "";
50}
51
55{
56 static const String validset = ASE_STRING_SET_LOWER_ALNUM "_";
57 String ident = string_tolower (input);
58 ident = string_canonify (ident, validset, "_");
59 if (!ident.empty() && ident[0] <= '9')
60 ident = "_" + ident;
61 return ident;
62}
63
68string_canonify (const String &string, const String &valid_chars, const String &substitute)
69{
70 const size_t l = string.size();
71 const char *valids = valid_chars.c_str(), *p = string.c_str();
72 size_t i;
73 for (i = 0; i < l; i++)
74 if (!strchr (valids, p[i]))
75 goto rewrite_string;
76 return string; // only ref increment
77 rewrite_string:
78 String d = string.substr (0, i);
79 d += substitute;
80 for (++i; i < l; i++)
81 if (strchr (valids, p[i]))
82 d += p[i];
83 else
84 d += substitute;
85 return d;
86}
87
89bool
90string_is_canonified (const String &string, const String &valid_chars)
91{
92 const size_t l = string.size();
93 const char *valids = valid_chars.c_str(), *p = string.c_str();
94 for (size_t i = 0; i < l; i++)
95 if (!strchr (valids, p[i]))
96 return false;
97 return true;
98}
99
101const String&
103{
104 static const String cached_a2z = "abcdefghijklmnopqrstuvwxyz";
105 return cached_a2z;
106}
107
109const String&
111{
112 static const String cached_A2Z = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
113 return cached_A2Z;
114}
115
117const String&
119{
120 static const String cached_alnum = ASE_STRING_SET_ASCII_ALNUM;
121 return cached_alnum;
122}
123
124bool
125string_is_ascii_alnum (const String &str)
126{
127 const String &alnum = string_set_ascii_alnum();
128 for (size_t i = 0; i < str.size(); i++)
129 if (!strchr (alnum.c_str(), str[i]))
130 return false;
131 return true;
132}
133
135String
137{
138 String s (str);
139 for (size_t i = 0; i < s.size(); i++)
140 s[i] = Unicode::tolower (s[i]);
141 return s;
142}
143
145bool
147{
148 for (size_t i = 0; i < str.size(); i++)
149 if (str[i] != Unicode::tolower (str[i]))
150 return false;
151 return true;
152}
153
155String
157{
158 String s (str);
159 for (size_t i = 0; i < s.size(); i++)
160 s[i] = Unicode::toupper (s[i]);
161 return s;
162}
163
165bool
167{
168 for (size_t i = 0; i < str.size(); i++)
169 if (str[i] != Unicode::toupper (str[i]))
170 return false;
171 return true;
172}
173
175String
177{
178 String s (str);
179 for (size_t i = 0; i < s.size(); i++)
180 s[i] = Unicode::totitle (s[i]);
181 return s;
182}
183
185String
186string_capitalize (const String &str, size_t maxn, bool rest_tolower)
187{
188 String s (str);
189 bool wasalpha = false;
190 for (size_t i = 0; i < s.size(); i++)
191 {
192 const bool atalpha = isalpha (s[i]);
193 if (!wasalpha && atalpha)
194 {
195 if (maxn == 0)
196 break;
197 s[i] = Unicode::toupper (s[i]);
198 maxn--;
199 }
200 else if (rest_tolower)
201 s[i] = Unicode::tolower (s[i]);
202 wasalpha = atalpha;
203 }
204 return s;
205}
206
208String
210{
211 gchar *result = g_utf8_normalize (src.c_str(), src.size(), G_NORMALIZE_NFC);
212 const String ret { result ? result : "" };
213 g_free (result);
214 return ret;
215}
216
218String
220{
221 gchar *result = g_utf8_normalize (src.c_str(), src.size(), G_NORMALIZE_NFD);
222 const String ret { result ? result : "" };
223 g_free (result);
224 return ret;
225}
226
228String
230{
231 gchar *result = g_utf8_normalize (src.c_str(), src.size(), G_NORMALIZE_NFKC);
232 const String ret { result ? result : "" };
233 g_free (result);
234 return ret;
235}
236
238String
240{
241 gchar *result = g_utf8_normalize (src.c_str(), src.size(), G_NORMALIZE_NFKD);
242 const String ret { result ? result : "" };
243 g_free (result);
244 return ret;
245}
246
248String
250{
251 gchar *result = g_utf8_casefold (src.c_str(), src.size());
252 const String ret { result ? result : "" };
253 g_free (result);
254 return ret;
255}
256
258int
259string_cmp (const String &s1, const String &s2)
260{
261 return g_utf8_collate (s1.c_str(), s2.c_str());
262}
263
265int
266string_casecmp (const String &s1, const String &s2)
267{
268 const String cf1 = string_casefold (s1);
269 const String cf2 = string_casefold (s2);
270 return string_cmp (cf1, cf2);
271}
272
273#define STACK_BUFFER_SIZE 3072
274
275static inline String
276current_locale_vprintf (const char *format, va_list vargs)
277{
278 va_list pargs;
279 char buffer[STACK_BUFFER_SIZE];
280 buffer[0] = 0;
281 va_copy (pargs, vargs);
282 const int l = vsnprintf (buffer, sizeof (buffer), format, pargs);
283 va_end (pargs);
285 if (l < 0)
286 string = format; // error?
287 else if (size_t (l) < sizeof (buffer))
288 string = String (buffer, l);
289 else
290 {
291 string.resize (l + 1);
292 va_copy (pargs, vargs);
293 const int j = vsnprintf (&string[0], string.size(), format, pargs);
294 va_end (pargs);
295 string.resize (std::min (l, std::max (j, 0)));
296 }
297 return string;
298}
299
300static inline String
301posix_locale_vprintf (const char *format, va_list vargs)
302{
303 ScopedPosixLocale posix_locale; // use POSIX locale for this scope
304 return current_locale_vprintf (format, vargs);
305}
306
308String
309string_vprintf (const char *format, va_list vargs)
310{
311 return posix_locale_vprintf (format, vargs);
312}
313
315String
316string_locale_vprintf (const char *format, va_list vargs)
317{
318 return current_locale_vprintf (format, vargs);
319}
320
321static StringS
322string_whitesplit (const String &string, size_t maxn)
323{
324 static const char whitespaces[] = " \t\n\r\f\v";
325 StringS sv;
326 size_t i, l = 0;
327 for (i = 0; i < string.size() && sv.size() < maxn; i++)
328 if (strchr (whitespaces, string[i]))
329 {
330 if (i > l)
331 sv.push_back (string.substr (l, i - l));
332 l = i + 1;
333 }
334 i = string.size();
335 if (i > l)
336 sv.push_back (string.substr (l, i - l));
337 return sv;
338}
339
343string_split (const String &string, const String &splitter, size_t maxn)
344{
345 if (splitter == "")
346 return string_whitesplit (string, maxn);
347 StringS sv;
348 size_t i, l = 0, k = splitter.size();
349 for (i = 0; i < string.size() && sv.size() < maxn; i++)
350 if (string.substr (i, k) == splitter)
351 {
352 if (i >= l)
353 sv.push_back (string.substr (l, i - l));
354 l = i + k;
355 }
356 i = string.size();
357 if (i >= l)
358 sv.push_back (string.substr (l, i - l));
359 return sv;
360}
361
365string_split_any (const String &string, const String &splitchars, size_t maxn)
366{
367 StringS sv;
368 size_t i, l = 0;
369 if (splitchars.empty())
370 {
371 for (i = 0; i < string.size() && sv.size() < maxn; i++)
372 sv.push_back (string.substr (i, 1));
373 if (i < string.size())
374 sv.push_back (string.substr (i, string.size() - i));
375 }
376 else
377 {
378 const char *schars = splitchars.c_str();
379 l = 0;
380 for (i = 0; i < string.size() && sv.size() < maxn; i++)
381 if (strchr (schars, string[i]))
382 {
383 if (i >= l)
384 sv.push_back (string.substr (l, i - l));
385 l = i + 1;
386 }
387 i = string.size();
388 if (i >= l)
389 sv.push_back (string.substr (l, i - l));
390 }
391 return sv;
392}
393
395strings_version_sort (const StringS &strings, bool reverse)
396{
397 StringS dest = strings;
398 strings_version_sort (&dest, reverse);
399 return dest;
400}
401
402void
403strings_version_sort (StringS *strings, bool reverse)
404{
405 return_unless (strings);
406 std::sort (strings->begin(), strings->end(), [reverse] (auto &a, auto &b) -> bool {
407 const int c = strverscmp (a.c_str(), b.c_str());
408 return reverse ? c > 0 : c < 0;
409 });
410}
411
413void
415{
416 for (size_t i = svector.size(); i; i--)
417 {
418 const size_t idx = i - 1;
419 if (svector[idx].empty())
420 svector.erase (svector.begin() + idx);
421 }
422}
423
425void
427{
428 for (auto &s : svector)
429 s = string_lstrip (s);
430}
431
433void
435{
436 for (auto &s : svector)
437 s = string_rstrip (s);
438}
439
441void
443{
444 for (auto &s : svector)
445 s = string_strip (s);
446}
447
451String
452string_join (const String &junctor, const StringS &strvec)
453{
454 String s;
455 if (strvec.size())
456 s = strvec[0];
457 for (uint i = 1; i < strvec.size(); i++)
458 s += junctor + strvec[i];
459 return s;
460}
461
466bool
467string_to_bool (const String &string, bool fallback)
468{
469 return cstring_to_bool (string.c_str(), fallback);
470}
471
472bool
473cstring_to_bool (const char *string, bool fallback)
474{
475 if (!string)
476 return fallback;
477 const char *p = string;
478 // skip spaces
479 while (*p && isspace (*p))
480 p++;
481 // ignore signs
482 if (p[0] == '-' || p[0] == '+')
483 {
484 p++;
485 // skip spaces
486 while (*p && isspace (*p))
487 p++;
488 }
489 // handle numbers
490 if (p[0] >= '0' && p[0] <= '9')
491 return 0 != string_to_uint (p);
492 // handle special words
493 if (strncasecmp (p, "ON", 2) == 0)
494 return 1;
495 if (strncasecmp (p, "OFF", 3) == 0)
496 return 0;
497 // empty string
498 if (!p[0])
499 return fallback;
500 // anything else needs to resemble "yes" or "true"
501 return strchr ("YyTt", p[0]);
502}
503
504// The strrstr() function finds the last occurrence of the substring `needle` in the string `haystack`
505const char*
506strrstr (const char *haystack, const char *needle)
507{
508 const ssize_t n = strlen (needle);
509 return_unless (n > 0, haystack);
510 const ssize_t h = strlen (haystack);
511 return_unless (h >= n, nullptr);
512 for (ssize_t i = h - n; i >= 0; i--)
513 if (strncmp (haystack + i, needle, n) == 0)
514 return haystack + i;
515 return nullptr;
516}
517
519const char*
520string_find_word (const char *haystack, const char *word)
521{
522 return_unless (haystack && word, nullptr);
523 const size_t l = strlen (word);
524 // loop over all positions where `word` is found in `haystack`
525 for (const char *p = strstr (haystack, word); p; p = strstr (p + 1, word))
526 if ((p == haystack || !c_isalnum (p[-1])) && !c_isalnum (p[l]))
527 return p;
528 return nullptr;
529}
530
532String
534{
535 return String (value ? "1" : "0");
536}
537
539uint64
540string_to_uint (const String &string, size_t *consumed, uint base)
541{
542 const char *const start = string.c_str(), *p = start;
543 while (*p == ' ' || *p == '\n' || *p == '\t' || *p == '\r')
544 p++;
545 const bool hex = p[0] == '0' && (p[1] == 'X' || p[1] == 'x');
546 const char *const number = hex ? p + 2 : p;
547 char *endptr = NULL;
548 const uint64 result = strtoull (number, &endptr, hex ? 16 : base);
549 if (consumed)
550 {
551 if (!endptr || endptr <= number)
552 *consumed = 0;
553 else
554 *consumed = endptr - start;
555 }
556 return result;
557}
558
560String
562{
563 return string_format ("%u", value);
564}
565
567bool
568string_has_int (const String &string)
569{
570 const char *p = string.c_str();
571 while (*p == ' ' || *p == '\n' || *p == '\t' || *p == '\r')
572 p++;
573 return p[0] >= '0' && p[0] <= '9';
574}
575
577int64
578string_to_int (const String &string, size_t *consumed, uint base)
579{
580 const char *const start = string.c_str(), *p = start;
581 while (*p == ' ' || *p == '\n' || *p == '\t' || *p == '\r')
582 p++;
583 const bool negate = p[0] == '-';
584 if (negate)
585 p++;
586 const bool hex = p[0] == '0' && (p[1] == 'X' || p[1] == 'x');
587 const char *const number = hex ? p + 2 : p;
588 char *endptr = NULL;
589 const uint64_t result = strtoull (number, &endptr, hex ? 16 : base);
590 if (consumed)
591 {
592 if (!endptr || endptr <= number)
593 *consumed = 0;
594 else
595 *consumed = endptr - start;
596 }
597 if (result < 9223372036854775808ull)
598 return negate ? -int64_t (result) : result;
599 return negate ? -9223372036854775807ll - 1 : 9223372036854775808ull - 1;
600}
601
603String
605{
606 return string_format ("%d", value);
607}
608
/** Wrapper around strtold() that corrects the sign of "-nan".
 * If strtold() yields a NaN without sign bit although the input (after whitespace)
 * starts with "-nan" in any letter case, the negated NaN is returned instead.
 */
static long double
libc_strtold (const char *nptr, char **endptr)
{
  const long double value = strtold (nptr, endptr);
  if (!std::isnan (value) || std::signbit (value))
    return value;
  const char *cursor = nptr;
  while (isspace (*cursor))
    cursor++;
  const bool minus_nan = strncasecmp (cursor, "-nan", 4) == 0;
  return minus_nan ? -value : value; // glibc-2.19 doesn't get the NAN sign right
}
623
625long double
626posix_locale_strtold (const char *nptr, char **endptr)
627{
628 ScopedPosixLocale posix_locale; // use POSIX locale for this scope
629 char *fail_pos = NULL;
630 const long double val = libc_strtold (nptr, &fail_pos);
631 if (endptr)
632 *endptr = fail_pos;
633 return val;
634}
635
637long double
638current_locale_strtold (const char *nptr, char **endptr)
639{
640 char *fail_pos_1 = NULL;
641 const long double val_1 = posix_locale_strtold (nptr, &fail_pos_1);
642 if (fail_pos_1 && fail_pos_1[0] != 0)
643 {
644 char *fail_pos_2 = NULL;
645 const long double val_2 = libc_strtold (nptr, &fail_pos_2);
646 if (fail_pos_2 > fail_pos_1)
647 {
648 if (endptr)
649 *endptr = fail_pos_2;
650 return val_2;
651 }
652 }
653 if (endptr)
654 *endptr = fail_pos_1;
655 return val_1;
656}
657
659long double
661{
662 return current_locale_strtold (string.c_str(), NULL);
663}
664
666long double
667string_to_long_double (const char *dblstring, const char **endptr)
668{
669 return current_locale_strtold (dblstring, (char**) endptr);
670}
671
673double
675{
676 return current_locale_strtold (string.c_str(), NULL);
677}
678
680double
681string_to_double (const char *dblstring, const char **endptr)
682{
683 return current_locale_strtold (dblstring, (char**) endptr);
684}
685
687String
688string_from_float (float value)
689{
690 if (std::isnan (value))
691 return std::signbit (value) ? "-NaN" : "+NaN";
692 if (std::isinf (value))
693 return std::signbit (value) ? "-Infinity" : "+Infinity";
694 return string_format ("%.7g", value);
695}
696
698String
699string_from_double (double value)
700{
701 if (std::isnan (value))
702 return std::signbit (value) ? "-NaN" : "+NaN";
703 if (std::isinf (value))
704 return std::signbit (value) ? "-Infinity" : "+Infinity";
705 return string_format ("%.17g", value);
706}
707
709String
710string_from_long_double (long double value)
711{
712 if (std::isnan (value))
713 return std::signbit (value) ? "-NaN" : "+NaN";
714 if (std::isinf (value))
715 return std::signbit (value) ? "-Infinity" : "+Infinity";
716 return string_format ("%.20g", value);
717}
718
722{
724 const char *spaces = " \t\n";
725 const char *obrace = "{([";
726 const char *delims = ";";
727 const char *cbrace = "])}";
728 const char *number = "+-0123456789eE.,";
729 const char *s = string.c_str();
730 /* skip spaces */
731 while (*s && strchr (spaces, *s))
732 s++;
733 /* skip opening brace */
734 if (*s && strchr (obrace, *s))
735 s++;
736 const char *d = s;
737 while (*d && !strchr (cbrace, *d))
738 {
739 while (*d && strchr (spaces, *d)) /* skip spaces */
740 d++;
741 s = d; /* start of number */
742 if (!*d || (!strchr (number, *d) && /* ... if any */
743 !strchr (delims, *d)))
744 break;
745 while (*d && strchr (number, *d)) /* pass across number */
746 d++;
747 dvec.push_back (string_to_double (String (s, d - s)));
748 while (*d && strchr (spaces, *d)) /* skip spaces */
749 d++;
750 if (*d && strchr (delims, *d))
751 d++; /* eat delimiter */
752 }
753 // printerr ("vector: %d: %s\n", dvec.size(), string_from_double_vector (dvec).c_str());
754 return dvec;
755}
756
758String
760{
761 String s;
762 for (uint i = 0; i < dvec.size(); i++)
763 {
764 if (i > 0)
765 s += delim;
766 s += string_from_double (dvec[i]);
767 }
768 return s;
769}
770
772double
773string_to_seconds (const String &string, double fallback)
774{
775 const char *fail = nullptr;
776 const char *const cs = string.c_str();
777 double d = string_to_double (cs, &fail);
778 if (fail == cs)
779 return fallback;
780 if (!fail || fail[0] == 's')
781 return d;
782 if (strncmp (fail, "ns", 2) == 0)
783 return d * 0.000000001;
784 const char *usec = "µs";
785 if (strncmp (fail, "us", 2) == 0 || strncmp (fail, usec, strlen (usec)) == 0)
786 return d * 0.000001;
787 if (strncmp (fail, "ms", 2) == 0)
788 return d * 0.001;
789 if (fail[0] == 'm')
790 return d * 60;
791 if (fail[0] == 'h')
792 return d * 3600;
793 if (fail[0] == 'd')
794 return d * 3600 * 24;
795 if (fail[0] == 'w')
796 return d * 3600 * 24 * 7;
797 return d; // treat as seconds
798}
799
801String
802string_from_errno (int errno_val)
803{
804 if (errno_val < 0)
805 errno_val = -errno_val; // fixup library return values
806 char buffer[1024] = { 0, };
807 const char *errstr = strerror_r (errno_val, buffer, sizeof (buffer));
808 if (!errstr || !errstr[0]) // fallback for possible strerror_r breakage encountered on _GNU_SOURCE systems
809 return strerror (errno_val);
810 return errstr;
811}
812
814bool
815string_is_uuid (const String &uuid_string) /* check uuid formatting */
816{
817 int i, l = uuid_string.size();
818 if (l != 36)
819 return false;
820 // 00000000-0000-0000-0000-000000000000
821 for (i = 0; i < l; i++)
822 if (i == 8 || i == 13 || i == 18 || i == 23)
823 {
824 if (uuid_string[i] != '-')
825 return false;
826 continue;
827 }
828 else if ((uuid_string[i] >= '0' && uuid_string[i] <= '9') ||
829 (uuid_string[i] >= 'a' && uuid_string[i] <= 'f') ||
830 (uuid_string[i] >= 'A' && uuid_string[i] <= 'F'))
831 continue;
832 else
833 return false;
834 return true;
835}
836
838int
839string_cmp_uuid (const String &uuid_string1, const String &uuid_string2)
840{
841 return strcasecmp (uuid_string1.c_str(), uuid_string2.c_str()); /* good enough for numeric equality and defines stable order */
842}
843
845bool
846string_startswith (const String &string, const String &fragment)
847{
848 return fragment.size() <= string.size() && 0 == string.compare (0, fragment.size(), fragment);
849}
850
852bool
853string_startswith (const String &string, const StringS &fragments)
854{
855 for (const String &frag : fragments)
856 if (string_startswith (string, frag))
857 return true;
858 return false;
859}
860
862bool
863string_endswith (const String &string, const String &fragment)
864{
865 return fragment.size() <= string.size() && 0 == string.compare (string.size() - fragment.size(), fragment.size(), fragment);
866}
867
869bool
870string_endswith (const String &string, const StringS &fragments)
871{
872 for (const String &frag : fragments)
873 if (string_endswith (string, frag))
874 return true;
875 return false;
876}
877
/// Canonify an identifier character: digits and lower case letters are kept, upper case letters are lowered, everything else becomes '-'.
static inline char
identifier_char_canon (char c)
{
  const bool digit = c >= '0' && c <= '9';
  const bool lower = c >= 'a' && c <= 'z';
  if (digit || lower)
    return c;
  const bool upper = c >= 'A' && c <= 'Z';
  return upper ? char (c - 'A' + 'a') : '-';
}
890
891static inline bool
892identifier_match (const char *str1, const char *str2)
893{
894 while (*str1 && *str2)
895 {
896 const uint8 s1 = identifier_char_canon (*str1++);
897 const uint8 s2 = identifier_char_canon (*str2++);
898 if (s1 != s2)
899 return false;
900 }
901 return *str1 == 0 && *str2 == 0;
902}
903
904static bool
905match_identifier_detailed (const String &ident, const String &tail)
906{
907 assert_return (ident.size() >= tail.size(), false);
908 const char *word = ident.c_str() + ident.size() - tail.size();
909 if (word > ident.c_str()) // allow partial matches on word boundary only
910 {
911 if (c_isalnum (word[-1]) && c_isalnum (word[0])) // no word boundary
912 return false;
913 }
914 return identifier_match (word, tail.c_str());
915}
916
918bool
919string_match_identifier_tail (const String &ident, const String &tail)
920{
921 return ident.size() >= tail.size() && match_identifier_detailed (ident, tail);
922}
923
925bool
926string_match_identifier (const String &ident1, const String &ident2)
927{
928 return ident1.size() == ident2.size() && match_identifier_detailed (ident1, ident2);
929}
930
933String
934string_from_pretty_function_name (const char *cxx_pretty_function)
935{
936 // get rid of g++'s anon prefixes
937 const String p1 = string_replace (cxx_pretty_function, "{anonymous}::", "");
938 // get rid of clang++'s anon prefixes
939 const String p2 = string_replace (p1, "(anonymous namespace)::", "");
940 const char *const pretty_function = p2.c_str();
941 /* finding the function name is non-trivial in the presence of function pointer
942 * return types. the following code assumes the function name preceedes the
943 * first opening parenthesis not followed by a star.
944 */
945 const char *op = strchr (pretty_function, '(');
946 while (op && op[1] == '*')
947 op = strchr (op + 1, '(');
948 if (!op)
949 return pretty_function;
950 // *op == '(' && op[1] != '*'
951 const char *last = op - 1;
952 while (last >= pretty_function && strchr (" \t\n", *last))
953 last--; // skip spaces before '('
954 if (last < pretty_function)
955 return pretty_function;
956 // scan across function name characters
957 const char *first = last;
958 while (first >= pretty_function && strchr ("0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZ:abcdefghijklmnopqrstuvwxyz$", *first))
959 first--;
960 String result = String (first + 1, last - first);
961 return result;
962}
964String
965string_url_decode (const String &urlstr, const bool form_url_encoded)
966{
967 String s;
968 s.reserve (urlstr.size());
969 for (size_t i = 0; i < urlstr.size(); i++)
970 {
971 const char c = urlstr[i];
972 if (c == '%' && isxdigit (urlstr[i+1]) && isxdigit (urlstr[i+2]))
973 {
974 const char buf[3] = { urlstr[i+1], urlstr[i+2], 0 };
975 const long l = strtol (buf, nullptr, 16);
976 s.push_back (char (l));
977 i += 2;
978 }
979 else if (form_url_encoded && c == '+')
980 s.push_back (' ');
981 else
982 s.push_back (c);
983 }
984 return s;
985}
986
988String
989string_url_encode (const String &rawstr, const bool form_url_encoded)
990{
991 String s;
992 s.reserve (rawstr.size());
993 const char *const unescaped = /*unreserved:*/ "-._~" /*ok-in-FF:*/ "[]!()*";
994 for (const uint8_t c : rawstr)
995 if (isalnum (c) || strchr (unescaped, c))
996 s.push_back (c);
997 else if (form_url_encoded && c == ' ')
998 s.push_back ('+');
999 else
1000 {
1001 const char *const hex = "0123456789ABCDEF";
1002 const char buf[4] = { '%', hex[c >> 4], hex[c & 0x0f], 0 };
1003 s += buf;
1004 }
1005 return s;
1006}
1007
1010String
1012{
1013 String buffer;
1014 for (String::const_iterator it = str.begin(); it != str.end(); it++)
1015 {
1016 const uint8 d = *it;
1017 if (d == '\a') buffer += "\\a";
1018 else if (d == '\b') buffer += "\\b";
1019 else if (d == '\t') buffer += "\\t";
1020 else if (d == '\n') buffer += "\\n";
1021 else if (d == '\v') buffer += "\\v";
1022 else if (d == '\f') buffer += "\\f";
1023 else if (d == '\r') buffer += "\\r";
1024 else if (d == '"') buffer += "\\\"";
1025 else if (d == '\\') buffer += "\\\\";
1026 else if (d < 32 || d > 126)
1027 buffer += string_format ("\\%03o", d);
1028 else
1029 buffer += d;
1030 }
1031 return buffer;
1032}
1033
1035String
1037{
1038 return String() + "\"" + string_to_cescape (str) + "\"";
1039}
1040
1042String
1044{
1045 uint i = 0;
1046 if (i < input.size() && (input[i] == '"' || input[i] == '\''))
1047 {
1048 const char qchar = input[i];
1049 i++;
1050 String out;
1051 bool be = false;
1052 while (i < input.size() && (input[i] != qchar || be))
1053 {
1054 if (!be && input[i] == '\\')
1055 be = true;
1056 else
1057 {
1058 if (be)
1059 switch (input[i])
1060 {
1061 uint k, oc;
1062 case '0': case '1': case '2': case '3':
1063 case '4': case '5': case '6': case '7':
1064 k = MIN (input.size(), i + 3);
1065 oc = input[i++] - '0';
1066 while (i < k && input[i] >= '0' && input[i] <= '7')
1067 oc = oc * 8 + input[i++] - '0';
1068 out += char (oc);
1069 i--;
1070 break;
1071 case 'a': out += '\a'; break;
1072 case 'n': out += '\n'; break;
1073 case 'r': out += '\r'; break;
1074 case 't': out += '\t'; break;
1075 case 'b': out += '\b'; break;
1076 case 'f': out += '\f'; break;
1077 case 'v': out += '\v'; break;
1078 default: out += input[i]; break;
1079 }
1080 else
1081 out += input[i];
1082 be = false;
1083 }
1084 i++;
1085 }
1086 if (i < input.size() && input[i] == qchar)
1087 {
1088 i++;
1089 if (i < input.size())
1090 return input; // extraneous characters after string quotes
1091 return out;
1092 }
1093 else
1094 return input; // unclosed string quotes
1095 }
1096 else if (i == input.size())
1097 return input; // empty string arg: ""
1098 else
1099 return input; // missing string quotes
1100}
1101
/// Characters treated as whitespace by the strip functions; the pointer itself is const so it cannot be re-targeted.
static const char *const whitespaces = " \t\v\f\n\r";
1103
1105String
1106string_lstrip (const String &input)
1107{
1108 uint64 i = 0;
1109 while (i < input.size() && strchr (whitespaces, input[i]))
1110 i++;
1111 return i ? input.substr (i) : input;
1112}
1113
1115String
1116string_rstrip (const String &input)
1117{
1118 uint64 i = input.size();
1119 while (i > 0 && strchr (whitespaces, input[i - 1]))
1120 i--;
1121 return i < input.size() ? input.substr (0, i) : input;
1122}
1123
1125String
1126string_strip (const String &input)
1127{
1128 uint64 a = 0;
1129 while (a < input.size() && strchr (whitespaces, input[a]))
1130 a++;
1131 uint64 b = input.size();
1132 while (b > 0 && strchr (whitespaces, input[b - 1]))
1133 b--;
1134 if (a == 0 && b == input.size())
1135 return input;
1136 else if (b == 0)
1137 return "";
1138 else
1139 return input.substr (a, b - a);
1140}
1141
1143String
1144string_replace (const String &input, const String &marker, const String &replacement, size_t maxn)
1145{
1146 String s = input;
1147 size_t i = s.find (marker);
1148 while (i != String::npos && maxn-- > 0)
1149 {
1150 s = s.substr (0, i) + replacement + s.substr (i + marker.size());
1151 i = s.find (marker, i + replacement.size());
1152 }
1153 return s;
1154}
1155
1156
1158String
1159string_substitute_char (const String &input, const char match, const char subst)
1160{
1161 String output = input;
1162 if (match != subst)
1163 for (String::size_type i = 0; i < output.size(); i++)
1164 if (output.data()[i] == match)
1165 output[i] = subst; // unshares string
1166 return output;
1167}
1168
1170String
1171string_to_hex (const String &input)
1172{
1173 String s;
1174 s.reserve (input.size() * 2);
1175 for (const char &c : input)
1176 s += string_format ("%02x", uint8_t (c));
1177 return s;
1178}
1179
1183String
1184string_hexdump (const void *addr, size_t length, size_t initial_offset)
1185{
1186 // 000000d0 00 34 00 00 08 00 00 00 40 00 00 00 61 00 00 00 |.4......@...a...|
1187 const unsigned char *data = (const unsigned char*) addr;
1188 size_t i;
1189 String out, cx, cc = "|";
1190 for (i = 0; i < length;)
1191 {
1192 if (i % 8 == 0)
1193 cx += " ";
1194 cx += string_format (" %02x", data[i]);
1195 cc += string_format ("%c", data[i] < ' ' || data[i] > '~' ? '.' : data[i]);
1196 i++;
1197 if (i && i % 16 == 0)
1198 {
1199 cc += "|";
1200 out += string_format ("%08x%s %s\n", initial_offset + i - 16, cx.c_str(), cc.c_str());
1201 cx = "";
1202 cc = "|";
1203 }
1204 }
1205 if (i % 16)
1206 {
1207 for (; i % 16; i++)
1208 {
1209 if (i % 8 == 0)
1210 cx += " ";
1211 cx += " ";
1212 }
1213 cc += "|";
1214 out += string_format ("%08x%s %s\n", initial_offset + i - 16, cx.c_str(), cc.c_str());
1215 }
1216 return out;
1217}
1218
1220void
1221memset4 (uint32 *mem, uint32 filler, uint length)
1222{
1223 static_assert (sizeof (*mem) == 4, "");
1224 static_assert (sizeof (filler) == 4, "");
1225 static_assert (sizeof (wchar_t) == 4, "");
1226 wmemset ((wchar_t*) mem, filler, length);
1227}
1228
1234String
1235string_vector_find (const StringS &svector, const String &prefix, const String &fallback)
1236{
1237 for (size_t i = svector.size(); i > 0; i--)
1238 {
1239 const String &s = svector[i-1];
1240 if (s.size() >= prefix.size() && strncmp (s.data(), prefix.data(), prefix.size()) == 0)
1241 return s;
1242 }
1243 return fallback;
1244}
1245
1251String
1252string_vector_find_value (const StringS &svector, const String &prefix, const String &fallback)
1253{
1254 for (size_t i = svector.size(); i > 0; i--)
1255 {
1256 const String &s = svector[i-1];
1257 if (s.size() >= prefix.size() && strncmp (s.data(), prefix.data(), prefix.size()) == 0)
1258 return s.substr (prefix.size());
1259 }
1260 return fallback;
1261}
1262
1264StringS
1265cstrings_to_vector (const char *s, ...)
1266{
1267 StringS sv;
1268 if (s)
1269 {
1270 sv.push_back (s);
1271 va_list args;
1272 va_start (args, s);
1273 s = va_arg (args, const char*);
1274 while (s)
1275 {
1276 sv.push_back (s);
1277 s = va_arg (args, const char*);
1278 }
1279 va_end (args);
1280 }
1281 return sv;
1282}
1283
1284// == Key=Value Pairs ==
1285String
1286kvpair_key (const String &key_value_pair)
1287{
1288 const char *const eq = strchr (key_value_pair.c_str(), '=');
1289 return eq ? key_value_pair.substr (0, eq - key_value_pair.c_str()) : key_value_pair;
1290}
1291
1292String
1293kvpair_value (const String &key_value_pair)
1294{
1295 const char *const eq = strchr (key_value_pair.c_str(), '=');
1296 return eq ? key_value_pair.substr (eq - key_value_pair.c_str() + 1) : "";
1297}
1298
1299String
1300kvpairs_fetch (const StringS &kvs, const String &key, bool casesensitive)
1301{
1302 const ssize_t i = kvpairs_search (kvs, key, casesensitive);
1303 return i >= 0 ? kvs[i].data() + key.size() + 1 : "";
1304}
1305
1306ssize_t
1307kvpairs_search (const StringS &kvs, const String &k, const bool casesensitive)
1308{
1309 const size_t l = k.size();
1310 for (size_t i = 0; i < kvs.size(); i++)
1311 if (kvs[i].size() > l && kvs[i][l] == '=') {
1312 if (casesensitive) {
1313 if (strncmp (kvs[i].data(), k.data(), l) == 0)
1314 return i;
1315 } else { // !casesensitive
1316 if (strncasecmp (kvs[i].data(), k.data(), l) == 0)
1317 return i;
1318 }
1319 }
1320 return -1;
1321}
1322
1323bool
1324kvpairs_assign (StringS &kvs, const String &key_value_pair, bool casesensitive)
1325{
1326 const char *const eq = strchr (key_value_pair.c_str(), '=');
1327 const String key = eq ? key_value_pair.substr (0, eq - key_value_pair.c_str()) : "";
1328 assert_return (key.size() > 0, false);
1329 const ssize_t i = kvpairs_search (kvs, key, casesensitive);
1330 if (key_value_pair.size() == key.size() + 1 && key_value_pair[key_value_pair.size()-1] == '=') {
1331 // value is empty
1332 if (i >= 0)
1333 kvs.erase (kvs.begin() + i); // empty value, erase
1334 } else if (i >= 0)
1335 kvs[i] = key_value_pair; // replace
1336 else
1337 kvs.push_back (key_value_pair); // insert
1338 return i >= 0; // replaced old value
1339}
1340
1341// === String Options ===
/// Check for the characters that separate entries of an option list.
static bool
is_separator (char c)
{
  return c == ';' || c == ':';
}

/** Find the last entry for `needle` in the option list `haystack`, ignoring letter case.
 * `l` must be the length of `needle`. The kind of entry depends on `allowoption`:
 * - 0: find negated entries, i.e. "no-" followed by `needle` as a complete entry;
 * - 1: find `needle` as a complete entry;
 * - 2: like 1, but also accept `needle` followed by "=" and a value.
 * Entries are delimited by the list start or end and by separator characters.
 * Returns a pointer to the start of `needle` within `haystack`, or nullptr.
 */
static const char*
find_option (const char *haystack, const char *const needle, const size_t l, const int allowoption)
{
  const char *match = nullptr;
  for (const char *c = strcasestr (haystack, needle); c; c = strcasestr (c + 1, needle))
    {
      // the entry ends right behind the needle
      const bool ends_entry = c[l] == 0 || is_separator (c[l]);
      if (!allowoption)
        {
          // "no-" needs to start the list or follow a separator, then be followed directly by the needle
          const bool negated_first = c == haystack + 3 && strncasecmp (haystack, "no-", 3) == 0;
          const bool negated_later = c >= haystack + 4 && is_separator (c[-4]) && strncasecmp (c - 3, "no-", 3) == 0;
          if (ends_entry && (negated_first || negated_later))
            match = c;
        }
      else
        {
          const bool has_value = allowoption >= 2 && c[l] == '=';
          const bool starts_entry = c == haystack || is_separator (c[-1]);
          if ((has_value || ends_entry) && starts_entry)
            match = c;
        }
    }
  return match;
}
1360
1361static size_t
1362separator_strlen (const char *const s)
1363{
1364 const char *c = s;
1365 while (c[0] && !is_separator (c[0]))
1366 c++;
1367 return c - s;
1368}
1369
1370static std::string_view
1371string_option_find_value (const char *string, const char *feature, const char *fallback, const char *denied, const int matching)
1372{
1373 if (!string || !feature || !string[0] || !feature[0])
1374 return { fallback, strlen (fallback) }; // not-found
1375 const size_t l = strlen (feature);
1376 const char *match = find_option (string, feature, l, 2); // .prio=2
1377 if (matching >= 2) {
1378 const char *deny = find_option (string, feature, l, 0); // .prio=1
1379 if (deny > match)
1380 return { denied, strlen (denied) }; // denied
1381 }
1382 if (match && match[l] == '=')
1383 return { match + l + 1, separator_strlen (match + l + 1) }; // value
1384 if (match)
1385 return { "1", 1 }; // allowed
1386 if (matching >= 3) {
1387 if (find_option (string, "all", 3, 1)) // .prio=3
1388 return { "1", 1 }; // allowed
1389 if (find_option (string, "none", 4, 1)) // .prio=4
1390 return { denied, strlen (denied) }; // denied
1391 }
1392 return { fallback, strlen (fallback) }; // not-found
1393}
1394
/// Look up @a feature in the option list @a string without allocating.
/// If @a matchallnone is true, "all" and "none" entries are honored in addition to
/// "feature", "feature=value" and "no-feature" entries.
/// @returns A view of the option value, of @a denied or of @a fallback.
std::string_view
string_option_find_value (const char *string, const char *feature, const char *fallback, const char *denied, bool matchallnone)
{
  // an int argument selects the file-local overload that takes a matching level
  const int matching = matchallnone ? 3 : 2;
  return string_option_find_value (string, feature, fallback, denied, matching);
}
1401
1403String
1404string_option_find (const String &optionlist, const String &feature, const String &fallback)
1405{
1406 std::string_view sv = string_option_find_value (optionlist.data(), feature.data(), fallback.data(), "0", 3);
1407 return { sv.data(), sv.size() };
1408}
1409
1411bool
1412string_option_check (const String &optionlist, const String &feature)
1413{
1414 return string_to_bool (string_option_find (optionlist, feature, "0"), true);
1415}
1416
1417// == Strings ==
1418Strings::Strings (CS &s1)
1419{ push_back (s1); }
1420Strings::Strings (CS &s1, CS &s2)
1421{ push_back (s1); push_back (s2); }
1422Strings::Strings (CS &s1, CS &s2, CS &s3)
1423{ push_back (s1); push_back (s2); push_back (s3); }
1424Strings::Strings (CS &s1, CS &s2, CS &s3, CS &s4)
1425{ push_back (s1); push_back (s2); push_back (s3); push_back (s4); }
1426Strings::Strings (CS &s1, CS &s2, CS &s3, CS &s4, CS &s5)
1427{ push_back (s1); push_back (s2); push_back (s3); push_back (s4); push_back (s5); }
1428Strings::Strings (CS &s1, CS &s2, CS &s3, CS &s4, CS &s5, CS &s6)
1429{ push_back (s1); push_back (s2); push_back (s3); push_back (s4); push_back (s5); push_back (s6); }
1430Strings::Strings (CS &s1, CS &s2, CS &s3, CS &s4, CS &s5, CS &s6, CS &s7)
1431{ push_back (s1); push_back (s2); push_back (s3); push_back (s4); push_back (s5); push_back (s6); push_back (s7); }
1432Strings::Strings (CS &s1, CS &s2, CS &s3, CS &s4, CS &s5, CS &s6, CS &s7, CS &s8)
1433{ push_back (s1); push_back (s2); push_back (s3); push_back (s4); push_back (s5); push_back (s6);
1434 push_back (s7); push_back (s8); }
1435Strings::Strings (CS &s1, CS &s2, CS &s3, CS &s4, CS &s5, CS &s6, CS &s7, CS &s8, CS &s9)
1436{ push_back (s1); push_back (s2); push_back (s3); push_back (s4); push_back (s5); push_back (s6);
1437 push_back (s7); push_back (s8); push_back (s9); }
1438Strings::Strings (CS &s1, CS &s2, CS &s3, CS &s4, CS &s5, CS &s6, CS &s7, CS &s8, CS &s9, CS &sA)
1439{ push_back (s1); push_back (s2); push_back (s3); push_back (s4); push_back (s5); push_back (s6);
1440 push_back (s7); push_back (s8); push_back (s9); push_back (sA); }
1441Strings::Strings (CS &s1, CS &s2, CS &s3, CS &s4, CS &s5, CS &s6, CS &s7, CS &s8, CS &s9, CS &sA, CS &sB)
1442{ push_back (s1); push_back (s2); push_back (s3); push_back (s4); push_back (s5); push_back (s6);
1443 push_back (s7); push_back (s8); push_back (s9); push_back (sA); push_back (sB); }
1444Strings::Strings (CS &s1, CS &s2, CS &s3, CS &s4, CS &s5, CS &s6, CS &s7, CS &s8, CS &s9, CS &sA, CS &sB, CS &sC)
1445{ push_back (s1); push_back (s2); push_back (s3); push_back (s4); push_back (s5); push_back (s6);
1446 push_back (s7); push_back (s8); push_back (s9); push_back (sA); push_back (sB); push_back (sC); }
1447
1448// === Charset Conversions ===
1449static bool
1450unalias_encoding (String &name)
1451{
1452 /* list of common aliases for MIME encodings */
1453 static const char *encoding_aliases[] = {
1454 /* alias MIME (GNU CANONICAL) */
1455 "UTF8", "UTF-8",
1456 /* ascii */
1457 "646", "ASCII",
1458 "ISO_646.IRV:1983", "ASCII",
1459 "CP20127", "ASCII",
1460 /* iso8859 aliases */
1461 "LATIN1", "ISO-8859-1",
1462 "LATIN2", "ISO-8859-2",
1463 "LATIN3", "ISO-8859-3",
1464 "LATIN4", "ISO-8859-4",
1465 "LATIN5", "ISO-8859-9",
1466 "LATIN6", "ISO-8859-10",
1467 "LATIN7", "ISO-8859-13",
1468 "LATIN8", "ISO-8859-14",
1469 "LATIN9", "ISO-8859-15",
1470 "LATIN10", "ISO-8859-16",
1471 "ISO8859-1", "ISO-8859-1",
1472 "ISO8859-2", "ISO-8859-2",
1473 "ISO8859-3", "ISO-8859-3",
1474 "ISO8859-4", "ISO-8859-4",
1475 "ISO8859-5", "ISO-8859-5",
1476 "ISO8859-6", "ISO-8859-6",
1477 "ISO8859-7", "ISO-8859-7",
1478 "ISO8859-8", "ISO-8859-8",
1479 "ISO8859-9", "ISO-8859-9",
1480 "ISO8859-13", "ISO-8859-13",
1481 "ISO8859-15", "ISO-8859-15",
1482 "CP28591", "ISO-8859-1",
1483 "CP28592", "ISO-8859-2",
1484 "CP28593", "ISO-8859-3",
1485 "CP28594", "ISO-8859-4",
1486 "CP28595", "ISO-8859-5",
1487 "CP28596", "ISO-8859-6",
1488 "CP28597", "ISO-8859-7",
1489 "CP28598", "ISO-8859-8",
1490 "CP28599", "ISO-8859-9",
1491 "CP28603", "ISO-8859-13",
1492 "CP28605", "ISO-8859-15",
1493 /* EUC aliases */
1494 "eucCN", "GB2312",
1495 "IBM-eucCN", "GB2312",
1496 "dechanzi", "GB2312",
1497 "eucJP", "EUC-JP",
1498 "IBM-eucJP", "EUC-JP",
1499 "sdeckanji", "EUC-JP",
1500 "eucKR", "EUC-KR",
1501 "IBM-eucKR", "EUC-KR",
1502 "deckorean", "EUC-KR",
1503 "eucTW", "EUC-TW",
1504 "IBM-eucTW", "EUC-TW",
1505 "CNS11643", "EUC-TW",
1506 "CP20866", "KOI8-R",
1507 /* misc */
1508 "PCK", "SHIFT_JIS",
1509 "SJIS", "SHIFT_JIS",
1510 };
1511 /* find a MIME encoding from alias list */
1512 for (uint i = 0; i < sizeof (encoding_aliases) / sizeof (encoding_aliases[0]); i += 2)
1513 if (strcasecmp (encoding_aliases[i], name.c_str()) == 0)
1514 {
1515 name = encoding_aliases[i + 1];
1516 return true;
1517 }
1518 /* last resort, try upper-casing the encoding name */
1519 String upper = name;
1520 for (uint i = 0; i < upper.size(); i++)
1521 if (upper[i] >= 'a' && upper[i] <= 'z')
1522 upper[i] += 'A' - 'a';
1523 if (upper != name)
1524 {
1525 name = upper;
1526 return true;
1527 }
1528 /* alias not found */
1529 return false;
1530}
1531
1532static iconv_t
1533aliased_iconv_open (const String &tocode,
1534 const String &fromcode)
1535{
1536 const iconv_t icNONE = (iconv_t) -1;
1537 iconv_t cd = iconv_open (tocode.c_str(), fromcode.c_str());
1538 if (cd != icNONE)
1539 return cd;
1540 /* lookup destination encoding from alias and retry */
1541 String to_encoding = tocode;
1542 if (unalias_encoding (to_encoding))
1543 {
1544 cd = iconv_open (to_encoding.c_str(), fromcode.c_str());
1545 if (cd != icNONE)
1546 return cd;
1547 /* lookup source and destination encoding from alias and retry */
1548 String from_encoding = fromcode;
1549 if (unalias_encoding (from_encoding))
1550 {
1551 cd = iconv_open (to_encoding.c_str(), from_encoding.c_str());
1552 if (cd != icNONE)
1553 return cd;
1554 }
1555 }
1556 /* lookup source encoding from alias and retry */
1557 String from_encoding = fromcode;
1558 if (unalias_encoding (from_encoding))
1559 {
1560 cd = iconv_open (tocode.c_str(), from_encoding.c_str());
1561 if (cd != icNONE)
1562 return cd;
1563 }
1564 return icNONE; /* encoding not found */
1565}
1566
1572bool
1573text_convert (const String &to_charset,
1574 String &output_string,
1575 const String &from_charset,
1576 const String &input_string,
1577 const String &fallback_charset,
1578 const String &output_mark)
1579{
1580 output_string = "";
1581 const iconv_t icNONE = (iconv_t) -1;
1582 iconv_t alt_cd = icNONE, cd = aliased_iconv_open (to_charset, from_charset);
1583 if (cd == icNONE)
1584 return false; /* failed to perform the requested conversion */
1585 const char *iptr = input_string.c_str();
1586 size_t ilength = input_string.size();
1587 char obuffer[1024]; /* declared outside loop to spare re-initialization */
1588 String alt_charset = fallback_charset;
1589 while (ilength)
1590 {
1591 /* convert */
1592 char *optr = obuffer;
1593 size_t olength = sizeof (obuffer);
1594 size_t n = iconv (cd, const_cast<char**> (&iptr), &ilength, &optr, &olength);
1595 /* transfer output */
1596 output_string.append (obuffer, optr - obuffer);
1597 /* handle conversion errors */
1598 if (ilength && /* ignore past end errors */
1599 n == (size_t) -1)
1600 {
1601 if (errno == EINVAL || /* unfinished multibyte sequences follows (near end of string) */
1602 errno == EILSEQ) /* invalid multibyte sequence follows */
1603 {
1604 /* open alternate converter */
1605 if (alt_cd == icNONE && alt_charset.size())
1606 {
1607 alt_cd = aliased_iconv_open (to_charset, alt_charset);
1608 alt_charset = ""; /* don't retry iconv_open() */
1609 }
1610 size_t former_ilength = ilength;
1611 if (alt_cd != icNONE)
1612 {
1613 /* convert from alt_charset */
1614 optr = obuffer;
1615 olength = sizeof (obuffer);
1616 n = iconv (alt_cd, const_cast<char**> (&iptr), &ilength, &optr, &olength);
1617 (void) n;
1618 /* transfer output */
1619 output_string.append (obuffer, optr - obuffer);
1620 }
1621 if (ilength == former_ilength)
1622 {
1623 /* failed alternate conversion, mark invalid character */
1624 output_string += output_mark;
1625 iptr++;
1626 ilength--;
1627 }
1628 }
1629 else /* all other errors are considered fatal */
1630 return false; /* failed to perform the requested conversion */
1631 }
1632 }
1633 iconv_close (cd);
1634 if (alt_cd != icNONE)
1635 iconv_close (alt_cd);
1636 return true;
1637
1638}
1639
1641const char*
1642strerror (int errno_num)
1643{
1644 const int old_errno = errno;
1645 const char *result;
1646 {
1647 ScopedPosixLocale posix_locale; // use POSIX locale for this scope
1648 result = ::strerror (errno_num);
1649 }
1650 errno = old_errno;
1651 return result;
1652}
1653
/// Get the strerror() message for the current value of errno.
const char*
strerror()
{
  const int current_errno = errno;
  return strerror (current_errno);
}
1659
1660} // Ase
1661
1662#include "testing.hh"
1663
1664namespace { // Anon
1665using namespace Ase;
1666
1667TEST_INTEGRITY (string_tests);
1668static void
1669string_tests()
1670{
1671 const char *s;
1672 s = "abcabc"; TASSERT (strrstr (s, "bc") == s + 4);
1673 TASSERT (Ase::kvpair_key ("foo=bar=baz") == "foo");
1674 TASSERT (Ase::kvpair_value ("foo=bar=baz") == "bar=baz");
1675 StringS sv;
1676 sv = string_split_any ("a, b, c", ", ");
1677 TCMP (string_join (";", sv), ==, "a;;b;;c");
1678 sv = string_split_any ("a, b, c", ", ", 1);
1679 TCMP (string_join (";", sv), ==, "a; b, c");
1680 sv = string_split_any ("abcdef", "");
1681 TCMP (string_join (";", sv), ==, "a;b;c;d;e;f");
1682 sv = string_split_any ("abcdef", "", 2);
1683 TCMP (string_join (";", sv), ==, "a;b;cdef");
1684 sv = string_split_any (" foo , bar , \t\t baz \n", ",");
1685 TCMP (string_join (";", sv), ==, " foo ; bar ; \t\t baz \n");
1686 TASSERT (string_option_check (":foo:", "foo") == true);
1687 TASSERT (string_option_check (":foo9:", "foo9") == true);
1688 TASSERT (string_option_check (":foo7:", "foo9") == false);
1689 TASSERT (string_option_check (":bar:", "bar") == true);
1690 TASSERT (string_option_check (":bar=:", "bar") == true);
1691 TASSERT (string_option_find (":bar:", "bar") == "1");
1692 TASSERT (string_option_check (":bar=0:", "bar") == false);
1693 TASSERT (string_option_find (":bar=0:", "bar") == "0");
1694 TASSERT (string_option_find (":no-bar:", "bar") == "");
1695 TASSERT (string_option_check (":bar=no:", "bar") == false);
1696 TASSERT (string_option_check (":bar=false:", "bar") == false);
1697 TASSERT (string_option_check (":bar=off:", "bar") == false);
1698 TASSERT (string_option_check (":bar=1:", "bar") == true);
1699 TASSERT (string_option_check (":bar=2:", "bar") == true);
1700 TASSERT (string_option_check (":bar=3:", "bar") == true);
1701 TASSERT (string_option_check (":bar=4:", "bar") == true);
1702 TASSERT (string_option_check (":bar=5:", "bar") == true);
1703 TASSERT (string_option_check (":bar=6:", "bar") == true);
1704 TASSERT (string_option_check (":bar=7:", "bar") == true);
1705 TASSERT (string_option_check (":bar=8:", "bar") == true);
1706 TASSERT (string_option_check (":bar=9:", "bar") == true);
1707 TASSERT (string_option_check (":bar=09:", "bar") == true);
1708 TASSERT (string_option_check (":bar=yes:", "bar") == true);
1709 TASSERT (string_option_check (":bar=true:", "bar") == true);
1710 TASSERT (string_option_check (":bar=on:", "bar") == true);
1711 TASSERT (string_option_check (":bar=1false:", "bar") == true);
1712 TASSERT (string_option_check (":bar=0true:", "bar") == false);
1713 TASSERT (string_option_check (":foo:", "foo") == true);
1714 TASSERT (string_option_check (":foo9:", "foo9") == true);
1715 TASSERT (string_option_check (":foo7:", "foo9") == false);
1716 TASSERT (string_option_check (":bar:", "bar") == true);
1717 TASSERT (string_option_check (":bar=:", "bar") == true);
1718 TASSERT (string_option_check (":bar=0:", "bar") == false);
1719 TASSERT (string_option_check (":bar=no:", "bar") == false);
1720 TASSERT (string_option_check (":bar=false:", "bar") == false);
1721 TASSERT (string_option_check (":bar=off:", "bar") == false);
1722 TASSERT (string_option_check (":bar=1:", "bar") == true);
1723 TASSERT (string_option_check (":bar=2:", "bar") == true);
1724 TASSERT (string_option_check (":bar=3:", "bar") == true);
1725 TASSERT (string_option_check (":bar=4:", "bar") == true);
1726 TASSERT (string_option_check (":bar=5:", "bar") == true);
1727 TASSERT (string_option_check (":bar=6:", "bar") == true);
1728 TASSERT (string_option_check (":bar=7:", "bar") == true);
1729 TASSERT (string_option_check (":bar=8:", "bar") == true);
1730 TASSERT (string_option_check (":bar=9:", "bar") == true);
1731 TASSERT (string_option_check (":bar=09:", "bar") == true);
1732 TASSERT (string_option_check (":bar=yes:", "bar") == true);
1733 TASSERT (string_option_check (":bar=true:", "bar") == true);
1734 TASSERT (string_option_check (":bar=on:", "bar") == true);
1735 TASSERT (string_option_check (":bar=1false:", "bar") == true);
1736 TASSERT (string_option_check (":bar=0true:", "bar") == false);
1737 String r;
1738 r = string_option_find ("a:b", "a"); TCMP (r, ==, "1");
1739 r = string_option_find ("a:b", "b"); TCMP (r, ==, "1");
1740 r = string_option_find ("a:b", "c", "0"); TCMP (r, ==, "0");
1741 r = string_option_find ("a:b", "c", "7"); TCMP (r, ==, "7");
1742 r = string_option_find ("a:no-b", "b", "0"); TCMP (r, ==, "0");
1743 r = string_option_find ("no-a:b", "a", "0"); TCMP (r, ==, "0");
1744 r = string_option_find ("no-a:b:a", "a"); TCMP (r, ==, "1");
1745 r = string_option_find ("no-a:b:a=5", "a"); TCMP (r, ==, "5");
1746 r = string_option_find ("no-a:b:a=5:c", "a"); TCMP (r, ==, "5");
1747 bool b;
1748 b = string_option_check ("", "a"); TCMP (b, ==, false);
1749 b = string_option_check ("a:b:c", "a"); TCMP (b, ==, true);
1750 b = string_option_check ("no-a:b:c", "a"); TCMP (b, ==, false);
1751 b = string_option_check ("no-a:b:a=5:c", "b"); TCMP (b, ==, true);
1752 b = string_option_check ("x:all", ""); TCMP (b, ==, false); // must have feature?
1753 TASSERT (typeid_name<int>() == String ("int"));
1754 TASSERT (typeid_name<bool>() == String ("bool"));
1755 TASSERT (typeid_name<::Ase::Strings>() == String ("Ase::Strings"));
1756 TASSERT (string_from_double (1.0) == "1");
1757 TASSERT (string_from_double (-1.0) == "-1");
1758 TASSERT (string_from_double (0.0) == "0");
1759 TASSERT (string_from_double (0.5) == "0.5");
1760 TASSERT (string_from_double (-0.5) == "-0.5");
1761 TASSERT (string_to_int ("-1") == -1);
1762 TASSERT (string_to_int ("9223372036854775807") == 9223372036854775807LL);
1763 TASSERT (string_to_int ("-9223372036854775808") == -9223372036854775807LL - 1);
1764 TASSERT (string_to_uint ("0") == 0);
1765 TASSERT (string_to_uint ("1") == 1);
1766 TASSERT (string_to_uint ("18446744073709551615") == 18446744073709551615ULL);
1767 TASSERT (string_to_bool ("0") == false);
1768 TASSERT (string_to_bool ("1") == true);
1769 TASSERT (string_to_bool ("true") == true);
1770 TASSERT (string_to_bool ("false") == false);
1771 TASSERT (string_to_bool ("on") == 1);
1772 TASSERT (string_to_bool ("off") == 0);
1773 TCMP (string_to_cquote ("\""), ==, "\"\\\"\"");
1774 TCMP (string_to_cquote ("\1"), ==, "\"\\001\"");
1775 TCMP (string_to_cquote ("A\nB"), ==, "\"A\\nB\"");
1776 TASSERT (string_startswith ("foo", "fo") == true);
1777 TASSERT (string_startswith ("foo", "o") == false);
1778 TASSERT (string_match_identifier_tail ("x.FOO", "Foo") == true);
1779 TASSERT (string_match_identifier_tail ("x.FOO", "X-Foo") == true);
1780 TASSERT (string_match_identifier_tail ("xFOO", "Foo") == false);
1781 TASSERT (string_is_uuid ("c18888f8-f026-4f70-92dd-78d4b16e54d5") == true);
1782 TASSERT (string_cmp_uuid ("c18888f8-f026-4f70-92dd-78D4B16E54D5", "C18888F8-f026-4f70-92dd-78d4b16e54d5") == 0);
1783 TASSERT (string_cmp_uuid ("c18888f8-f026-4f70-92dd-78d4b16e54d4", "c18888f8-f026-4f70-92dd-78d4b16e54d5") < 0);
1784 TASSERT (string_cmp_uuid ("c18888f8-f026-4f70-92dd-78d4b16e54d5", "c18888f8-f026-4f70-92dd-78d4b16e54d4") > 0);
1785 TCMP (string_url_encode ("x + z"), ==, "x%20%2B%20z");
1786 TCMP (string_url_encode ("x + z", true), ==, "x+%2B+z");
1787 TCMP (string_url_decode ("x%20%2B%20z"), ==, "x + z");
1788 TCMP (string_url_decode ("x+%2B+z"), ==, "x+++z");
1789 TCMP (string_url_decode ("x+%2B+z", true), ==, "x + z");
1790 TASSERT (string_find_word ("mygzip", "gzip") == nullptr);
1791 TASSERT (string_find_word ("gzip2", "gzip") == nullptr);
1792 TASSERT (string_find_word ("mygzip,gzip-2", "gzip") != nullptr);
1793}
1794
1795} // Anon
#define EINVAL
T append(T... args)
T begin(T... args)
T c_str(T... args)
Class to push the POSIX/C locale_t (UTF-8) for the scope of its lifetime.
Definition formatter.hh:19
T data(T... args)
T empty(T... args)
T end(T... args)
T erase(T... args)
errno
T find(T... args)
iconv_close
iconv
iconv_open
#define assert_return(expr,...)
Return from the current function if expr is unmet and issue an assertion warning.
Definition internal.hh:29
#define MIN(a, b)
Yield minimum of a and b.
Definition internal.hh:55
#define return_unless(cond,...)
Return silently if cond does not evaluate to true with return value ...
Definition internal.hh:71
#define TEST_INTEGRITY(FUNC)
Register func as an integrity test.
Definition internal.hh:77
isalnum
isalpha
T isinf(T... args)
T isnan(T... args)
isxdigit
typedef char
T max(T... args)
T min(T... args)
The Anklang C++ API namespace.
Definition api.hh:9
std::string string_format(const char *format, const Args &...args) __attribute__((__format__(__printf__
Format a string similar to sprintf(3) with support for std::string and std::ostringstream convertible...
bool string_isupper(const String &str)
Check if all string characters are Unicode upper case characters.
Definition strings.cc:166
String string_to_identifier(const String &input)
Force lower case, alphanumerics + underscore and non-digit start.
Definition strings.cc:54
std::vector< double > string_to_double_vector(const String &string)
Parse a string into a list of doubles, expects ';' as delimiter.
Definition strings.cc:721
int string_cmp(const String &s1, const String &s2)
Like strcmp(3) for UTF-8 strings.
Definition strings.cc:259
String string_from_cquote(const String &input)
Parse a possibly quoted C string into regular string.
Definition strings.cc:1043
String string_from_errno(int errno_val)
Returns a String describing the passed in errno value, similar to strerror().
Definition strings.cc:802
uint64_t uint64
A 64-bit unsigned integer, use PRI*64 in format strings.
Definition cxxaux.hh:25
String string_casefold(const String &src)
Yield UTF-8 string useful for case insensitive comparisons.
Definition strings.cc:249
String string_join(const String &junctor, const StringS &strvec)
Definition strings.cc:452
bool string_is_uuid(const String &uuid_string)
Returns whether uuid_string contains a properly formatted UUID string.
Definition strings.cc:815
String string_totitle(const String &str)
Convert all string characters into Unicode title characters.
Definition strings.cc:176
String string_vprintf(const char *format, va_list vargs)
Formatted printing ala vprintf() into a String, using the POSIX/C locale.
Definition strings.cc:309
StringS string_split(const String &string, const String &splitter, size_t maxn)
Definition strings.cc:343
String string_hexdump(const void *addr, size_t length, size_t initial_offset)
Definition strings.cc:1184
bool string_is_canonified(const String &string, const String &valid_chars)
Check if string_canonify() would modify string.
Definition strings.cc:90
String string_from_long_double(long double value)
Convert a long double into a string, using the POSIX/C locale.
Definition strings.cc:710
String string_to_hex(const String &input)
Convert bytes in string input to hexadecimal numbers.
Definition strings.cc:1171
String string_from_double(double value)
Convert a double into a string, using the POSIX/C locale.
Definition strings.cc:699
String string_tolower(const String &str)
Convert all string characters into Unicode lower case characters.
Definition strings.cc:136
uint8_t uint8
An 8-bit unsigned integer.
Definition cxxaux.hh:22
bool string_match_identifier_tail(const String &ident, const String &tail)
Variant of string_match_identifier() that matches tail against ident at word boundary.
Definition strings.cc:919
String string_lstrip(const String &input)
Strip whitespaces from the left of a string.
Definition strings.cc:1106
bool string_option_check(const String &optionlist, const String &feature)
Check if an option is set/unset in an options list string.
Definition strings.cc:1412
int64_t int64
A 64-bit signed integer, use PRI*64 in format strings.
Definition cxxaux.hh:29
String string_capitalize(const String &str, size_t maxn, bool rest_tolower)
Capitalize words, so the first letter is upper case, the rest lower case.
Definition strings.cc:186
String string_vector_find_value(const StringS &svector, const String &prefix, const String &fallback)
Definition strings.cc:1252
String string_rstrip(const String &input)
Strip whitespaces from the right of a string.
Definition strings.cc:1116
String string_toupper(const String &str)
Convert all string characters into Unicode upper case characters.
Definition strings.cc:156
String string_normalize_nfd(const String &src)
Yield normalized decomposed UTF-8 string.
Definition strings.cc:219
int string_cmp_uuid(const String &uuid_string1, const String &uuid_string2)
Returns whether uuid_string1 compares smaller (-1), equal (0) or greater (+1) to uuid_string2.
Definition strings.cc:839
int string_casecmp(const String &s1, const String &s2)
Like strcasecmp(3) for UTF-8 strings.
Definition strings.cc:266
String string_from_int(int64 value)
Convert a 64bit signed integer into a string.
Definition strings.cc:604
String string_from_uint(uint64 value)
Convert a 64bit unsigned integer into a string.
Definition strings.cc:561
const char * string_find_word(const char *haystack, const char *word)
Find occurrence of word in haystack.
Definition strings.cc:520
String string_from_double_vector(const std::vector< double > &dvec, const String &delim)
Construct a string out of all double values passed in dvec, separated by delim.
Definition strings.cc:759
void string_vector_erase_empty(StringS &svector)
Remove empty elements from a string vector.
Definition strings.cc:414
long double posix_locale_strtold(const char *nptr, char **endptr)
Parse a double from a string ala strtod(), trying locale specific characters and POSIX/C formatting.
Definition strings.cc:626
uint32_t unichar
A 32-bit unsigned integer used for Unicode characters.
Definition cxxaux.hh:30
String string_from_pretty_function_name(const char *cxx_pretty_function)
Definition strings.cc:934
double string_to_seconds(const String &string, double fallback)
Parse string into seconds.
Definition strings.cc:773
StringS cstrings_to_vector(const char *s,...)
Construct a StringS from a NULL terminated list of string arguments.
Definition strings.cc:1265
const String & string_set_ascii_alnum()
Returns a string containing all of 0-9, A-Z and a-z.
Definition strings.cc:118
long double string_to_long_double(const String &string)
Parse a long double from a string, trying locale specific characters and POSIX/C formatting.
Definition strings.cc:660
int64 string_to_int(const String &string, size_t *consumed, uint base)
Parse a string into a 64bit integer, optionally specifying the expected number base.
Definition strings.cc:578
String string_replace(const String &input, const String &marker, const String &replacement, size_t maxn)
Replace substring marker in input with replacement, at most maxn times.
Definition strings.cc:1144
StringS string_split_any(const String &string, const String &splitchars, size_t maxn)
Definition strings.cc:365
String string_normalize_nfkc(const String &src)
Formatting stripped normalized composed UTF-8 string.
Definition strings.cc:229
void string_vector_strip(StringS &svector)
Strip all elements of a string vector, see string_strip().
Definition strings.cc:442
String string_url_decode(const String &urlstr, const bool form_url_encoded)
Decode URL %-sequences in a string, decode '+' if form_url_encoded.
Definition strings.cc:965
String string_option_find(const String &optionlist, const String &feature, const String &fallback)
Retrieve the option value from an options list separated by ':' or ';' or fallback.
Definition strings.cc:1404
uint64 string_to_uint(const String &string, size_t *consumed, uint base)
Parse a string into a 64bit unsigned integer, optionally specifying the expected number base.
Definition strings.cc:540
double string_to_double(const String &string)
Parse a double from a string, trying locale specific characters and POSIX/C formatting.
Definition strings.cc:674
const String & string_set_a2z()
Returns a string containing all of a-z.
Definition strings.cc:102
bool string_has_int(const String &string)
Checks if a string contains a digit, optionally preceded by whitespaces.
Definition strings.cc:568
String string_normalize_nfkd(const String &src)
Formatting stripped normalized decomposed UTF-8 string.
Definition strings.cc:239
String string_normalize_nfc(const String &src)
Yield normalized composed UTF-8 string.
Definition strings.cc:209
std::string String
Convenience alias for std::string.
Definition cxxaux.hh:35
String string_vector_find(const StringS &svector, const String &prefix, const String &fallback)
Definition strings.cc:1235
bool string_to_bool(const String &string, bool fallback)
Definition strings.cc:467
void string_vector_rstrip(StringS &svector)
Right-strip all elements of a string vector, see string_rstrip().
Definition strings.cc:434
String string_multiply(const String &s, uint64 count)
Reproduce a string s for count times.
Definition strings.cc:31
long double current_locale_strtold(const char *nptr, char **endptr)
Parse a double from a string ala strtod(), trying locale specific characters and POSIX/C formatting.
Definition strings.cc:638
String string_canonify(const String &string, const String &valid_chars, const String &substitute)
Definition strings.cc:68
String string_strip(const String &input)
Strip whitespaces from the left and right of a string.
Definition strings.cc:1126
const String & string_set_A2Z()
Returns a string containing all of A-Z.
Definition strings.cc:110
String string_from_bool(bool value)
Convert a boolean value into a string.
Definition strings.cc:533
uint32_t uint32
A 32-bit unsigned integer.
Definition cxxaux.hh:24
uint32_t uint
Provide 'uint' as convenience type.
Definition cxxaux.hh:18
String string_url_encode(const String &rawstr, const bool form_url_encoded)
Encode special characters to URL %-sequences, encode space as '+' if form_url_encoded.
Definition strings.cc:989
bool string_endswith(const String &string, const String &fragment)
Returns whether string ends with fragment.
Definition strings.cc:863
String string_substitute_char(const String &input, const char match, const char subst)
Replace all occurrences of match in input with subst.
Definition strings.cc:1159
bool string_islower(const String &str)
Check if all string characters are Unicode lower case characters.
Definition strings.cc:146
bool string_match_identifier(const String &ident1, const String &ident2)
Check equality of strings canonicalized to "[0-9a-z_]+".
Definition strings.cc:926
bool text_convert(const String &to_charset, String &output_string, const String &from_charset, const String &input_string, const String &fallback_charset, const String &output_mark)
Definition strings.cc:1573
String string_to_cquote(const String &str)
Returns a string as C string including double quotes.
Definition strings.cc:1036
String string_from_float(float value)
Convert a float into a string, using the POSIX/C locale.
Definition strings.cc:688
void string_vector_lstrip(StringS &svector)
Left-strip all elements of a string vector, see string_lstrip().
Definition strings.cc:426
void memset4(uint32 *mem, uint32 filler, uint length)
Fill a memory area with a 32-bit quantity.
Definition strings.cc:1221
String string_to_cescape(const String &str)
Definition strings.cc:1011
String string_locale_vprintf(const char *format, va_list vargs)
Formatted printing like string_vprintf using the current locale.
Definition strings.cc:316
bool string_startswith(const String &string, const String &fragment)
Returns whether string starts with fragment.
Definition strings.cc:846
T push_back(T... args)
T reserve(T... args)
T signbit(T... args)
T size(T... args)
T sort(T... args)
typedef uint64_t
strcasecmp
strchr
strerror_r
strlen
strncmp
strstr
strtold
strtol
strtoull
T substr(T... args)
typedef ssize_t
#define TASSERT(cond)
Unconditional test assertion, enters breakpoint if not fulfilled.
Definition testing.hh:24
#define TCMP(a, cmp, b)
Compare a and b according to operator cmp, verbose on failure.
Definition testing.hh:23
tolower
toupper
va_copy
vsnprintf
wmemset