// Reports whether the character `c` may begin an identifier.
// The visible part of the test accepts '_' and '@'. The leading clause of the
// expression is not shown in this excerpt (presumably a letter check - confirm
// against the full file).
36 static bool isIdentifierStart (
const juce_wchar c)
noexcept
39 || c ==
'_' || c ==
'@';
// Reports whether the character `c` may appear inside an identifier.
// The visible part of the test accepts '_' and '@'. The leading clause of the
// expression is not shown in this excerpt (presumably a letter-or-digit check -
// confirm against the full file).
42 static bool isIdentifierBody (
const juce_wchar c)
noexcept
45 || c ==
'_' || c ==
'@';
// Reserved words that are exactly two characters long.
// The table ends with a nullptr sentinel so it can be walked without a count.
static const char* const keywords2Char[] =
{
    "do", "if", "or",
    nullptr
};
// Reserved words that are exactly three characters long.
// The table ends with a nullptr sentinel so it can be walked without a count.
static const char* const keywords3Char[] =
{
    "and", "asm", "for", "int",
    "new", "not", "try", "xor",
    nullptr
};
// Reserved words that are exactly four characters long.
// The table ends with a nullptr sentinel so it can be walked without a count.
static const char* const keywords4Char[] =
{
    "auto", "bool", "case", "char",
    "else", "enum", "goto", "long",
    "this", "true", "void",
    nullptr
};
// Reserved words that are exactly five characters long.
// The table ends with a nullptr sentinel so it can be walked without a count.
static const char* const keywords5Char[] =
{
    "bitor", "break", "catch", "class", "compl",
    "const", "false", "final", "float", "or_eq",
    "short", "throw", "union", "using", "while",
    nullptr
};
// Reserved words that are exactly six characters long.
// The table ends with a nullptr sentinel so it can be walked without a count.
static const char* const keywords6Char[] =
{
    "and_eq", "bitand", "delete", "double", "export",
    "extern", "friend", "import", "inline", "module",
    "not_eq", "public", "return", "signed", "sizeof",
    "static", "struct", "switch", "typeid", "xor_eq",
    nullptr
};
// Reserved words that are exactly seven characters long.
// NOTE(review): the rest of this initialiser (any further entries, the
// terminating entry and the closing brace) is not visible in this excerpt.
69 static const char*
const keywords7Char[] =
70 {
"__cdecl",
"_Pragma",
"alignas",
"alignof",
"concept",
"default",
71 "mutable",
"nullptr",
"private",
"typedef",
"uint8_t",
"virtual",
// Reserved words that have no dedicated per-length table: '@'-prefixed
// directives, calling-convention specifiers and the remaining keywords.
// The table ends with a nullptr sentinel so it can be walked without a count.
static const char* const keywordsOther[] =
{
    // '@'-prefixed directives
    "@class", "@dynamic", "@end", "@implementation", "@interface",
    "@public", "@private", "@protected", "@property", "@synthesize",

    // calling conventions
    "__fastcall", "__stdcall",

    // everything else
    "atomic_cancel", "atomic_commit", "atomic_noexcept",
    "char16_t", "char32_t",
    "co_await", "co_return", "co_yield",
    "const_cast", "constexpr", "continue",
    "decltype", "dynamic_cast", "explicit", "namespace",
    "noexcept", "operator", "override",
    "protected", "register", "reinterpret_cast", "requires",
    "static_assert", "static_cast", "synchronized",
    "template", "thread_local", "typename", "unsigned",
    "volatile",

    nullptr
};
// Fragment of a keyword lookup; the enclosing signature and the switch header
// are not visible in this excerpt.
// Token lengths 2 to 7 each select the keyword table dedicated to that length.
88 case 2: k = keywords2Char;
break;
89 case 3: k = keywords3Char;
break;
90 case 4: k = keywords4Char;
break;
91 case 5: k = keywords5Char;
break;
92 case 6: k = keywords6Char;
break;
93 case 7: k = keywords7Char;
break;
// Lengths below 2 or above 16 are singled out here; the action taken is not
// visible (presumably an early "not a keyword" result - confirm).
96 if (tokenLength < 2 || tokenLength > 16)
// Walk the selected table `k` entry by entry until its nullptr sentinel.
103 for (
int i = 0; k[i] !=
nullptr; ++i)
// Consumes an identifier from `source` and classifies it.
// Returns CPlusPlusCodeTokeniser::tokenType_keyword or
// CPlusPlusCodeTokeniser::tokenType_identifier.
110 template <
typename Iterator>
111 static int parseIdentifier (Iterator& source)
noexcept
// Zero-initialised scratch buffer with room for 100 code units.
114 String::CharPointerType::CharType possibleIdentifier[100] = {};
// Keep consuming for as long as the upcoming character is an identifier-body character.
117 while (isIdentifierBody (source.peekNextChar()))
119 auto c = source.nextChar();
// Something is done only for the first 20 characters; the guarded statement is
// not visible here (presumably the character is stored in the buffer - confirm).
121 if (tokenLength < 20)
// Only lengths 2 to 16 take this branch; the guarded statements are not visible
// (presumably the keyword lookup that leads to the keyword result - confirm).
127 if (tokenLength > 1 && tokenLength <= 16)
132 return CPlusPlusCodeTokeniser::tokenType_keyword;
135 return CPlusPlusCodeTokeniser::tokenType_identifier;
// Looks at the next character of `source` without consuming it and tests
// whether it is one of the suffix letters l, L, u or U.
// NOTE(review): what happens on a match, and the value returned, are not
// visible in this excerpt.
138 template <
typename Iterator>
139 static bool skipNumberSuffix (Iterator& source)
141 auto c = source.peekNextChar();
143 if (c ==
'l' || c ==
'L' || c ==
'u' || c ==
'U')
// Returns true when `c` is a hexadecimal digit: '0'-'9', 'a'-'f' or 'A'-'F'.
152 static bool isHexDigit (
const juce_wchar c)
noexcept
154 return (c >=
'0' && c <=
'9')
155 || (c >=
'a' && c <=
'f')
156 || (c >=
'A' && c <=
'F');
// Tries to read a hexadecimal integer literal from `source`.
// Visible steps: a test for a leading '-', a required '0', a required 'x' or
// 'X', a run of hex digits, and finally the result of skipNumberSuffix().
// NOTE(review): the statements guarded by each test are not visible here
// (presumably the mismatches return false - confirm).
159 template <
typename Iterator>
160 static bool parseHexLiteral (Iterator& source)
noexcept
162 if (source.peekNextChar() ==
'-')
165 if (source.nextChar() !=
'0')
168 auto c = source.nextChar();
170 if (c !=
'x' && c !=
'X')
175 while (isHexDigit (source.peekNextChar()))
184 return skipNumberSuffix (source);
// Returns true when `c` is an octal digit, i.e. in the range '0'-'7'.
187 static bool isOctalDigit (
const juce_wchar c)
noexcept
189 return c >=
'0' && c <=
'7';
// Tries to read an octal integer literal from `source`.
// Visible steps: a test for a leading '-', a required '0', a required first
// octal digit, a run of further octal digits, and finally the result of
// skipNumberSuffix().
// NOTE(review): the statements guarded by each test are not visible here
// (presumably the mismatches return false - confirm).
192 template <
typename Iterator>
193 static bool parseOctalLiteral (Iterator& source)
noexcept
195 if (source.peekNextChar() ==
'-')
198 if (source.nextChar() !=
'0')
201 if (! isOctalDigit (source.nextChar()))
204 while (isOctalDigit (source.peekNextChar()))
207 return skipNumberSuffix (source);
// Returns true when `c` is a decimal digit, i.e. in the range '0'-'9'.
210 static bool isDecimalDigit (
const juce_wchar c)
noexcept
212 return c >=
'0' && c <=
'9';
// Tries to read a decimal integer literal from `source`.
// Visible steps: a test for a leading '-', a run of decimal digits, and
// finally the result of skipNumberSuffix().
// NOTE(review): the loop body and any checks between the loop and the return
// are not visible in this excerpt.
215 template <
typename Iterator>
216 static bool parseDecimalLiteral (Iterator& source)
noexcept
218 if (source.peekNextChar() ==
'-')
222 while (isDecimalDigit (source.peekNextChar()))
231 return skipNumberSuffix (source);
// Tries to read a floating-point literal from `source`.
// Visible steps, in order: a test for a leading '-', a run of decimal digits,
// a check for '.', a second run of digits, a check for an 'e'/'E' exponent
// with a '+' or '-' sign test and its own digit run, and a check for an
// 'f'/'F' suffix.
// NOTE(review): the statements guarded by these tests and the return values
// are not visible in this excerpt.
234 template <
typename Iterator>
235 static bool parseFloatLiteral (Iterator& source)
noexcept
237 if (source.peekNextChar() ==
'-')
// Digits before the decimal point.
242 while (isDecimalDigit (source.peekNextChar()))
248 const bool hasPoint = (source.peekNextChar() ==
'.');
// Second digit run (presumably the fractional part - confirm).
254 while (isDecimalDigit (source.peekNextChar()))
264 auto c = source.peekNextChar();
265 bool hasExponent = (c ==
'e' || c ==
'E');
270 c = source.peekNextChar();
// The exponent is tested for a '+' or '-' sign.
272 if (c ==
'+' || c ==
'-')
275 int numExpDigits = 0;
277 while (isDecimalDigit (source.peekNextChar()))
// An exponent with no digits is singled out here; the action is not visible.
283 if (numExpDigits == 0)
287 c = source.peekNextChar();
289 if (c ==
'f' || c ==
'F')
// No 'f'/'F' suffix: the case with neither an exponent nor a point is singled
// out (presumably rejected as not being a float - confirm).
291 else if (! (hasExponent || hasPoint))
// Classifies the numeric literal at `source`.
// The parsers are tried in the order float, hex, octal, decimal. The first to
// succeed gives tokenType_float (float) or tokenType_integer (the other
// three); if none succeeds the result is tokenType_error.
// NOTE(review): `original` is a copy of the iterator taken on entry, presumably
// used to rewind `source` between attempts - those lines are not visible.
297 template <
typename Iterator>
298 static int parseNumber (Iterator& source)
300 const Iterator original (source);
302 if (parseFloatLiteral (source))
return CPlusPlusCodeTokeniser::tokenType_float;
305 if (parseHexLiteral (source))
return CPlusPlusCodeTokeniser::tokenType_integer;
308 if (parseOctalLiteral (source))
return CPlusPlusCodeTokeniser::tokenType_integer;
311 if (parseDecimalLiteral (source))
return CPlusPlusCodeTokeniser::tokenType_integer;
314 return CPlusPlusCodeTokeniser::tokenType_error;
// Skips over a quoted string in `source`.
// The first character consumed is taken as the quote character. Later
// characters are read one at a time and tested against that quote and
// against 0 (end of input).
// NOTE(review): the loop structure and any escape handling are not visible here.
317 template <
typename Iterator>
318 static void skipQuotedString (Iterator& source)
noexcept
320 auto quote = source.nextChar();
324 auto c = source.nextChar();
326 if (c == quote || c == 0)
// Skips over a comment in `source`.
// Characters are consumed one at a time. The visible test matches on a 0
// character (end of input) or on a '/' that directly follows a '*'.
// NOTE(review): the loop structure and the action on a match are not visible
// (presumably the loop ends there - confirm).
334 template <
typename Iterator>
335 static void skipComment (Iterator& source)
noexcept
337 bool lastWasStar =
false;
341 auto c = source.nextChar();
343 if (c == 0 || (c ==
'/' && lastWasStar))
// Remember whether this character was '*', for the test on the next one.
346 lastWasStar = (c ==
'*');
// Skips a preprocessor line in `source`.
// Visible behaviour: quoted strings are skipped via skipQuotedString(); a
// copy of the iterator is used to look one character further ahead for '/'
// or '*'; on '\n' or '\r' the rest of the line is skipped, and if the
// previous character was a backslash the function calls itself again.
// NOTE(review): the conditions guarding several of these steps are not
// visible in this excerpt.
350 template <
typename Iterator>
351 static void skipPreprocessorLine (Iterator& source)
noexcept
353 bool lastWasBackslash =
false;
357 auto c = source.peekNextChar();
361 skipQuotedString (source);
// Look ahead on a copy so that `source` itself is not advanced.
367 Iterator next (source);
369 auto c2 = next.peekNextChar();
371 if (c2 ==
'/' || c2 ==
'*')
378 if (c ==
'\n' || c ==
'\r')
380 source.skipToEndOfLine();
// A backslash just before the line break: process the following line too.
382 if (lastWasBackslash)
383 skipPreprocessorLine (source);
388 lastWasBackslash = (c ==
'\\');
// Tests whether the next character of `source` equals `c`.
// NOTE(review): the guarded statement is not visible in this excerpt
// (presumably it skips that character - confirm).
393 template <
typename Iterator>
394 static void skipIfNextCharMatches (Iterator& source,
const juce_wchar c)
noexcept
396 if (source.peekNextChar() == c)
// Two-candidate overload: tests whether the next character of `source`
// equals either `c1` or `c2`.
// NOTE(review): the guarded statement is not visible in this excerpt
// (presumably it skips that character - confirm).
400 template <
typename Iterator>
401 static void skipIfNextCharMatches (Iterator& source,
const juce_wchar c1,
const juce_wchar c2)
noexcept
403 auto c = source.peekNextChar();
405 if (c == c1 || c == c2)
// Reads one token from `source` and returns its CPlusPlusCodeTokeniser
// token type.
// Leading whitespace is skipped first, then the next character is inspected.
// NOTE(review): most case labels and the switch header are not visible in
// this excerpt, so the comments below describe only what the visible lines show.
409 template <
typename Iterator>
410 static int readNextToken (Iterator& source)
412 source.skipWhitespace();
413 auto firstChar = source.peekNextChar();
// A leading digit: attempt to parse a number.
420 case '0':
case '1':
case '2':
case '3':
case '4':
421 case '5':
case '6':
case '7':
case '8':
case '9':
424 auto result = parseNumber (source);
// When number parsing fails, a '.' first character yields a punctuation token.
426 if (result == CPlusPlusCodeTokeniser::tokenType_error)
430 if (firstChar ==
'.')
431 return CPlusPlusCodeTokeniser::tokenType_punctuation;
441 return CPlusPlusCodeTokeniser::tokenType_punctuation;
447 return CPlusPlusCodeTokeniser::tokenType_bracket;
// Quoted text is consumed whole and reported as a string token.
451 skipQuotedString (source);
452 return CPlusPlusCodeTokeniser::tokenType_string;
// Operator handling: a following '+' or '=' is tested for.
456 skipIfNextCharMatches (source,
'+',
'=');
457 return CPlusPlusCodeTokeniser::tokenType_operator;
// A number parse is attempted first; on failure a following '-' or '=' is
// tested for and the token is an operator.
462 auto result = parseNumber (source);
464 if (result == CPlusPlusCodeTokeniser::tokenType_error)
466 skipIfNextCharMatches (source,
'-',
'=');
467 return CPlusPlusCodeTokeniser::tokenType_operator;
476 skipIfNextCharMatches (source,
'=');
477 return CPlusPlusCodeTokeniser::tokenType_operator;
// Two comment-producing paths: one skips to the end of the line, the other
// calls skipComment(). Otherwise the result is an operator.
482 auto nextChar = source.peekNextChar();
486 source.skipToEndOfLine();
487 return CPlusPlusCodeTokeniser::tokenType_comment;
493 skipComment (source);
494 return CPlusPlusCodeTokeniser::tokenType_comment;
500 return CPlusPlusCodeTokeniser::tokenType_operator;
506 return CPlusPlusCodeTokeniser::tokenType_operator;
// '|', '&' and '^': a repeat of the same character and then '=' are tested for.
509 case '|':
case '&':
case '^':
511 skipIfNextCharMatches (source, firstChar);
512 skipIfNextCharMatches (source,
'=');
513 return CPlusPlusCodeTokeniser::tokenType_operator;
516 skipPreprocessorLine (source);
517 return CPlusPlusCodeTokeniser::tokenType_preprocessor;
// A character that can start an identifier is handed to parseIdentifier().
520 if (isIdentifierStart (firstChar))
521 return parseIdentifier (source);
527 return CPlusPlusCodeTokeniser::tokenType_error;
539 juce_wchar peekNextChar()
noexcept {
return *t; }
540 void skip()
noexcept {
if (! isEOF()) { ++t; ++numChars; } }
541 void skipWhitespace()
noexcept {
while (t.
isWhitespace()) skip(); }
542 void skipToEndOfLine()
noexcept {
while (*t !=
'\r' && *t !=
'\n' && *t != 0) skip(); }
543 bool isEOF()
const noexcept {
return t.
isEmpty(); }
// Fragment of a routine (the start of its signature is not visible here) that
// writes the bytes of `utf8` to `out` escaped for use inside a string literal.
// Visible parameters:
//   maxCharsOnLine      - when > 0, a line break is emitted once charsOnLine reaches it
//   breakAtNewLines     - when true, a '\n' byte also requests a line break
//   replaceSingleQuotes - enables a separate branch (body not visible here)
//   allowStringBreaks   - allows the literal to be split after a hex escape
561 const int maxCharsOnLine,
const bool breakAtNewLines,
562 const bool replaceSingleQuotes,
const bool allowStringBreaks)
565 bool lastWasHexEscapeCode =
false;
566 bool trigraphDetected =
false;
// A negative numBytesToRead removes the count bound from this loop condition
// (presumably the loop then stops at a zero byte - confirm).
568 for (
int i = 0; i < numBytesToRead || numBytesToRead < 0; ++i)
570 auto c = (
unsigned char) utf8[i];
571 bool startNewLine =
false;
// Tab, CR, LF, backslash and double-quote are each written as a two-character
// escape, add 2 to charsOnLine and clear both state flags.
576 case '\t': out <<
"\\t"; trigraphDetected =
false; lastWasHexEscapeCode =
false; charsOnLine += 2;
break;
577 case '\r': out <<
"\\r"; trigraphDetected =
false; lastWasHexEscapeCode =
false; charsOnLine += 2;
break;
578 case '\n': out <<
"\\n"; trigraphDetected =
false; lastWasHexEscapeCode =
false; charsOnLine += 2; startNewLine = breakAtNewLines;
break;
579 case '\\': out <<
"\\\\"; trigraphDetected =
false; lastWasHexEscapeCode =
false; charsOnLine += 2;
break;
580 case '\"': out <<
"\\\""; trigraphDetected =
false; lastWasHexEscapeCode =
false; charsOnLine += 2;
break;
// Trigraph tracking; the case label and the emitted text are not visible here.
583 if (trigraphDetected)
587 trigraphDetected =
false;
592 trigraphDetected =
true;
595 lastWasHexEscapeCode =
false;
600 if (numBytesToRead < 0)
604 lastWasHexEscapeCode =
true;
605 trigraphDetected =
false;
610 if (replaceSingleQuotes)
613 lastWasHexEscapeCode =
false;
614 trigraphDetected =
false;
// Bytes 32 to 126 are candidates for being written as-is; the rest of this
// condition is not visible in this excerpt.
622 if (c >= 32 && c < 127 && ! (lastWasHexEscapeCode
626 lastWasHexEscapeCode =
false;
627 trigraphDetected =
false;
// A byte in 32..126 that directly follows a hex escape is written after an
// empty pair of double quotes (presumably so that it cannot be read as part
// of the preceding hex escape - confirm).
630 else if (allowStringBreaks && lastWasHexEscapeCode && c >= 32 && c < 127)
632 out <<
"\"\"" << (
char) c;
633 lastWasHexEscapeCode =
false;
634 trigraphDetected =
false;
640 lastWasHexEscapeCode =
true;
641 trigraphDetected =
false;
// Break the output line when requested or when it is full, except after the
// final byte of a counted input: close the quote, write newLine, reopen it.
648 if ((startNewLine || (maxCharsOnLine > 0 && charsOnLine >= maxCharsOnLine))
649 && (numBytesToRead < 0 || i < numBytesToRead - 1))
652 out <<
"\"" <<
newLine <<
"\"";
653 lastWasHexEscapeCode =
false;