JUCE-7.0.12-0-g4f43011b96 JUCE-7.0.12-0-g4f43011b96
JUCE — C++ application framework with support for VST, VST3, LV2 audio plug-ins

« « « Anklang Documentation
Loading...
Searching...
No Matches
juce_CPlusPlusCodeTokeniserFunctions.h
Go to the documentation of this file.
1 /*
2 ==============================================================================
3
4 This file is part of the JUCE library.
5 Copyright (c) 2022 - Raw Material Software Limited
6
7 JUCE is an open source library subject to commercial or open-source
8 licensing.
9
10 By using JUCE, you agree to the terms of both the JUCE 7 End-User License
11 Agreement and JUCE Privacy Policy.
12
13 End User License Agreement: www.juce.com/juce-7-licence
14 Privacy Policy: www.juce.com/juce-privacy-policy
15
16 Or: You may also use this code under the terms of the GPL v3 (see
17 www.gnu.org/licenses).
18
19 JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
20 EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
21 DISCLAIMED.
22
23 ==============================================================================
24*/
25
26namespace juce
27{
28
29//==============================================================================
35{
/** Returns whether a character may begin an identifier.

    The visible tail of the expression accepts '_' and '@'.
    NOTE(review): listing line 38 (the start of the return expression) is
    missing from this rendering - confirm the full condition against the
    upstream header.
*/
36 static bool isIdentifierStart (const juce_wchar c) noexcept
37 {
39 || c == '_' || c == '@';
40 }
41
/** Returns whether a character may appear inside an identifier.

    The visible tail of the expression accepts '_' and '@'.
    NOTE(review): listing line 44 (the start of the return expression) is
    missing from this rendering - confirm the full condition against the
    upstream header.
*/
42 static bool isIdentifierBody (const juce_wchar c) noexcept
43 {
45 || c == '_' || c == '@';
46 }
47
/** Checks a token against built-in tables of reserved words.

    The tables are grouped by keyword length so that only one table has to be
    scanned for a given tokenLength.

    @param token        the null-terminated token text to look up
    @param tokenLength  the number of characters in the token; selects the table
    @returns            true if the token compares equal to an entry in the
                        selected table
*/
48 static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
49 {
    // Each table is terminated by a nullptr sentinel, which the lookup loop below relies on.
50 static const char* const keywords2Char[] =
51 { "do", "if", "or", nullptr };
52
53 static const char* const keywords3Char[] =
54 { "and", "asm", "for", "int", "new", "not", "try", "xor", nullptr };
55
56 static const char* const keywords4Char[] =
57 { "auto", "bool", "case", "char", "else", "enum", "goto",
58 "long", "this", "true", "void", nullptr };
59
60 static const char* const keywords5Char[] =
61 { "bitor", "break", "catch", "class", "compl", "const", "false", "final",
62 "float", "or_eq", "short", "throw", "union", "using", "while", nullptr };
63
64 static const char* const keywords6Char[] =
65 { "and_eq", "bitand", "delete", "double", "export", "extern", "friend",
66 "import", "inline", "module", "not_eq", "public", "return", "signed",
67 "sizeof", "static", "struct", "switch", "typeid", "xor_eq", nullptr };
68
69 static const char* const keywords7Char[] =
70 { "__cdecl", "_Pragma", "alignas", "alignof", "concept", "default",
71 "mutable", "nullptr", "private", "typedef", "uint8_t", "virtual",
72 "wchar_t", nullptr };
73
    // NOTE(review): "@end" (4 chars), "@class" (6) and "@public" (7) are listed here, but the
    // switch below sends lengths 4, 6 and 7 to the fixed-length tables, so these three entries
    // look unreachable - confirm whether they should move to the matching tables.
74 static const char* const keywordsOther[] =
75 { "@class", "@dynamic", "@end", "@implementation", "@interface", "@public",
76 "@private", "@protected", "@property", "@synthesize", "__fastcall", "__stdcall",
77 "atomic_cancel", "atomic_commit", "atomic_noexcept", "char16_t", "char32_t",
78 "co_await", "co_return", "co_yield", "const_cast", "constexpr", "continue",
79 "decltype", "dynamic_cast", "explicit", "namespace", "noexcept", "operator", "override",
80 "protected", "register", "reinterpret_cast", "requires", "static_assert",
81 "static_cast", "synchronized", "template", "thread_local", "typename", "unsigned",
82 "volatile", nullptr };
83
84 const char* const* k;
85
    // Pick the table that matches the token's length.
86 switch (tokenLength)
87 {
88 case 2: k = keywords2Char; break;
89 case 3: k = keywords3Char; break;
90 case 4: k = keywords4Char; break;
91 case 5: k = keywords5Char; break;
92 case 6: k = keywords6Char; break;
93 case 7: k = keywords7Char; break;
94
95 default:
    // NOTE(review): listing line 96 is missing from this rendering. As shown, the return is
    // unconditional and the keywordsOther assignment below can never run; presumably the
    // missing line is a length guard on the return - confirm against the upstream header.
97 return false;
98
99 k = keywordsOther;
100 break;
101 }
102
    // Linear scan of the chosen table up to its nullptr sentinel.
103 for (int i = 0; k[i] != nullptr; ++i)
104 if (token.compare (CharPointer_ASCII (k[i])) == 0)
105 return true;
106
107 return false;
108 }
109
/** Consumes an identifier from the iterator and classifies it.

    Characters are consumed for as long as isIdentifierBody() accepts them.

    @returns CPlusPlusCodeTokeniser::tokenType_keyword if the consumed text is
             found by isReservedKeyword(), otherwise
             CPlusPlusCodeTokeniser::tokenType_identifier
*/
110 template <typename Iterator>
111 static int parseIdentifier (Iterator& source) noexcept
112 {
113 int tokenLength = 0;
114 String::CharPointerType::CharType possibleIdentifier[100] = {};
    // NOTE(review): listing line 115 is missing from this rendering, so `possible` is never
    // declared in view; presumably it is a character pointer writing into possibleIdentifier.
116
117 while (isIdentifierBody (source.peekNextChar()))
118 {
119 auto c = source.nextChar();
120
    // Only the first 20 characters are stored; tokenLength still counts every character.
121 if (tokenLength < 20)
122 possible.write (c);
123
124 ++tokenLength;
125 }
126
    // The keyword lookup is only attempted for lengths 2 to 16 inclusive.
127 if (tokenLength > 1 && tokenLength <= 16)
128 {
129 possible.writeNull();
130
131 if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
132 return CPlusPlusCodeTokeniser::tokenType_keyword;
133 }
134
135 return CPlusPlusCodeTokeniser::tokenType_identifier;
136 }
137
138 template <typename Iterator>
139 static bool skipNumberSuffix (Iterator& source)
140 {
141 auto c = source.peekNextChar();
142
143 if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
144 source.skip();
145
146 if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
147 return false;
148
149 return true;
150 }
151
152 static bool isHexDigit (const juce_wchar c) noexcept
153 {
154 return (c >= '0' && c <= '9')
155 || (c >= 'a' && c <= 'f')
156 || (c >= 'A' && c <= 'F');
157 }
158
/** Attempts to consume a hexadecimal integer literal.

    The expected shape is an optional '-', then "0x" or "0X", then at least
    one hex digit, then whatever skipNumberSuffix() accepts.

    @returns true if a complete literal was consumed. The iterator may have
             advanced even when false is returned, so callers that need to
             retry should keep their own copy of the starting position.
*/
template <typename Iterator>
static bool parseHexLiteral (Iterator& source) noexcept
{
    if (source.peekNextChar() == '-')
        source.skip();

    if (source.nextChar() != '0')
        return false;

    const auto prefix = source.nextChar();

    if (! (prefix == 'x' || prefix == 'X'))
        return false;

    bool foundDigit = false;

    for (; isHexDigit (source.peekNextChar()); source.skip())
        foundDigit = true;

    return foundDigit && skipNumberSuffix (source);
}
186
187 static bool isOctalDigit (const juce_wchar c) noexcept
188 {
189 return c >= '0' && c <= '7';
190 }
191
/** Attempts to consume an octal integer literal.

    The expected shape is an optional '-', a '0', at least one octal digit,
    then whatever skipNumberSuffix() accepts.

    @returns true if a complete literal was consumed. The iterator may have
             advanced even when false is returned.
*/
template <typename Iterator>
static bool parseOctalLiteral (Iterator& source) noexcept
{
    if (source.peekNextChar() == '-')
        source.skip();

    const auto leadingZero = source.nextChar();

    if (leadingZero != '0')
        return false;

    const auto firstDigit = source.nextChar();

    if (! isOctalDigit (firstDigit))
        return false;

    for (;;)
    {
        if (! isOctalDigit (source.peekNextChar()))
            break;

        source.skip();
    }

    return skipNumberSuffix (source);
}
209
210 static bool isDecimalDigit (const juce_wchar c) noexcept
211 {
212 return c >= '0' && c <= '9';
213 }
214
/** Attempts to consume a decimal integer literal.

    The expected shape is an optional '-', one or more decimal digits, then
    whatever skipNumberSuffix() accepts.

    @returns true if a complete literal was consumed. The iterator may have
             advanced even when false is returned.
*/
template <typename Iterator>
static bool parseDecimalLiteral (Iterator& source) noexcept
{
    if (source.peekNextChar() == '-')
        source.skip();

    bool foundDigit = false;

    for (; isDecimalDigit (source.peekNextChar()); source.skip())
        foundDigit = true;

    if (! foundDigit)
        return false;

    return skipNumberSuffix (source);
}
233
/** Attempts to consume a floating-point literal.

    The expected shape is an optional '-', decimal digits with an optional
    '.' somewhere among them, an optional exponent ('e' or 'E', an optional
    sign, and at least one digit) and an optional 'f' or 'F' suffix.

    A run of digits with neither a point nor an exponent only counts as a
    float when it carries the 'f'/'F' suffix.

    @returns true if a complete literal was consumed. The iterator may have
             advanced even when false is returned.
*/
template <typename Iterator>
static bool parseFloatLiteral (Iterator& source) noexcept
{
    // consumes a run of decimal digits and reports how many there were
    const auto skipDigits = [&source]
    {
        int count = 0;

        while (isDecimalDigit (source.peekNextChar()))
        {
            source.skip();
            ++count;
        }

        return count;
    };

    if (source.peekNextChar() == '-')
        source.skip();

    int mantissaDigits = skipDigits();

    const bool hasPoint = (source.peekNextChar() == '.');

    if (hasPoint)
    {
        source.skip();
        mantissaDigits += skipDigits();
    }

    // at least one digit is needed on one side of the point
    if (mantissaDigits == 0)
        return false;

    const auto exponentMarker = source.peekNextChar();
    const bool hasExponent = (exponentMarker == 'e' || exponentMarker == 'E');

    if (hasExponent)
    {
        source.skip();

        const auto sign = source.peekNextChar();

        if (sign == '+' || sign == '-')
            source.skip();

        if (skipDigits() == 0)
            return false;
    }

    const auto suffix = source.peekNextChar();

    if (suffix == 'f' || suffix == 'F')
    {
        source.skip();
        return true;
    }

    // without the suffix, it's the point or the exponent that makes this a float
    return hasExponent || hasPoint;
}
296
297 template <typename Iterator>
298 static int parseNumber (Iterator& source)
299 {
300 const Iterator original (source);
301
302 if (parseFloatLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_float;
303 source = original;
304
305 if (parseHexLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
306 source = original;
307
308 if (parseOctalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
309 source = original;
310
311 if (parseDecimalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
312 source = original;
313
314 return CPlusPlusCodeTokeniser::tokenType_error;
315 }
316
/** Consumes a quoted string or character literal.

    The first character read is taken as the quote character; everything up
    to and including the next unescaped occurrence of it is consumed. A
    character that follows a backslash is skipped without being examined.
    Reading stops early if the iterator returns 0.
*/
template <typename Iterator>
static void skipQuotedString (Iterator& source) noexcept
{
    const auto openingQuote = source.nextChar();

    while (true)
    {
        const auto ch = source.nextChar();

        if (ch == 0 || ch == openingQuote)
            return;

        if (ch == '\\')
            source.skip();   // an escaped character can never close the string
    }
}
333
/** Consumes the remainder of a block comment.

    Characters are read up to and including the first '/' that directly
    follows a '*', or until the iterator returns 0.
*/
template <typename Iterator>
static void skipComment (Iterator& source) noexcept
{
    bool previousWasStar = false;
    auto ch = source.nextChar();

    while (ch != 0 && ! (previousWasStar && ch == '/'))
    {
        previousWasStar = (ch == '*');
        ch = source.nextChar();
    }
}
349
/** Consumes a preprocessor directive.

    Scanning stops, without consuming it, at the start of a "//" or a block
    comment, at a line ending, or when the iterator returns 0. String
    literals inside the directive are skipped as a unit.
*/
350 template <typename Iterator>
351 static void skipPreprocessorLine (Iterator& source) noexcept
352 {
353 bool lastWasBackslash = false;
354
355 for (;;)
356 {
357 auto c = source.peekNextChar();
358
    // Skip whole string literals so that their contents aren't mistaken for comments.
359 if (c == '"')
360 {
361 skipQuotedString (source);
362 continue;
363 }
364
    // Look one character ahead on a copy of the iterator, leaving `source` untouched.
365 if (c == '/')
366 {
367 Iterator next (source);
368 next.skip();
369 auto c2 = next.peekNextChar();
370
371 if (c2 == '/' || c2 == '*')
372 return;
373 }
374
375 if (c == 0)
376 break;
377
378 if (c == '\n' || c == '\r')
379 {
380 source.skipToEndOfLine();
381
    // NOTE(review): listing line 382 is missing from this rendering. lastWasBackslash is
    // written below but never read in view, so the missing line presumably makes this
    // recursive call conditional on a trailing backslash (line continuation) - confirm.
383 skipPreprocessorLine (source);
384
385 break;
386 }
387
388 lastWasBackslash = (c == '\\');
389 source.skip();
390 }
391 }
392
393 template <typename Iterator>
394 static void skipIfNextCharMatches (Iterator& source, const juce_wchar c) noexcept
395 {
396 if (source.peekNextChar() == c)
397 source.skip();
398 }
399
400 template <typename Iterator>
401 static void skipIfNextCharMatches (Iterator& source, const juce_wchar c1, const juce_wchar c2) noexcept
402 {
403 auto c = source.peekNextChar();
404
405 if (c == c1 || c == c2)
406 source.skip();
407 }
408
/** Skips leading whitespace, consumes one token and returns its type.

    The first non-whitespace character selects how the token is read.

    @returns one of the CPlusPlusCodeTokeniser::tokenType_* values.
             tokenType_error is returned both at the end of the input (a 0
             character, nothing consumed) and for a character that doesn't
             start any recognised token (that one character is consumed).
*/
409 template <typename Iterator>
410 static int readNextToken (Iterator& source)
411 {
412 source.skipWhitespace();
413 auto firstChar = source.peekNextChar();
414
415 switch (firstChar)
416 {
417 case 0:
418 break;
419
420 case '0': case '1': case '2': case '3': case '4':
421 case '5': case '6': case '7': case '8': case '9':
422 case '.':
423 {
424 auto result = parseNumber (source);
425
    // parseNumber() leaves the iterator where it started on failure, so consume one
    // character here. A lone '.' is reported as punctuation; anything else falls
    // through to the return below with the error type.
426 if (result == CPlusPlusCodeTokeniser::tokenType_error)
427 {
428 source.skip();
429
430 if (firstChar == '.')
431 return CPlusPlusCodeTokeniser::tokenType_punctuation;
432 }
433
434 return result;
435 }
436
437 case ',':
438 case ';':
439 case ':':
440 source.skip();
441 return CPlusPlusCodeTokeniser::tokenType_punctuation;
442
443 case '(': case ')':
444 case '{': case '}':
445 case '[': case ']':
446 source.skip();
447 return CPlusPlusCodeTokeniser::tokenType_bracket;
448
449 case '"':
450 case '\'':
451 skipQuotedString (source);
452 return CPlusPlusCodeTokeniser::tokenType_string;
453
    // '+', "++" or "+="
454 case '+':
455 source.skip();
456 skipIfNextCharMatches (source, '+', '=');
457 return CPlusPlusCodeTokeniser::tokenType_operator;
458
    // A '-' is first tried as the sign of a numeric literal; if no number follows,
    // it is treated as '-', "--" or "-=".
459 case '-':
460 {
461 source.skip();
462 auto result = parseNumber (source);
463
464 if (result == CPlusPlusCodeTokeniser::tokenType_error)
465 {
466 skipIfNextCharMatches (source, '-', '=');
467 return CPlusPlusCodeTokeniser::tokenType_operator;
468 }
469
470 return result;
471 }
472
    // Single-character operators that may be followed by '='.
473 case '*': case '%':
474 case '=': case '!':
475 source.skip();
476 skipIfNextCharMatches (source, '=');
477 return CPlusPlusCodeTokeniser::tokenType_operator;
478
    // '/' starts a line comment, a block comment, "/=" or a plain division operator.
479 case '/':
480 {
481 source.skip();
482 auto nextChar = source.peekNextChar();
483
484 if (nextChar == '/')
485 {
486 source.skipToEndOfLine();
487 return CPlusPlusCodeTokeniser::tokenType_comment;
488 }
489
490 if (nextChar == '*')
491 {
492 source.skip();
493 skipComment (source);
494 return CPlusPlusCodeTokeniser::tokenType_comment;
495 }
496
497 if (nextChar == '=')
498 source.skip();
499
500 return CPlusPlusCodeTokeniser::tokenType_operator;
501 }
502
503 case '?':
504 case '~':
505 source.skip();
506 source.skip();
506 return CPlusPlusCodeTokeniser::tokenType_operator;
507
    // These may be doubled (e.g. "<<", "&&") and/or followed by '=' (e.g. "<=", "<<=").
508 case '<': case '>':
509 case '|': case '&': case '^':
510 source.skip();
511 skipIfNextCharMatches (source, firstChar);
512 skipIfNextCharMatches (source, '=');
513 return CPlusPlusCodeTokeniser::tokenType_operator;
514
515 case '#':
516 skipPreprocessorLine (source);
517 return CPlusPlusCodeTokeniser::tokenType_preprocessor;
518
519 default:
520 if (isIdentifierStart (firstChar))
521 return parseIdentifier (source);
522
    // Unrecognised character: consume it so that the caller always makes progress.
523 source.skip();
524 break;
525 }
526
527 return CPlusPlusCodeTokeniser::tokenType_error;
528 }
529
// Iterator over the characters of a String, providing the nextChar / peekNextChar / skip
// interface that the parsing functions in this file call on their `source` argument.
// NOTE(review): the struct's declaration line and listing line 545 are missing from this
// rendering; the member `t` used below is presumably a String::CharPointerType declared there.
534 {
535 StringIterator (const String& s) noexcept : t (s.getCharPointer()) {}
536 StringIterator (String::CharPointerType s) noexcept : t (s) {}
537
    // Returns the current character and advances, or returns 0 without advancing at the end.
538 juce_wchar nextChar() noexcept { if (isEOF()) return 0; ++numChars; return t.getAndAdvance(); }
    // Returns the current character without advancing.
539 juce_wchar peekNextChar()noexcept { return *t; }
    // Advances by one character; does nothing at the end of the string.
540 void skip() noexcept { if (! isEOF()) { ++t; ++numChars; } }
541 void skipWhitespace() noexcept { while (t.isWhitespace()) skip(); }
    // Stops on (does not consume) the '\r' or '\n', or at the end of the string.
542 void skipToEndOfLine() noexcept { while (*t != '\r' && *t != '\n' && *t != 0) skip(); }
543 bool isEOF() const noexcept { return t.isEmpty(); }
544
    // Number of characters consumed so far via nextChar() and skip().
546 int numChars = 0;
547 };
548
549 //==============================================================================
/** Writes a run of UTF-8 bytes to a stream, using C++ escape sequences for
    characters that can't appear literally inside a string literal.

    NOTE(review): several listing lines (583, 604, 610, 619, 623, 640, 648)
    are missing from this rendering, so some conditions below are incomplete;
    the notes on them are hedged accordingly.

    @param out                  the stream to write to
    @param utf8                 the bytes to encode
    @param numBytesToRead       the number of bytes to read; if negative, bytes
                                are read until a zero byte is found
    @param maxCharsOnLine       presumably the line length at which the literal
                                gets broken - its use is on a missing line
    @param breakAtNewLines      if true, a '\n' in the input requests a line break
    @param replaceSingleQuotes  presumably selects whether a single quote is
                                escaped - its use is on a missing line
    @param allowStringBreaks    if true, a printable character that directly
                                follows a hex escape is preceded by a pair of
                                double quotes, ending one literal and starting
                                the next
*/
560 static void writeEscapeChars (OutputStream& out, const char* utf8, const int numBytesToRead,
561 const int maxCharsOnLine, const bool breakAtNewLines,
562 const bool replaceSingleQuotes, const bool allowStringBreaks)
563 {
564 int charsOnLine = 0;
565 bool lastWasHexEscapeCode = false;
566 bool trigraphDetected = false;
567
    // With a negative numBytesToRead the loop condition is always true; the `case 0` branch
    // below is then what ends the function.
568 for (int i = 0; i < numBytesToRead || numBytesToRead < 0; ++i)
569 {
570 auto c = (unsigned char) utf8[i];
571 bool startNewLine = false;
572
573 switch (c)
574 {
575
576 case '\t': out << "\\t"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
577 case '\r': out << "\\r"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
578 case '\n': out << "\\n"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; startNewLine = breakAtNewLines; break;
579 case '\\': out << "\\\\"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
580 case '\"': out << "\\\""; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
581
582 case '?':
    // NOTE(review): listing line 583 (the `if` condition) is missing. The branches write
    // either an escaped or a plain '?', and trigraphDetected tracks the plain ones -
    // presumably to stop "??x" trigraph sequences appearing in the output.
584 {
585 out << "\\?";
586 charsOnLine++;
587 trigraphDetected = false;
588 }
589 else
590 {
591 out << "?";
592 trigraphDetected = true;
593 }
594
595 lastWasHexEscapeCode = false;
596 charsOnLine++;
597 break;
598
599 case 0:
    // In read-until-terminator mode a zero byte ends the output; otherwise it is escaped.
600 if (numBytesToRead < 0)
601 return;
602
603 out << "\\0";
    // NOTE(review): listing line 604 is missing here.
605 trigraphDetected = false;
606 charsOnLine += 2;
607 break;
608
609 case '\'':
    // NOTE(review): listing line 610 (the `if` condition, presumably on replaceSingleQuotes)
    // is missing here.
611 {
612 out << "\\\'";
613 lastWasHexEscapeCode = false;
614 trigraphDetected = false;
615 charsOnLine += 2;
616 break;
617 }
618 // deliberate fall-through...
620
621 default:
    // NOTE(review): listing line 623 (the rest of this condition) is missing.
622 if (c >= 32 && c < 127 && ! (lastWasHexEscapeCode // (have to avoid following a hex escape sequence with a valid hex digit)
624 {
625 out << (char) c;
626 lastWasHexEscapeCode = false;
627 trigraphDetected = false;
628 ++charsOnLine;
629 }
630 else if (allowStringBreaks && lastWasHexEscapeCode && c >= 32 && c < 127)
631 {
    // Split the literal with a pair of quotes so this character can't be read as part
    // of the preceding hex escape.
632 out << "\"\"" << (char) c;
633 lastWasHexEscapeCode = false;
634 trigraphDetected = false;
635 charsOnLine += 3;
636 }
637 else
638 {
    // Everything else is written as a two-digit hex escape.
639 out << (c < 16 ? "\\x0" : "\\x") << String::toHexString ((int) c);
    // NOTE(review): listing line 640 is missing here.
641 trigraphDetected = false;
642 charsOnLine += 4;
643 }
644
645 break;
646 }
647
    // NOTE(review): listing line 648 (the start of this `if`) is missing. The visible part
    // avoids breaking the line after the final byte when the length is known.
649 && (numBytesToRead < 0 || i < numBytesToRead - 1))
650 {
    // Close the current literal, start a new line and reopen the literal.
651 charsOnLine = 0;
652 out << "\"" << newLine << "\"";
653 lastWasHexEscapeCode = false;
654 }
655 }
656 }
657
/** Returns a copy of a string in which writeEscapeChars() has replaced
    characters with their C++ escape sequences.

    The whole string is processed up to its terminator (numBytesToRead is -1),
    with maxCharsOnLine set to -1, breakAtNewLines false, and both
    replaceSingleQuotes and allowStringBreaks true.
*/
666 static String addEscapeChars (const String& s)
667 {
    // NOTE(review): listing line 668 is missing from this rendering; `mo` is presumably a
    // MemoryOutputStream declared there.
669 writeEscapeChars (mo, s.toRawUTF8(), -1, -1, false, true, true);
670 return mo.toString();
671 }
672};
673
674} // namespace juce
Wraps a pointer to a null-terminated ASCII character string, and provides various methods to operate ...
Wraps a pointer to a null-terminated UTF-8 character string, and provides various methods to operate ...
juce_wchar getAndAdvance() noexcept
Returns the character that this pointer is currently pointing to, and then advances the pointer to po...
bool isEmpty() const noexcept
Returns true if this pointer is pointing to a null character.
bool isWhitespace() const noexcept
Returns true if the first character of this string is whitespace.
static bool isLetter(char character) noexcept
Checks whether a character is alphabetic.
static int getHexDigitValue(juce_wchar digit) noexcept
Returns 0 to 15 for '0' to 'F', or -1 for characters that aren't a legal hex digit.
static bool isLetterOrDigit(char character) noexcept
Checks whether a character is alphabetic or numeric.
Writes data to an internal memory buffer, which grows as required.
The base class for streams that write data to some kind of destination.
The JUCE String class!
Definition juce_String.h:53
CharPointerType getCharPointer() const noexcept
Returns the character pointer currently being used to store this string.
const char * toRawUTF8() const
Returns a pointer to a UTF-8 version of this string.
static String toHexString(IntegerType number)
Returns a string representing this numeric value in hexadecimal.
#define JUCE_FALLTHROUGH
Used to silence Wimplicit-fallthrough on Clang and GCC where available as there are a few places in t...
typedef char
JUCE Namespace.
NewLine newLine
A predefined object representing a new-line, which can be written to a string or stream.
wchar_t juce_wchar
A platform-independent 32-bit unicode character type.
Type unalignedPointerCast(void *ptr) noexcept
Casts a pointer to another type via void*, which suppresses the cast-align warning which sometimes ar...
Definition juce_Memory.h:88
A class that can be passed to the CppTokeniserFunctions functions in order to parse a String.
Class containing some basic functions for simple tokenising of C++ code.
static String addEscapeChars(const String &s)
Takes a string and returns a version of it where standard C++ escape sequences have been used to repl...
static void writeEscapeChars(OutputStream &out, const char *utf8, const int numBytesToRead, const int maxCharsOnLine, const bool breakAtNewLines, const bool replaceSingleQuotes, const bool allowStringBreaks)
Takes a UTF8 string and writes it to a stream using standard C++ escape sequences for any non-ascii b...