71namespace XmlIdentifierChars
73 static bool isIdentifierCharSlow (
juce_wchar c)
noexcept
76 || c ==
'_' || c ==
'-' || c ==
':' || c ==
'.';
79 static bool isIdentifierChar (
juce_wchar c)
noexcept
81 static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
84 : isIdentifierCharSlow (c);
103 while (isIdentifierChar (*p))
112 if (originalText.
isEmpty() && inputSource !=
nullptr)
121 #if JUCE_STRING_UTF_TYPE == 8
122 if (data.getDataSize() > 2)
125 auto* text =
static_cast<const char*
> (data.getData());
130 originalText = data.toString();
142 originalText = data.toString();
164void XmlDocument::setLastError (
const String& desc,
const bool carryOn)
170String XmlDocument::getFileContents (
const String& filename)
const
172 if (inputSource !=
nullptr)
177 return in->readEntireStreamAsString();
183juce_wchar XmlDocument::readNextChar() noexcept
200 errorOccurred =
false;
202 needToLoadDTD =
true;
206 lastError =
"not enough input";
208 else if (! parseHeader())
210 lastError =
"malformed header";
212 else if (! parseDTD())
214 lastError =
"malformed DTD";
228bool XmlDocument::parseHeader()
230 skipNextWhiteSpace();
240 auto encoding = String (input,
headerEnd)
241 .fromFirstOccurrenceOf (
"encoding",
false,
true)
242 .fromFirstOccurrenceOf (
"=",
false,
false)
243 .fromFirstOccurrenceOf (
"\"",
false,
false)
244 .upToFirstOccurrenceOf (
"\"",
false,
false)
254 jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase (
"utf-"));
258 skipNextWhiteSpace();
264bool XmlDocument::parseDTD()
271 for (
int n = 1; n > 0;)
273 auto c = readNextChar();
284 dtdText = String (
dtdStart, input - 1).trim();
290void XmlDocument::skipNextWhiteSpace()
324 auto closeBracket = input.
indexOf (CharPointer_ASCII (
"?>"));
326 if (closeBracket < 0)
332 input += closeBracket + 2;
341void XmlDocument::readQuotedString (String& result)
343 auto quote = readNextChar();
347 auto c = readNextChar();
364 auto character = *input;
366 if (character ==
quote)
368 result.appendCharPointer (start, input);
373 if (character ==
'&')
375 result.appendCharPointer (start, input);
381 setLastError (
"unmatched quotes",
false);
394 XmlElement* node =
nullptr;
395 skipNextWhiteSpace();
403 auto endOfToken = XmlIdentifierChars::findEndOfToken (input);
408 skipNextWhiteSpace();
409 endOfToken = XmlIdentifierChars::findEndOfToken (input);
413 setLastError (
"tag name missing",
false);
420 LinkedListPointer<XmlElement::XmlAttributeNode>::Appender
attributeAppender (node->attributes);
425 skipNextWhiteSpace();
429 if (c ==
'/' && input[1] ==
'>')
441 readChildElements (*node);
447 if (XmlIdentifierChars::isIdentifierChar (c))
449 auto attNameEnd = XmlIdentifierChars::findEndOfToken (input);
455 skipNextWhiteSpace();
457 if (readNextChar() ==
'=')
459 skipNextWhiteSpace();
460 auto nextChar = *input;
462 if (nextChar ==
'"' || nextChar ==
'\'')
465 readQuotedString (
newAtt->value);
472 setLastError (
"expected '=' after attribute '"
481 setLastError (
"illegal character found in " + node->getTagName() +
": '" + c +
"'",
false);
491void XmlDocument::readChildElements (XmlElement& parent)
493 LinkedListPointer<XmlElement>::Appender
childAppender (parent.firstChildElement);
498 skipNextWhiteSpace();
502 setLastError (
"unmatched tags",
false);
532 setLastError (
"unterminated CDATA section",
false);
537 if (
c0 ==
']' && input[1] ==
']' && input[2] ==
'>')
550 if (
auto* n = readNextElement (
true))
568 if (input[1] ==
'!' && input[2] ==
'-' && input[3] ==
'-')
575 setLastError (
"unterminated comment",
false);
589 setLastError (
"unmatched tags",
false);
604 input =
entity.getCharPointer();
607 while (
auto* n = readNextElement (
true))
623 auto nextChar = *input;
625 if (nextChar ==
'\r')
629 if (input[1] ==
'\n')
633 if (nextChar ==
'<' || nextChar ==
'&')
638 setLastError (
"unmatched tags",
false);
655void XmlDocument::readEntity (String& result)
685 else if (*input ==
'#')
690 if (*input ==
'x' || *input ==
'X')
695 while (input[0] !=
';')
701 setLastError (
"illegal escape sequence",
true);
711 else if (input[0] >=
'0' && input[0] <=
'9')
721 setLastError (
"unexpected end of input",
true);
730 setLastError (
"illegal escape sequence",
true);
742 setLastError (
"illegal escape sequence",
true);
767String XmlDocument::expandEntity (
const String&
ent)
785 setLastError (
"illegal escape sequence",
false);
789 return expandExternalEntity (
ent);
792String XmlDocument::expandExternalEntity (
const String&
entity)
801 if (tokenisedDTD[tokenisedDTD.
size() - 2].equalsIgnoreCase (
"system")
802 && tokenisedDTD[tokenisedDTD.
size() - 1].isQuotedString())
804 auto fn = tokenisedDTD[tokenisedDTD.
size() - 1];
806 tokenisedDTD.
clear();
807 tokenisedDTD.
addTokens (getFileContents (fn),
true);
811 tokenisedDTD.
clear();
818 if (closeBracket > openBracket)
820 closeBracket),
true);
824 for (
int i = tokenisedDTD.
size(); --i >= 0;)
826 if (tokenisedDTD[i].startsWithChar (
'%')
827 && tokenisedDTD[i].endsWithChar (
';'))
829 auto parsed = getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1));
841 needToLoadDTD =
false;
844 for (
int i = 0; i < tokenisedDTD.
size(); ++i)
846 if (tokenisedDTD[i] ==
entity)
848 if (tokenisedDTD[i - 1].equalsIgnoreCase (
"<!entity"))
850 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (
">").
trim().unquoted();
861 setLastError (
"entity without terminating semi-colon",
false);
879 setLastError (
"unknown entity",
true);
883String XmlDocument::getParameterEntity (
const String&
entity)
885 for (
int i = 0; i < tokenisedDTD.
size(); ++i)
887 if (tokenisedDTD[i] ==
entity
888 && tokenisedDTD [i - 1] ==
"%"
889 && tokenisedDTD [i - 2].equalsIgnoreCase (
"<!entity"))
891 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (
">");
893 if (
ent.equalsIgnoreCase (
"system"))
894 return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (
">"));
896 return ent.trim().unquoted();
static bool isByteOrderMarkBigEndian(const void *possibleByteOrder) noexcept
Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (big endian).
static bool isByteOrderMarkLittleEndian(const void *possibleByteOrder) noexcept
Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (little endian).
Wraps a pointer to a null-terminated UTF-8 character string, and provides various methods to operate on the data.
int compareIgnoreCaseUpTo(const CharPointer other, const int maxChars) const noexcept
Compares this string with another one, up to a specified number of characters.
void incrementToEndOfWhitespace() noexcept
Moves this pointer to the first non-whitespace character in the string.
juce_wchar getAndAdvance() noexcept
Returns the character that this pointer is currently pointing to, and then advances the pointer to point to the next character.
bool isEmpty() const noexcept
Returns true if this pointer is pointing to a null character.
int indexOf(const CharPointer stringToFind) const noexcept
Returns the character index of a substring, or -1 if it isn't found.
static bool isByteOrderMark(const void *possibleByteOrder) noexcept
Returns true if the first three bytes in this pointer are the UTF8 byte-order mark (BOM).
static bool isWhitespace(char character) noexcept
Checks whether a character is whitespace.
static int getHexDigitValue(juce_wchar digit) noexcept
Returns 0 to 15 for '0' to 'F', or -1 for characters that aren't a legal hex digit.
static bool isLetterOrDigit(char character) noexcept
Checks whether a character is alphabetic or numeric.
static CharPointerType1 find(CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
Returns a pointer to the first occurrence of a substring in a string.
static int compareUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
Compares two null-terminated character strings, up to a given number of characters.
Represents a local file or directory.
Writes data to an internal memory buffer, which grows as required.
void insert(int index, String stringToAdd)
Inserts a string into the array.
void clear()
Removes all elements from the array.
int size() const noexcept
Returns the number of strings in the array.
void trim()
Deletes any whitespace characters from the starts and ends of all the strings.
void remove(int index)
Removes a string from the array.
int addTokens(StringRef stringToTokenise, bool preserveQuotedStrings)
Breaks up a string into tokens and adds them to this array.
A simple class for holding temporary references to a string literal or String.
CharPointerType getCharPointer() const noexcept
Returns the character pointer currently being used to store this string.
int indexOfChar(juce_wchar characterToLookFor) const noexcept
Searches for a character inside this string.
bool isEmpty() const noexcept
Returns true if the string contains no characters.
void clear() noexcept
Resets this string to be empty.
int lastIndexOfChar(juce_wchar character) const noexcept
Searches for a character inside this string (working backwards from the end of the string).
String trimCharactersAtEnd(StringRef charactersToTrim) const
Returns a copy of this string, having removed a specified set of characters from its end.
static String charToString(juce_wchar character)
Creates a string from a single character.
String substring(int startIndex, int endIndex) const
Returns a subsection of the string.
CharPointer_UTF8 CharPointerType
This is the character encoding type used internally to store the string.
bool isNotEmpty() const noexcept
Returns true if the string contains at least one character.
Parses a text-based XML document and creates an XmlElement object from it.
const String & getLastParseError() const noexcept
Returns the parsing error that occurred the last time getDocumentElement was called.
std::unique_ptr< XmlElement > getDocumentElementIfTagMatches(StringRef requiredTag)
Does an inexpensive check to see whether the outer element has the given tag name, and then does a full parse if it matches.
std::unique_ptr< XmlElement > getDocumentElement(bool onlyReadOuterDocumentElement=false)
Creates an XmlElement object to represent the main document node.
XmlDocument(const String &documentText)
Creates an XmlDocument from the xml text.
static std::unique_ptr< XmlElement > parse(const File &file)
A handy static method that parses a file.
void setInputSource(InputSource *newSource) noexcept
Sets an input source object to use for parsing documents that reference external entities.
~XmlDocument()
Destructor.
void setEmptyTextElementsIgnored(bool shouldBeIgnored) noexcept
Sets a flag to change the treatment of empty text elements.
static XmlElement * createTextElement(const String &text)
Creates a text element that can be added to a parent element.
wchar_t juce_wchar
A platform-independent 32-bit unicode character type.
std::unique_ptr< XmlElement > parseXMLIfTagMatches(const String &textToParse, StringRef requiredTag)
Does an inexpensive check to see whether the top-level element has the given tag name, and then does a full parse if it matches.
std::unique_ptr< XmlElement > parseXML(const String &textToParse)
Attempts to parse some XML text, returning a new XmlElement if it was valid.
Type unalignedPointerCast(void *ptr) noexcept
Casts a pointer to another type via void*, which suppresses the cast-align warning which sometimes arises when casting pointers to types with different alignment.
unsigned int uint32
A platform-independent 32-bit unsigned integer type.
constexpr int numElementsInArray(Type(&)[N]) noexcept
Handy function for getting the number of elements in a simple const C array.