about summary refs log tree commit diff
path: root/3rdparty/gumbo-parser/src/error.h
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/gumbo-parser/src/error.h')
-rw-r--r-- 3rdparty/gumbo-parser/src/error.h 225
1 files changed, 225 insertions, 0 deletions
diff --git a/3rdparty/gumbo-parser/src/error.h b/3rdparty/gumbo-parser/src/error.h
new file mode 100644
index 0000000..afc998b
--- /dev/null
+++ b/3rdparty/gumbo-parser/src/error.h
@@ -0,0 +1,225 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: jdtang@google.com (Jonathan Tang)
+//
+// Error types, enums, and handling functions.
+
+#ifndef GUMBO_ERROR_H_
+#define GUMBO_ERROR_H_
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+#include <stdint.h>
+
+#include "gumbo.h"
+#include "insertion_mode.h"
+#include "string_buffer.h"
+#include "token_type.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct GumboInternalParser;  // Forward declaration; only pointers to it are used in this header.
+
+typedef enum {  // Kind of error; stored in GumboError.type.
+ GUMBO_ERR_UTF8_INVALID,  // Payload: GumboError.v.codepoint
+ GUMBO_ERR_UTF8_TRUNCATED,  // Payload: GumboError.v.codepoint
+ GUMBO_ERR_UTF8_NULL,
+ GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS,
+ GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON,  // Payload: v.codepoint
+ GUMBO_ERR_NUMERIC_CHAR_REF_INVALID,  // Payload: v.codepoint
+ GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON,  // Payload: v.text
+ GUMBO_ERR_NAMED_CHAR_REF_INVALID,  // Payload: v.text
+ GUMBO_ERR_TAG_STARTS_WITH_QUESTION,
+ GUMBO_ERR_TAG_EOF,
+ GUMBO_ERR_TAG_INVALID,
+ GUMBO_ERR_CLOSE_TAG_EMPTY,
+ GUMBO_ERR_CLOSE_TAG_EOF,
+ GUMBO_ERR_CLOSE_TAG_INVALID,
+ GUMBO_ERR_SCRIPT_EOF,
+ GUMBO_ERR_ATTR_NAME_EOF,
+ GUMBO_ERR_ATTR_NAME_INVALID,
+ GUMBO_ERR_ATTR_DOUBLE_QUOTE_EOF,
+ GUMBO_ERR_ATTR_SINGLE_QUOTE_EOF,
+ GUMBO_ERR_ATTR_UNQUOTED_EOF,
+ GUMBO_ERR_ATTR_UNQUOTED_RIGHT_BRACKET,
+ GUMBO_ERR_ATTR_UNQUOTED_EQUALS,
+ GUMBO_ERR_ATTR_AFTER_EOF,
+ GUMBO_ERR_ATTR_AFTER_INVALID,
+ GUMBO_ERR_DUPLICATE_ATTR,  // Payload: v.duplicate_attr
+ GUMBO_ERR_SOLIDUS_EOF,
+ GUMBO_ERR_SOLIDUS_INVALID,
+ GUMBO_ERR_DASHES_OR_DOCTYPE,
+ GUMBO_ERR_COMMENT_EOF,
+ GUMBO_ERR_COMMENT_INVALID,
+ GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH,
+ GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH,
+ GUMBO_ERR_COMMENT_SPACE_AFTER_DOUBLE_DASH,
+ GUMBO_ERR_COMMENT_END_BANG_EOF,
+ GUMBO_ERR_DOCTYPE_EOF,
+ GUMBO_ERR_DOCTYPE_INVALID,
+ GUMBO_ERR_DOCTYPE_SPACE,
+ GUMBO_ERR_DOCTYPE_RIGHT_BRACKET,
+ GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET,
+ GUMBO_ERR_DOCTYPE_END,
+ GUMBO_ERR_PARSER,  // Payload: v.parser
+ GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG,  // Payload: v.parser
+} GumboErrorType;
+
+// Additional data for duplicated attributes (GUMBO_ERR_DUPLICATE_ATTR).
+typedef struct GumboInternalDuplicateAttrError {
+ // The name of the attribute. Owned by this struct.
+ const char* name;
+
+ // The (0-based) index within the attributes vector of the original
+ // occurrence.
+ unsigned int original_index;
+
+ // The (0-based) index where the new occurrence would be.
+ unsigned int new_index;
+} GumboDuplicateAttrError;  // Carried in GumboError.v.duplicate_attr.
+
+// A simplified representation of the tokenizer state, designed to be more
+// useful to clients of this library than the internal representation. This
+// condenses the actual states used in the tokenizer state machine into a few
+// values that will be familiar to users of HTML.
+typedef enum {
+ GUMBO_ERR_TOKENIZER_DATA,
+ GUMBO_ERR_TOKENIZER_CHAR_REF,
+ GUMBO_ERR_TOKENIZER_RCDATA,
+ GUMBO_ERR_TOKENIZER_RAWTEXT,
+ GUMBO_ERR_TOKENIZER_PLAINTEXT,
+ GUMBO_ERR_TOKENIZER_SCRIPT,
+ GUMBO_ERR_TOKENIZER_TAG,
+ GUMBO_ERR_TOKENIZER_SELF_CLOSING_TAG,
+ GUMBO_ERR_TOKENIZER_ATTR_NAME,
+ GUMBO_ERR_TOKENIZER_ATTR_VALUE,
+ GUMBO_ERR_TOKENIZER_MARKUP_DECLARATION,
+ GUMBO_ERR_TOKENIZER_COMMENT,
+ GUMBO_ERR_TOKENIZER_DOCTYPE,
+ GUMBO_ERR_TOKENIZER_CDATA,
+} GumboTokenizerErrorState;  // Recorded in GumboTokenizerError.state.
+
+// Additional data for tokenizer errors (carried in GumboError.v.tokenizer).
+// This records the current state and codepoint encountered - this is usually
+// enough to reconstruct what went wrong and provide a friendly error message.
+typedef struct GumboInternalTokenizerError {
+ // The bad codepoint encountered.
+ int codepoint;  // NOTE(review): int here, while GumboError.v.codepoint is uint64_t.
+
+ // The (simplified) state that the tokenizer was in at the time.
+ GumboTokenizerErrorState state;
+} GumboTokenizerError;
+
+// Additional data for parse errors (carried in GumboError.v.parser).
+typedef struct GumboInternalParserError {
+ // The type of input token that resulted in this error.
+ GumboTokenType input_type;
+
+ // The HTML tag of the input token. TAG_UNKNOWN if this was not a tag token.
+ GumboTag input_tag;
+
+ // The insertion mode that the parser was in at the time.
+ GumboInsertionMode parser_state;
+
+ // The tag stack at the point of the error. Note that this is a GumboVector
+ // of GumboTags *stored by value* - cast the void* to a GumboTag directly to
+ // get at the tag.
+ GumboVector /* GumboTag */ tag_stack;
+} GumboParserError;
+
+// The overall error struct representing an error in decoding/tokenizing/parsing
+// the HTML. This contains an enumerated type flag, a source position, and then
+// a union of fields containing data specific to the error.
+typedef struct GumboInternalError {
+ // The type of error.
+ GumboErrorType type;
+
+ // The position within the source file where the error occurred.
+ GumboSourcePosition position;
+
+ // A pointer to the byte within the original source file text where the error
+ // occurred (note that this is not the same as position.offset, as that gives
+ // character-based instead of byte-based offsets).
+ const char* original_text;
+
+ // Type-specific error information.
+ union {
+ // The code point we encountered, for:
+ // * GUMBO_ERR_UTF8_INVALID
+ // * GUMBO_ERR_UTF8_TRUNCATED
+ // * GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON
+ // * GUMBO_ERR_NUMERIC_CHAR_REF_INVALID
+ uint64_t codepoint;
+
+ // Tokenizer errors.
+ GumboTokenizerError tokenizer;
+
+ // Short textual data, for:
+ // * GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON
+ // * GUMBO_ERR_NAMED_CHAR_REF_INVALID
+ GumboStringPiece text;
+
+ // Duplicate attribute data, for GUMBO_ERR_DUPLICATE_ATTR.
+ GumboDuplicateAttrError duplicate_attr;
+
+ // Parser state, for GUMBO_ERR_PARSER and
+ // GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG.
+ struct GumboInternalParserError parser;
+ } v;  // Which member is meaningful depends on 'type' (see member comments).
+} GumboError;
+
+// Appends a new error to the parser's error list and returns a pointer to it
+// so the caller can fill in its fields. May return NULL (check before use) if
+// we're already over the max_errors field specified in GumboOptions.
+GumboError* gumbo_add_error(struct GumboInternalParser* parser);
+
+// Initializes the errors vector in the parser (passed here as 'errors').
+void gumbo_init_errors(struct GumboInternalParser* errors);
+
+// Frees all errors in the parser's 'errors_' field ('errors' is the parser).
+void gumbo_destroy_errors(struct GumboInternalParser* errors);
+
+// Frees the memory used for the single GumboError pointed to by 'error'.
+void gumbo_error_destroy(struct GumboInternalParser* parser, GumboError* error);
+
+// Prints an error to a string. This fills an empty GumboStringBuffer 'output'
+// with a freshly-allocated buffer containing the error message text. The
+// caller is responsible for deleting the buffer. (Note that the buffer is
+// allocated with the allocator specified in the GumboParser config and hence
+// should be freed by gumbo_parser_deallocate().)
+void gumbo_error_to_string(struct GumboInternalParser* parser,
+ const GumboError* error, GumboStringBuffer* output);
+
+// Prints a caret diagnostic to a string. This fills an empty GumboStringBuffer
+// 'output' with a freshly-allocated buffer containing the diagnostic text. The
+// caller is responsible for deleting the buffer. (Note that the buffer is
+// allocated with the allocator specified in the GumboParser config and hence
+// should be freed by gumbo_parser_deallocate().)
+void gumbo_caret_diagnostic_to_string(struct GumboInternalParser* parser,
+ const GumboError* error, const char* source_text,
+ GumboStringBuffer* output);
+
+// Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead
+// of writing to a string, so there is no output buffer for the caller to free.
+void gumbo_print_caret_diagnostic(struct GumboInternalParser* parser,
+ const GumboError* error, const char* source_text);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GUMBO_ERROR_H_