C++ gumbo
Date: 2023-01-31 | Last modified: 2023-02-01
Table of contents
Introduction
// Recursively prints the element structure of the subtree rooted at `node`.
//
// For `node` itself it prints "<p> has <N> children:", then walks the children
// in order:
//   - element children: prints "<p> -> TAG <name>" and recurses with the path
//     extended to "<p>/<name>";
//   - text children: prints "<p> -> TEXT <text>";
//   - every other node type is skipped silently.
//
// Parameters:
//   node - node whose children are listed. A null pointer, or a node that is
//          neither an element nor a template, is ignored (nothing is printed).
//   p    - path label printed in front of every line emitted for this node.
void printElementStructure(GumboNode* node, string p) {
  // Per gumbo.h only element and template nodes keep their payload in
  // v.element; reading it from any other node type would access the wrong
  // union member, so bail out early instead.
  if (node == nullptr || (node->type != GUMBO_NODE_ELEMENT &&
                          node->type != GUMBO_NODE_TEMPLATE)) {
    return;
  }

  const GumboVector& children = node->v.element.children;
  fmt::print("\n{} has {} children:", p, children.length);

  for (unsigned int i = 0; i < children.length; ++i) {
    // GumboVector stores untyped void* entries; here they are child nodes.
    auto* child = static_cast<GumboNode*>(children.data[i]);
    if (child->type == GUMBO_NODE_ELEMENT) {
      const char* tag = gumbo_normalized_tagname(child->v.element.tag);
      fmt::print("\n {} -> TAG {}", p, tag);
      // Pass the freshly formatted path as a temporary so it is moved into
      // the by-value parameter rather than copied from a named local.
      printElementStructure(child, fmt::format("{}/{}", p, tag));
    } else if (child->type == GUMBO_NODE_TEXT) {
      fmt::print("\n {} -> TEXT {}", p, child->v.text.text);
    }
  }
}
// GumboOutput* output = gumbo_parse("<h1>Hello, <span>World!</span></h1>");
string markup{"<h1>Hello, <span>World!</span></h1>"};
GumboOutput* output = gumbo_parse_with_options(
&kGumboDefaultOptions, markup.data(), markup.length());
assert(output->root->type == GUMBO_NODE_ELEMENT);
// Node types:
// GUMBO_NODE_DOCUMENT,
// GUMBO_NODE_ELEMENT,
// GUMBO_NODE_TEXT,
// GUMBO_NODE_CDATA,
// GUMBO_NODE_COMMENT,
// GUMBO_NODE_WHITESPACE,
// GUMBO_NODE_TEMPLATE
fmt::print("\nRoot element tag: {}",
gumbo_normalized_tagname(output->root->v.element.tag)); // html
auto children = output->root->v.element.children;
fmt::print("\nRoot element children: {}", children.length); // 2: head, body
printElementStructure(output->root, "html");
// gumbo.h: extern const GumboOptions kGumboDefaultOptions;
gumbo_destroy_output(&kGumboDefaultOptions, output);
Possible output
Root element tag: html
Root element children: 2
html has 2 children:
html -> TAG head
html/head has 0 children:
html -> TAG body
html/body has 1 children:
html/body -> TAG h1
html/body/h1 has 2 children:
html/body/h1 -> TEXT Hello,
html/body/h1 -> TAG span
html/body/h1/span has 1 children:
html/body/h1/span -> TEXT World!