Merge pull request #2927 from ehuss/fix-unbalanced-html-in-header

Handle unclosed HTML tags inside a markdown element
This commit is contained in:
Eric Huss
2025-11-06 15:38:21 +00:00
committed by GitHub
2 changed files with 62 additions and 19 deletions

View File

@@ -306,25 +306,7 @@ where
trace!("event={event:?}"); trace!("event={event:?}");
match event { match event {
Event::Start(tag) => self.start_tag(tag), Event::Start(tag) => self.start_tag(tag),
Event::End(tag) => { Event::End(tag) => self.end_tag(tag),
// TODO: This should validate that the event stack is
// properly synchronized with the tag stack.
self.pop();
match tag {
TagEnd::TableHead => {
self.table_state = TableState::Body;
self.push(Node::Element(Element::new("tbody")));
}
TagEnd::TableCell => {
self.table_cell_index += 1;
}
TagEnd::Table => {
// Pop tbody or thead
self.pop();
}
_ => {}
}
}
Event::Text(text) => { Event::Text(text) => {
self.append_text(text.into_tendril()); self.append_text(text.into_tendril());
} }
@@ -600,6 +582,46 @@ where
self.push(Node::Element(element)); self.push(Node::Element(element));
} }
/// Handles the end event of a markdown element.
///
/// Elements flagged `was_raw` that are still on top of the tag stack are
/// reported as unclosed HTML tags and popped one by one. After that the
/// next entry is popped, and the table-related state is updated for the
/// table end tags.
fn end_tag(&mut self, tag: TagEnd) {
    // TODO: This should validate that the event stack is properly
    // synchronized with the tag stack. That would likely require keeping
    // a parallel "expected end tag" alongside the tag stack, since mapping
    // a pulldown-cmark event tag to an HTML tag isn't always clear.

    // Drain any HTML elements left open while exiting this markdown event.
    loop {
        let Some(&top_id) = self.tag_stack.last() else {
            break;
        };
        match self.tree.get(top_id).unwrap().value() {
            Node::Element(open) if open.was_raw => {
                warn!(
                    "unclosed HTML tag `<{}>` found in `{}` while exiting {tag:?}\n\
                     HTML tags must be closed before exiting a markdown element.",
                    open.name.local,
                    self.options.path.display(),
                );
            }
            // Anything that is not a raw element ends the cleanup.
            _ => break,
        }
        self.pop();
    }

    // Pop the entry presumed to belong to the markdown tag being closed.
    self.pop();

    match tag {
        TagEnd::Table => {
            // Pop tbody or thead
            self.pop();
        }
        TagEnd::TableCell => {
            self.table_cell_index += 1;
        }
        TagEnd::TableHead => {
            // The header is finished; subsequent rows go into a new tbody.
            self.table_state = TableState::Body;
            self.push(Node::Element(Element::new("tbody")));
        }
        _ => {}
    }
}
/// Given some HTML, parse it into [`Node`] elements and append them to /// Given some HTML, parse it into [`Node`] elements and append them to
/// the current node. /// the current node.
fn append_html(&mut self, html: &str) { fn append_html(&mut self, html: &str) {

View File

@@ -283,3 +283,24 @@ Check that the HTML tags are properly balanced.
}) })
.check_main_file("book/chapter_1.html", str!["<div>x<span>foo</span></div>"]); .check_main_file("book/chapter_1.html", str!["<div>x<span>foo</span></div>"]);
} }
// Test for bug with unbalanced HTML handling in the heading.
//
// The chapter source is replaced with a heading containing `Option<T>`.
// The build is expected to emit a warning about the unclosed `<t>` tag
// while exiting the heading, and the rendered heading is expected to
// contain a closed `<t></t>` pair.
#[test]
fn heading_with_unbalanced_html() {
BookTest::init(|_| {})
.change_file("src/chapter_1.md", "### Option<T>")
.run("build", |cmd| {
cmd.expect_stderr(str![[r#"
INFO Book building has started
INFO Running the html backend
WARN unclosed HTML tag `<t>` found in `chapter_1.md` while exiting Heading(H3)
HTML tags must be closed before exiting a markdown element.
INFO HTML book written to `[ROOT]/book`
"#]]);
})
.check_main_file(
"book/chapter_1.html",
str![[r##"<h3 id="option"><a class="header" href="#option">Option<t></t></a></h3>"##]],
);
}