Move markdown support to mdbook-markdown

This moves all the code responsible for Markdown processing, together with its unit tests, out of `mdbook-core` and into the new `mdbook-markdown` crate.
2025-12-27 10:16:09 -05:00 · 2025-07-21 15:46:36 -07:00
parent 3278f84373
commit 8f3b6b4776
10 changed files with 556 additions and 521 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1271,6 +1271,7 @@ dependencies = [
 "ignore",
 "log",
 "mdbook-core",
+ "mdbook-markdown",
 "mdbook-preprocessor",
 "mdbook-renderer",
 "mdbook-summary",
@@ -1311,6 +1312,15 @@ dependencies = [
 "toml",
 ]

+[[package]]
+name = "mdbook-markdown"
+version = "0.5.0-alpha.1"
+dependencies = [
+ "log",
+ "pulldown-cmark 0.10.3",
+ "regex",
+]
+
 [[package]]
 name = "mdbook-preprocessor"
 version = "0.5.0-alpha.1"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,7 @@ rust-version = "1.85.0" # Keep in sync with installation.md and .github/workflow
 anyhow = "1.0.98"
 log = "0.4.27"
 mdbook-core = { path = "crates/mdbook-core" }
+mdbook-markdown = { path = "crates/mdbook-markdown" }
 mdbook-preprocessor = { path = "crates/mdbook-preprocessor" }
 mdbook-renderer = { path = "crates/mdbook-renderer" }
 mdbook-summary = { path = "crates/mdbook-summary" }
@@ -63,6 +64,7 @@ handlebars = "6.0"
 hex = "0.4.3"
 log.workspace = true
 mdbook-core.workspace = true
+mdbook-markdown.workspace = true
 mdbook-preprocessor.workspace = true
 mdbook-renderer.workspace = true
 mdbook-summary.workspace = true
--- a/crates/mdbook-core/src/utils/mod.rs
+++ b/crates/mdbook-core/src/utils/mod.rs
@@ -2,12 +2,9 @@

 use anyhow::Error;
 use log::error;
-use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag, TagEnd, html};
 use regex::Regex;
 use std::borrow::Cow;
 use std::collections::HashMap;
-use std::fmt::Write;
-use std::path::Path;
 use std::sync::LazyLock;

 pub mod fs;
@@ -83,338 +80,6 @@ pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, us
    unique_id
 }

-/// Fix links to the correct location.
-///
-/// This adjusts links, such as turning `.md` extensions to `.html`.
-///
-/// `path` is the path to the page being rendered relative to the root of the
-/// book. This is used for the `print.html` page so that links on the print
-/// page go to the original location. Normal page rendering sets `path` to
-/// None. Ideally, print page links would link to anchors on the print page,
-/// but that is very difficult.
-fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
-    static SCHEME_LINK: LazyLock<Regex> =
-        LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap());
-    static MD_LINK: LazyLock<Regex> =
-        LazyLock::new(|| Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap());
-
-    fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
-        if dest.starts_with('#') {
-            // Fragment-only link.
-            if let Some(path) = path {
-                let mut base = path.display().to_string();
-                if base.ends_with(".md") {
-                    base.replace_range(base.len() - 3.., ".html");
-                }
-                return format!("{base}{dest}").into();
-            } else {
-                return dest;
-            }
-        }
-        // Don't modify links with schemes like `https`.
-        if !SCHEME_LINK.is_match(&dest) {
-            // This is a relative link, adjust it as necessary.
-            let mut fixed_link = String::new();
-            if let Some(path) = path {
-                let base = path
-                    .parent()
-                    .expect("path can't be empty")
-                    .to_str()
-                    .expect("utf-8 paths only");
-                if !base.is_empty() {
-                    write!(fixed_link, "{base}/").unwrap();
-                }
-            }
-
-            if let Some(caps) = MD_LINK.captures(&dest) {
-                fixed_link.push_str(&caps["link"]);
-                fixed_link.push_str(".html");
-                if let Some(anchor) = caps.name("anchor") {
-                    fixed_link.push_str(anchor.as_str());
-                }
-            } else {
-                fixed_link.push_str(&dest);
-            };
-            return CowStr::from(fixed_link);
-        }
-        dest
-    }
-
-    fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
-        // This is a terrible hack, but should be reasonably reliable. Nobody
-        // should ever parse a tag with a regex. However, there isn't anything
-        // in Rust that I know of that is suitable for handling partial html
-        // fragments like those generated by pulldown_cmark.
-        //
-        // There are dozens of HTML tags/attributes that contain paths, so
-        // feel free to add more tags if desired; these are the only ones I
-        // care about right now.
-        static HTML_LINK: LazyLock<Regex> =
-            LazyLock::new(|| Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap());
-
-        HTML_LINK
-            .replace_all(&html, |caps: &regex::Captures<'_>| {
-                let fixed = fix(caps[2].into(), path);
-                format!("{}{}\"", &caps[1], fixed)
-            })
-            .into_owned()
-            .into()
-    }
-
-    match event {
-        Event::Start(Tag::Link {
-            link_type,
-            dest_url,
-            title,
-            id,
-        }) => Event::Start(Tag::Link {
-            link_type,
-            dest_url: fix(dest_url, path),
-            title,
-            id,
-        }),
-        Event::Start(Tag::Image {
-            link_type,
-            dest_url,
-            title,
-            id,
-        }) => Event::Start(Tag::Image {
-            link_type,
-            dest_url: fix(dest_url, path),
-            title,
-            id,
-        }),
-        Event::Html(html) => Event::Html(fix_html(html, path)),
-        Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
-        _ => event,
-    }
-}
-
-/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
-pub fn render_markdown(text: &str, smart_punctuation: bool) -> String {
-    render_markdown_with_path(text, smart_punctuation, None)
-}
-
-/// Creates a new pulldown-cmark parser of the given text.
-pub fn new_cmark_parser(text: &str, smart_punctuation: bool) -> Parser<'_> {
-    let mut opts = Options::empty();
-    opts.insert(Options::ENABLE_TABLES);
-    opts.insert(Options::ENABLE_FOOTNOTES);
-    opts.insert(Options::ENABLE_STRIKETHROUGH);
-    opts.insert(Options::ENABLE_TASKLISTS);
-    opts.insert(Options::ENABLE_HEADING_ATTRIBUTES);
-    if smart_punctuation {
-        opts.insert(Options::ENABLE_SMART_PUNCTUATION);
-    }
-    Parser::new_ext(text, opts)
-}
-
-/// Renders markdown to HTML.
-///
-/// `path` should only be set if this is being generated for the consolidated
-/// print page. It should point to the page being rendered relative to the
-/// root of the book.
-pub fn render_markdown_with_path(
-    text: &str,
-    smart_punctuation: bool,
-    path: Option<&Path>,
-) -> String {
-    let mut body = String::with_capacity(text.len() * 3 / 2);
-
-    // Based on
-    // https://github.com/pulldown-cmark/pulldown-cmark/blob/master/pulldown-cmark/examples/footnote-rewrite.rs
-
-    // This handling of footnotes is a two-pass process. This is done to
-    // support linkbacks, little arrows that allow you to jump back to the
-    // footnote reference. The first pass collects the footnote definitions.
-    // The second pass modifies those definitions to include the linkbacks,
-    // and inserts the definitions back into the `events` list.
-
-    // This is a map of name -> (number, count)
-    // `name` is the name of the footnote.
-    // `number` is the footnote number displayed in the output.
-    // `count` is the number of references to this footnote (used for multiple
-    // linkbacks, and checking for unused footnotes).
-    let mut footnote_numbers = HashMap::new();
-    // This is a map of name -> Vec<Event>
-    // `name` is the name of the footnote.
-    // The events list is the list of events needed to build the footnote definition.
-    let mut footnote_defs = HashMap::new();
-
-    // The following are used when currently processing a footnote definition.
-    //
-    // This is the name of the footnote (escaped).
-    let mut in_footnote_name = String::new();
-    // This is the list of events to build the footnote definition.
-    let mut in_footnote = Vec::new();
-
-    let events = new_cmark_parser(text, smart_punctuation)
-        .map(clean_codeblock_headers)
-        .map(|event| adjust_links(event, path))
-        .flat_map(|event| {
-            let (a, b) = wrap_tables(event);
-            a.into_iter().chain(b)
-        })
-        // Footnote rewriting must go last to ensure inner definition contents
-        // are processed (since they get pulled out of the initial stream).
-        .filter_map(|event| {
-            match event {
-                Event::Start(Tag::FootnoteDefinition(name)) => {
-                    if !in_footnote.is_empty() {
-                        log::warn!("internal bug: nested footnote not expected in {path:?}");
-                    }
-                    in_footnote_name = special_escape(&name);
-                    None
-                }
-                Event::End(TagEnd::FootnoteDefinition) => {
-                    let def_events = std::mem::take(&mut in_footnote);
-                    let name = std::mem::take(&mut in_footnote_name);
-
-                    if footnote_defs.contains_key(&name) {
-                        log::warn!(
-                            "footnote `{name}` in {} defined multiple times - \
-                             not updating to new definition",
-                            path.map_or_else(|| Cow::from("<unknown>"), |p| p.to_string_lossy())
-                        );
-                    } else {
-                        footnote_defs.insert(name, def_events);
-                    }
-                    None
-                }
-                Event::FootnoteReference(name) => {
-                    let name = special_escape(&name);
-                    let len = footnote_numbers.len() + 1;
-                    let (n, count) = footnote_numbers.entry(name.clone()).or_insert((len, 0));
-                    *count += 1;
-                    let html = Event::Html(
-                        format!(
-                            "<sup class=\"footnote-reference\" id=\"fr-{name}-{count}\">\
-                                <a href=\"#footnote-{name}\">{n}</a>\
-                             </sup>"
-                        )
-                        .into(),
-                    );
-                    if in_footnote_name.is_empty() {
-                        Some(html)
-                    } else {
-                        // While inside a footnote, we need to accumulate.
-                        in_footnote.push(html);
-                        None
-                    }
-                }
-                // While inside a footnote, accumulate all events into a local.
-                _ if !in_footnote_name.is_empty() => {
-                    in_footnote.push(event);
-                    None
-                }
-                _ => Some(event),
-            }
-        });
-
-    html::push_html(&mut body, events);
-
-    if !footnote_defs.is_empty() {
-        add_footnote_defs(
-            &mut body,
-            path,
-            footnote_defs.into_iter().collect(),
-            &footnote_numbers,
-        );
-    }
-
-    body
-}
-
-/// Adds all footnote definitions into `body`.
-fn add_footnote_defs(
-    body: &mut String,
-    path: Option<&Path>,
-    mut defs: Vec<(String, Vec<Event<'_>>)>,
-    numbers: &HashMap<String, (usize, u32)>,
-) {
-    // Remove unused.
-    defs.retain(|(name, _)| {
-        if !numbers.contains_key(name) {
-            log::warn!(
-                "footnote `{name}` in `{}` is defined but not referenced",
-                path.map_or_else(|| Cow::from("<unknown>"), |p| p.to_string_lossy())
-            );
-            false
-        } else {
-            true
-        }
-    });
-
-    defs.sort_by_cached_key(|(name, _)| numbers[name].0);
-
-    body.push_str(
-        "<hr>\n\
-         <ol class=\"footnote-definition\">",
-    );
-
-    // Insert the backrefs to the definition, and put the definitions in the output.
-    for (name, mut fn_events) in defs {
-        let count = numbers[&name].1;
-        fn_events.insert(
-            0,
-            Event::Html(format!("<li id=\"footnote-{name}\">").into()),
-        );
-        // Generate the linkbacks.
-        for usage in 1..=count {
-            let nth = if usage == 1 {
-                String::new()
-            } else {
-                usage.to_string()
-            };
-            let backlink =
-                Event::Html(format!(" <a href=\"#fr-{name}-{usage}\">↩{nth}</a>").into());
-            if matches!(fn_events.last(), Some(Event::End(TagEnd::Paragraph))) {
-                // Put the linkback at the end of the last paragraph instead
-                // of on a line by itself.
-                fn_events.insert(fn_events.len() - 1, backlink);
-            } else {
-                // Not a clear place to put it in this circumstance, so put it
-                // at the end.
-                fn_events.push(backlink);
-            }
-        }
-        fn_events.push(Event::Html("</li>\n".into()));
-        html::push_html(body, fn_events.into_iter());
-    }
-
-    body.push_str("</ol>");
-}
-
-/// Wraps tables in a `.table-wrapper` class to apply overflow-x rules to.
-fn wrap_tables(event: Event<'_>) -> (Option<Event<'_>>, Option<Event<'_>>) {
-    match event {
-        Event::Start(Tag::Table(_)) => (
-            Some(Event::Html(r#"<div class="table-wrapper">"#.into())),
-            Some(event),
-        ),
-        Event::End(TagEnd::Table) => (Some(event), Some(Event::Html(r#"</div>"#.into()))),
-        _ => (Some(event), None),
-    }
-}
-
-fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
-    match event {
-        Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info))) => {
-            let info: String = info
-                .chars()
-                .map(|x| match x {
-                    ' ' | '\t' => ',',
-                    _ => x,
-                })
-                .filter(|ch| !ch.is_whitespace())
-                .collect();
-
-            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(info))))
-        }
-        _ => event,
-    }
-}
-
 /// Prints a "backtrace" of some `Error`.
 pub fn log_backtrace(e: &Error) {
    error!("Error: {}", e);
@@ -424,27 +89,6 @@ pub fn log_backtrace(e: &Error) {
    }
 }

-/// Escape characters to make it safe for an HTML string.
-pub fn special_escape(mut s: &str) -> String {
-    let mut escaped = String::with_capacity(s.len());
-    let needs_escape: &[char] = &['<', '>', '\'', '"', '\\', '&'];
-    while let Some(next) = s.find(needs_escape) {
-        escaped.push_str(&s[..next]);
-        match s.as_bytes()[next] {
-            b'<' => escaped.push_str("&lt;"),
-            b'>' => escaped.push_str("&gt;"),
-            b'\'' => escaped.push_str("&#39;"),
-            b'"' => escaped.push_str("&quot;"),
-            b'\\' => escaped.push_str("&#92;"),
-            b'&' => escaped.push_str("&amp;"),
-            _ => unreachable!(),
-        }
-        s = &s[next + 1..];
-    }
-    escaped.push_str(s);
-    escaped
-}
-
 /// Escape `<` and `>` for HTML.
 pub fn bracket_escape(mut s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
@@ -464,143 +108,7 @@ pub fn bracket_escape(mut s: &str) -> String {

 #[cfg(test)]
 mod tests {
-    use super::{bracket_escape, special_escape};
-
-    mod render_markdown {
-        use super::super::render_markdown;
-
-        #[test]
-        fn preserves_external_links() {
-            assert_eq!(
-                render_markdown("[example](https://www.rust-lang.org/)", false),
-                "<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
-            );
-        }
-
-        #[test]
-        fn it_can_adjust_markdown_links() {
-            assert_eq!(
-                render_markdown("[example](example.md)", false),
-                "<p><a href=\"example.html\">example</a></p>\n"
-            );
-            assert_eq!(
-                render_markdown("[example_anchor](example.md#anchor)", false),
-                "<p><a href=\"example.html#anchor\">example_anchor</a></p>\n"
-            );
-
-            // this anchor contains 'md' inside of it
-            assert_eq!(
-                render_markdown("[phantom data](foo.html#phantomdata)", false),
-                "<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n"
-            );
-        }
-
-        #[test]
-        fn it_can_wrap_tables() {
-            let src = r#"
-| Original        | Punycode        | Punycode + Encoding |
-|-----------------|-----------------|---------------------|
-| føø             | f-5gaa          | f_5gaa              |
-"#;
-            let out = r#"
-<div class="table-wrapper"><table><thead><tr><th>Original</th><th>Punycode</th><th>Punycode + Encoding</th></tr></thead><tbody>
-<tr><td>føø</td><td>f-5gaa</td><td>f_5gaa</td></tr>
-</tbody></table>
-</div>
-"#.trim();
-            assert_eq!(render_markdown(src, false), out);
-        }
-
-        #[test]
-        fn it_can_keep_quotes_straight() {
-            assert_eq!(render_markdown("'one'", false), "<p>'one'</p>\n");
-        }
-
-        #[test]
-        fn it_can_make_quotes_curly_except_when_they_are_in_code() {
-            let input = r#"
-'one'
-```
-'two'
-```
-`'three'` 'four'"#;
-            let expected = r#"<p>‘one’</p>
-<pre><code>'two'
-</code></pre>
-<p><code>'three'</code> ‘four’</p>
-"#;
-            assert_eq!(render_markdown(input, true), expected);
-        }
-
-        #[test]
-        fn whitespace_outside_of_codeblock_header_is_preserved() {
-            let input = r#"
-some text with spaces
-```rust
-fn main() {
-// code inside is unchanged
-}
-```
-more text with spaces
-"#;
-
-            let expected = r#"<p>some text with spaces</p>
-<pre><code class="language-rust">fn main() {
-// code inside is unchanged
-}
-</code></pre>
-<p>more text with spaces</p>
-"#;
-            assert_eq!(render_markdown(input, false), expected);
-            assert_eq!(render_markdown(input, true), expected);
-        }
-
-        #[test]
-        fn rust_code_block_properties_are_passed_as_space_delimited_class() {
-            let input = r#"
-```rust,no_run,should_panic,property_3
-```
-"#;
-
-            let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
-"#;
-            assert_eq!(render_markdown(input, false), expected);
-            assert_eq!(render_markdown(input, true), expected);
-        }
-
-        #[test]
-        fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
-            let input = r#"
-```rust,    no_run,,,should_panic , ,property_3
-```
-"#;
-
-            let expected = r#"<pre><code class="language-rust,,,,,no_run,,,should_panic,,,,property_3"></code></pre>
-"#;
-            assert_eq!(render_markdown(input, false), expected);
-            assert_eq!(render_markdown(input, true), expected);
-        }
-
-        #[test]
-        fn rust_code_block_without_properties_has_proper_html_class() {
-            let input = r#"
-```rust
-```
-"#;
-
-            let expected = r#"<pre><code class="language-rust"></code></pre>
-"#;
-            assert_eq!(render_markdown(input, false), expected);
-            assert_eq!(render_markdown(input, true), expected);
-
-            let input = r#"
-```rust
-```
-"#;
-            assert_eq!(render_markdown(input, false), expected);
-            assert_eq!(render_markdown(input, true), expected);
-        }
-    }
+    use super::bracket_escape;

    #[allow(deprecated)]
    mod id_from_content {
@@ -690,17 +198,4 @@ more text with spaces
        assert_eq!(bracket_escape("'"), "'");
        assert_eq!(bracket_escape("\\"), "\\");
    }
-
-    #[test]
-    fn escaped_special() {
-        assert_eq!(special_escape(""), "");
-        assert_eq!(special_escape("<"), "&lt;");
-        assert_eq!(special_escape(">"), "&gt;");
-        assert_eq!(special_escape("<>"), "&lt;&gt;");
-        assert_eq!(special_escape("<test>"), "&lt;test&gt;");
-        assert_eq!(special_escape("a<test>b"), "a&lt;test&gt;b");
-        assert_eq!(special_escape("'"), "&#39;");
-        assert_eq!(special_escape("\\"), "&#92;");
-        assert_eq!(special_escape("&"), "&amp;");
-    }
 }
--- a/crates/mdbook-markdown/Cargo.toml
+++ b/crates/mdbook-markdown/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "mdbook-markdown"
+version = "0.5.0-alpha.1"
+description = "Markdown processing used in mdBook"
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+
+[dependencies]
+log.workspace = true
+pulldown-cmark.workspace = true
+regex.workspace = true
+
+[lints]
+workspace = true
--- a/crates/mdbook-markdown/src/lib.rs
+++ b/crates/mdbook-markdown/src/lib.rs
@@ -0,0 +1,367 @@
+//! Markdown processing used in mdBook.
+
+use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag, TagEnd, html};
+use regex::Regex;
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::fmt::Write;
+use std::path::Path;
+use std::sync::LazyLock;
+
+pub use pulldown_cmark;
+
+#[cfg(test)]
+mod tests;
+
+/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
+pub fn render_markdown(text: &str, smart_punctuation: bool) -> String {
+    render_markdown_with_path(text, smart_punctuation, None)
+}
+
+/// Creates a new pulldown-cmark parser of the given text.
+pub fn new_cmark_parser(text: &str, smart_punctuation: bool) -> Parser<'_> {
+    let mut opts = Options::empty();
+    opts.insert(Options::ENABLE_TABLES);
+    opts.insert(Options::ENABLE_FOOTNOTES);
+    opts.insert(Options::ENABLE_STRIKETHROUGH);
+    opts.insert(Options::ENABLE_TASKLISTS);
+    opts.insert(Options::ENABLE_HEADING_ATTRIBUTES);
+    if smart_punctuation {
+        opts.insert(Options::ENABLE_SMART_PUNCTUATION);
+    }
+    Parser::new_ext(text, opts)
+}
+
+/// Renders markdown to HTML.
+///
+/// `path` should only be set if this is being generated for the consolidated
+/// print page. It should point to the page being rendered relative to the
+/// root of the book.
+pub fn render_markdown_with_path(
+    text: &str,
+    smart_punctuation: bool,
+    path: Option<&Path>,
+) -> String {
+    let mut body = String::with_capacity(text.len() * 3 / 2);
+
+    // Based on
+    // https://github.com/pulldown-cmark/pulldown-cmark/blob/master/pulldown-cmark/examples/footnote-rewrite.rs
+
+    // This handling of footnotes is a two-pass process. This is done to
+    // support linkbacks, little arrows that allow you to jump back to the
+    // footnote reference. The first pass collects the footnote definitions.
+    // The second pass modifies those definitions to include the linkbacks,
+    // and inserts the definitions back into the `events` list.
+
+    // This is a map of name -> (number, count)
+    // `name` is the name of the footnote.
+    // `number` is the footnote number displayed in the output.
+    // `count` is the number of references to this footnote (used for multiple
+    // linkbacks, and checking for unused footnotes).
+    let mut footnote_numbers = HashMap::new();
+    // This is a map of name -> Vec<Event>
+    // `name` is the name of the footnote.
+    // The events list is the list of events needed to build the footnote definition.
+    let mut footnote_defs = HashMap::new();
+
+    // The following are used when currently processing a footnote definition.
+    //
+    // This is the name of the footnote (escaped).
+    let mut in_footnote_name = String::new();
+    // This is the list of events to build the footnote definition.
+    let mut in_footnote = Vec::new();
+
+    let events = new_cmark_parser(text, smart_punctuation)
+        .map(clean_codeblock_headers)
+        .map(|event| adjust_links(event, path))
+        .flat_map(|event| {
+            let (a, b) = wrap_tables(event);
+            a.into_iter().chain(b)
+        })
+        // Footnote rewriting must go last to ensure inner definition contents
+        // are processed (since they get pulled out of the initial stream).
+        .filter_map(|event| {
+            match event {
+                Event::Start(Tag::FootnoteDefinition(name)) => {
+                    if !in_footnote.is_empty() {
+                        log::warn!("internal bug: nested footnote not expected in {path:?}");
+                    }
+                    in_footnote_name = special_escape(&name);
+                    None
+                }
+                Event::End(TagEnd::FootnoteDefinition) => {
+                    let def_events = std::mem::take(&mut in_footnote);
+                    let name = std::mem::take(&mut in_footnote_name);
+
+                    if footnote_defs.contains_key(&name) {
+                        log::warn!(
+                            "footnote `{name}` in {} defined multiple times - \
+                             not updating to new definition",
+                            path.map_or_else(|| Cow::from("<unknown>"), |p| p.to_string_lossy())
+                        );
+                    } else {
+                        footnote_defs.insert(name, def_events);
+                    }
+                    None
+                }
+                Event::FootnoteReference(name) => {
+                    let name = special_escape(&name);
+                    let len = footnote_numbers.len() + 1;
+                    let (n, count) = footnote_numbers.entry(name.clone()).or_insert((len, 0));
+                    *count += 1;
+                    let html = Event::Html(
+                        format!(
+                            "<sup class=\"footnote-reference\" id=\"fr-{name}-{count}\">\
+                                <a href=\"#footnote-{name}\">{n}</a>\
+                             </sup>"
+                        )
+                        .into(),
+                    );
+                    if in_footnote_name.is_empty() {
+                        Some(html)
+                    } else {
+                        // While inside a footnote, we need to accumulate.
+                        in_footnote.push(html);
+                        None
+                    }
+                }
+                // While inside a footnote, accumulate all events into a local.
+                _ if !in_footnote_name.is_empty() => {
+                    in_footnote.push(event);
+                    None
+                }
+                _ => Some(event),
+            }
+        });
+
+    html::push_html(&mut body, events);
+
+    if !footnote_defs.is_empty() {
+        add_footnote_defs(
+            &mut body,
+            path,
+            footnote_defs.into_iter().collect(),
+            &footnote_numbers,
+        );
+    }
+
+    body
+}
+
+/// Adds all footnote definitions into `body`.
+fn add_footnote_defs(
+    body: &mut String,
+    path: Option<&Path>,
+    mut defs: Vec<(String, Vec<Event<'_>>)>,
+    numbers: &HashMap<String, (usize, u32)>,
+) {
+    // Remove unused.
+    defs.retain(|(name, _)| {
+        if !numbers.contains_key(name) {
+            log::warn!(
+                "footnote `{name}` in `{}` is defined but not referenced",
+                path.map_or_else(|| Cow::from("<unknown>"), |p| p.to_string_lossy())
+            );
+            false
+        } else {
+            true
+        }
+    });
+
+    defs.sort_by_cached_key(|(name, _)| numbers[name].0);
+
+    body.push_str(
+        "<hr>\n\
+         <ol class=\"footnote-definition\">",
+    );
+
+    // Insert the backrefs to the definition, and put the definitions in the output.
+    for (name, mut fn_events) in defs {
+        let count = numbers[&name].1;
+        fn_events.insert(
+            0,
+            Event::Html(format!("<li id=\"footnote-{name}\">").into()),
+        );
+        // Generate the linkbacks.
+        for usage in 1..=count {
+            let nth = if usage == 1 {
+                String::new()
+            } else {
+                usage.to_string()
+            };
+            let backlink =
+                Event::Html(format!(" <a href=\"#fr-{name}-{usage}\">↩{nth}</a>").into());
+            if matches!(fn_events.last(), Some(Event::End(TagEnd::Paragraph))) {
+                // Put the linkback at the end of the last paragraph instead
+                // of on a line by itself.
+                fn_events.insert(fn_events.len() - 1, backlink);
+            } else {
+                // Not a clear place to put it in this circumstance, so put it
+                // at the end.
+                fn_events.push(backlink);
+            }
+        }
+        fn_events.push(Event::Html("</li>\n".into()));
+        html::push_html(body, fn_events.into_iter());
+    }
+
+    body.push_str("</ol>");
+}
+
+/// Wraps tables in a `.table-wrapper` class to apply overflow-x rules to.
+fn wrap_tables(event: Event<'_>) -> (Option<Event<'_>>, Option<Event<'_>>) {
+    match event {
+        Event::Start(Tag::Table(_)) => (
+            Some(Event::Html(r#"<div class="table-wrapper">"#.into())),
+            Some(event),
+        ),
+        Event::End(TagEnd::Table) => (Some(event), Some(Event::Html(r#"</div>"#.into()))),
+        _ => (Some(event), None),
+    }
+}
+
+fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
+    match event {
+        Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info))) => {
+            let info: String = info
+                .chars()
+                .map(|x| match x {
+                    ' ' | '\t' => ',',
+                    _ => x,
+                })
+                .filter(|ch| !ch.is_whitespace())
+                .collect();
+
+            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(info))))
+        }
+        _ => event,
+    }
+}
+
+/// Fix links to the correct location.
+///
+/// This adjusts links, such as turning `.md` extensions to `.html`.
+///
+/// `path` is the path to the page being rendered relative to the root of the
+/// book. This is used for the `print.html` page so that links on the print
+/// page go to the original location. Normal page rendering sets `path` to
+/// None. Ideally, print page links would link to anchors on the print page,
+/// but that is very difficult.
+fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
+    static SCHEME_LINK: LazyLock<Regex> =
+        LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap());
+    static MD_LINK: LazyLock<Regex> =
+        LazyLock::new(|| Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap());
+
+    fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
+        if dest.starts_with('#') {
+            // Fragment-only link.
+            if let Some(path) = path {
+                let mut base = path.display().to_string();
+                if base.ends_with(".md") {
+                    base.replace_range(base.len() - 3.., ".html");
+                }
+                return format!("{base}{dest}").into();
+            } else {
+                return dest;
+            }
+        }
+        // Don't modify links with schemes like `https`.
+        if !SCHEME_LINK.is_match(&dest) {
+            // This is a relative link, adjust it as necessary.
+            let mut fixed_link = String::new();
+            if let Some(path) = path {
+                let base = path
+                    .parent()
+                    .expect("path can't be empty")
+                    .to_str()
+                    .expect("utf-8 paths only");
+                if !base.is_empty() {
+                    write!(fixed_link, "{base}/").unwrap();
+                }
+            }
+
+            if let Some(caps) = MD_LINK.captures(&dest) {
+                fixed_link.push_str(&caps["link"]);
+                fixed_link.push_str(".html");
+                if let Some(anchor) = caps.name("anchor") {
+                    fixed_link.push_str(anchor.as_str());
+                }
+            } else {
+                fixed_link.push_str(&dest);
+            };
+            return CowStr::from(fixed_link);
+        }
+        dest
+    }
+
+    fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
+        // This is a terrible hack, but should be reasonably reliable. Nobody
+        // should ever parse a tag with a regex. However, there isn't anything
+        // in Rust that I know of that is suitable for handling partial html
+        // fragments like those generated by pulldown_cmark.
+        //
+        // There are dozens of HTML tags/attributes that contain paths, so
+        // feel free to add more tags if desired; these are the only ones I
+        // care about right now.
+        static HTML_LINK: LazyLock<Regex> =
+            LazyLock::new(|| Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap());
+
+        HTML_LINK
+            .replace_all(&html, |caps: &regex::Captures<'_>| {
+                let fixed = fix(caps[2].into(), path);
+                format!("{}{}\"", &caps[1], fixed)
+            })
+            .into_owned()
+            .into()
+    }
+
+    match event {
+        Event::Start(Tag::Link {
+            link_type,
+            dest_url,
+            title,
+            id,
+        }) => Event::Start(Tag::Link {
+            link_type,
+            dest_url: fix(dest_url, path),
+            title,
+            id,
+        }),
+        Event::Start(Tag::Image {
+            link_type,
+            dest_url,
+            title,
+            id,
+        }) => Event::Start(Tag::Image {
+            link_type,
+            dest_url: fix(dest_url, path),
+            title,
+            id,
+        }),
+        Event::Html(html) => Event::Html(fix_html(html, path)),
+        Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
+        _ => event,
+    }
+}
+
+/// Escape characters to make it safe for an HTML string.
+pub fn special_escape(mut s: &str) -> String {
+    let mut escaped = String::with_capacity(s.len());
+    let needs_escape: &[char] = &['<', '>', '\'', '"', '\\', '&'];
+    while let Some(next) = s.find(needs_escape) {
+        escaped.push_str(&s[..next]);
+        match s.as_bytes()[next] {
+            b'<' => escaped.push_str("&lt;"),
+            b'>' => escaped.push_str("&gt;"),
+            b'\'' => escaped.push_str("&#39;"),
+            b'"' => escaped.push_str("&quot;"),
+            b'\\' => escaped.push_str("&#92;"),
+            b'&' => escaped.push_str("&amp;"),
+            _ => unreachable!(),
+        }
+        s = &s[next + 1..];
+    }
+    escaped.push_str(s);
+    escaped
+}
--- a/crates/mdbook-markdown/src/tests.rs
+++ b/crates/mdbook-markdown/src/tests.rs
@@ -0,0 +1,147 @@
+use super::render_markdown;
+use super::*;
+
+#[test]
+fn escaped_special() {
+    // Covers the empty string, angle brackets, quote, backslash and ampersand.
+    let inputs = ["", "<", ">", "<>", "<test>", "a<test>b", "'", "\\", "&"];
+    let escaped = inputs.map(special_escape);
+    #[rustfmt::skip]
+    let expected = [
+        "", "&lt;", "&gt;", "&lt;&gt;", "&lt;test&gt;", "a&lt;test&gt;b",
+        "&#39;", "&#92;", "&amp;",
+    ];
+    assert_eq!(escaped, expected);
+}
+
+/// Absolute URLs must come through rendering with their `href` unchanged.
+#[test]
+fn preserves_external_links() {
+    let rendered = render_markdown("[example](https://www.rust-lang.org/)", false);
+    let expected = "<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n";
+    assert_eq!(rendered, expected);
+}
+
+#[test]
+fn it_can_adjust_markdown_links() {
+    // Each pair is (markdown source, expected HTML).
+    let cases = [
+        ("[example](example.md)", "<p><a href=\"example.html\">example</a></p>\n"),
+        (
+            "[example_anchor](example.md#anchor)",
+            "<p><a href=\"example.html#anchor\">example_anchor</a></p>\n",
+        ),
+        // this anchor contains 'md' inside of it
+        (
+            "[phantom data](foo.html#phantomdata)",
+            "<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n",
+        ),
+    ];
+    cases.into_iter().for_each(|(md, html)| assert_eq!(render_markdown(md, false), html));
+}
+
+#[test]
+fn it_can_wrap_tables() {
+    let wrapped_html = r#"
+<div class="table-wrapper"><table><thead><tr><th>Original</th><th>Punycode</th><th>Punycode + Encoding</th></tr></thead><tbody>
+<tr><td>føø</td><td>f-5gaa</td><td>f_5gaa</td></tr>
+</tbody></table>
+</div>
+"#;
+    let markdown = r#"
+| Original        | Punycode        | Punycode + Encoding |
+|-----------------|-----------------|---------------------|
+| føø             | f-5gaa          | f_5gaa              |
+"#;
+    assert_eq!(render_markdown(markdown, false), wrapped_html.trim());
+}
+
+#[test]
+fn it_can_keep_quotes_straight() {
+    assert_eq!("<p>'one'</p>\n", render_markdown("'one'", false));
+}
+
+#[test]
+fn it_can_make_quotes_curly_except_when_they_are_in_code() {
+    let curly_html = r#"<p>‘one’</p>
+<pre><code>'two'
+</code></pre>
+<p><code>'three'</code> ‘four’</p>
+"#;
+    let markdown = r#"
+'one'
+```
+'two'
+```
+`'three'` 'four'"#;
+    assert_eq!(render_markdown(markdown, true), curly_html);
+}
+
+#[test]
+fn whitespace_outside_of_codeblock_header_is_preserved() {
+    let markdown = r#"
+some text with spaces
+```rust
+fn main() {
+// code inside is unchanged
+}
+```
+more text with spaces
+"#;
+    let html = r#"<p>some text with spaces</p>
+<pre><code class="language-rust">fn main() {
+// code inside is unchanged
+}
+</code></pre>
+<p>more text with spaces</p>
+"#;
+    for smart_punctuation in [false, true] {
+        assert_eq!(render_markdown(markdown, smart_punctuation), html);
+    }
+}
+
+#[test]
+fn rust_code_block_properties_are_passed_as_space_delimited_class() {
+    let markdown = r#"
+```rust,no_run,should_panic,property_3
+```
+"#;
+    let html = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
+"#;
+    for smart_punctuation in [false, true] {
+        assert_eq!(render_markdown(markdown, smart_punctuation), html);
+    }
+}
+
+#[test]
+fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
+    let markdown = r#"
+```rust,    no_run,,,should_panic , ,property_3
+```
+"#;
+    let html = r#"<pre><code class="language-rust,,,,,no_run,,,should_panic,,,,property_3"></code></pre>
+"#;
+    for smart_punctuation in [false, true] {
+        assert_eq!(render_markdown(markdown, smart_punctuation), html);
+    }
+}
+
+#[test]
+fn rust_code_block_without_properties_has_proper_html_class() {
+    let html = r#"<pre><code class="language-rust"></code></pre>
+"#;
+    let first = r#"
+```rust
+```
+"#;
+    let second = r#"
+```rust
+```
+"#;
+    // Both inputs are rendered with smart punctuation off and then on.
+    for markdown in [first, second] {
+        for smart_punctuation in [false, true] {
+            assert_eq!(render_markdown(markdown, smart_punctuation), html);
+        }
+    }
+}
--- a/src/renderer/html_handlebars/hbs_renderer.rs
+++ b/src/renderer/html_handlebars/hbs_renderer.rs
@@ -17,6 +17,8 @@ use log::{debug, info, trace, warn};
 use mdbook_core::config::{BookConfig, Code, Config, HtmlConfig, Playground, RustEdition};
 use mdbook_core::utils;
 use mdbook_core::utils::fs::get_404_output_file;
+use mdbook_markdown::{render_markdown, render_markdown_with_path};
+
 use regex::{Captures, Regex};
 use serde_json::json;

@@ -57,13 +59,10 @@ impl HtmlHandlebars {
                .insert("git_repository_edit_url".to_owned(), json!(edit_url));
        }

-        let content = utils::render_markdown(&ch.content, ctx.html_config.smart_punctuation());
+        let content = render_markdown(&ch.content, ctx.html_config.smart_punctuation());

-        let fixed_content = utils::render_markdown_with_path(
-            &ch.content,
-            ctx.html_config.smart_punctuation(),
-            Some(path),
-        );
+        let fixed_content =
+            render_markdown_with_path(&ch.content, ctx.html_config.smart_punctuation(), Some(path));
        if !ctx.is_index && ctx.html_config.print.page_break {
            // Add page break between chapters
            // See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before
@@ -178,8 +177,7 @@ impl HtmlHandlebars {
                    .to_string()
            }
        };
-        let html_content_404 =
-            utils::render_markdown(&content_404, html_config.smart_punctuation());
+        let html_content_404 = render_markdown(&content_404, html_config.smart_punctuation());

        let mut data_404 = data.clone();
        let base_url = if let Some(site_url) = &html_config.site_url {
--- a/src/renderer/html_handlebars/helpers/toc.rs
+++ b/src/renderer/html_handlebars/helpers/toc.rs
@@ -1,11 +1,10 @@
 use std::path::Path;
 use std::{cmp::Ordering, collections::BTreeMap};

-use mdbook_core::utils::special_escape;
-
 use handlebars::{
    Context, Handlebars, Helper, HelperDef, Output, RenderContext, RenderError, RenderErrorReason,
 };
+use mdbook_markdown::special_escape;

 // Handlebars helper to construct TOC
 #[derive(Clone, Copy)]
--- a/src/renderer/html_handlebars/search.rs
+++ b/src/renderer/html_handlebars/search.rs
@@ -8,6 +8,7 @@ use elasticlunr::{Index, IndexBuilder};
 use log::{debug, warn};
 use mdbook_core::config::{Search, SearchChapterSettings};
 use mdbook_core::utils;
+use mdbook_markdown::new_cmark_parser;
 use pulldown_cmark::*;
 use serde::Serialize;

@@ -134,7 +135,7 @@ fn render_item(
        .with_context(|| "Could not convert HTML path to str")?;
    let anchor_base = utils::fs::normalize_path(filepath);

-    let mut p = utils::new_cmark_parser(&chapter.content, false).peekable();
+    let mut p = new_cmark_parser(&chapter.content, false).peekable();

    let mut in_heading = false;
    let max_section_depth = u32::from(search_config.heading_split_level);
--- a/tests/testsuite/markdown.rs
+++ b/tests/testsuite/markdown.rs
@@ -22,10 +22,10 @@ fn footnotes() {
            cmd.expect_stderr(str![[r#"
 [TIMESTAMP] [INFO] (mdbook::book): Book building has started
 [TIMESTAMP] [INFO] (mdbook::book): Running the html backend
-[TIMESTAMP] [WARN] (mdbook_core::utils): footnote `multiple-definitions` in <unknown> defined multiple times - not updating to new definition
-[TIMESTAMP] [WARN] (mdbook_core::utils): footnote `unused` in `<unknown>` is defined but not referenced
-[TIMESTAMP] [WARN] (mdbook_core::utils): footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition
-[TIMESTAMP] [WARN] (mdbook_core::utils): footnote `unused` in `footnotes.md` is defined but not referenced
+[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in <unknown> defined multiple times - not updating to new definition
+[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `<unknown>` is defined but not referenced
+[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition
+[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `footnotes.md` is defined but not referenced
 [TIMESTAMP] [INFO] (mdbook::renderer::html_handlebars::hbs_renderer): HTML book written to `[ROOT]/book`

 "#]]);