Move markdown support to mdbook-markdown

This moves all the code responsible for markdown processing to the
mdbook-markdown crate.
This commit is contained in:
Eric Huss
2025-07-21 15:46:36 -07:00
parent 3278f84373
commit 8f3b6b4776
10 changed files with 556 additions and 521 deletions

10
Cargo.lock generated
View File

@@ -1271,6 +1271,7 @@ dependencies = [
"ignore",
"log",
"mdbook-core",
"mdbook-markdown",
"mdbook-preprocessor",
"mdbook-renderer",
"mdbook-summary",
@@ -1311,6 +1312,15 @@ dependencies = [
"toml",
]
[[package]]
name = "mdbook-markdown"
version = "0.5.0-alpha.1"
dependencies = [
"log",
"pulldown-cmark 0.10.3",
"regex",
]
[[package]]
name = "mdbook-preprocessor"
version = "0.5.0-alpha.1"

View File

@@ -24,6 +24,7 @@ rust-version = "1.85.0" # Keep in sync with installation.md and .github/workflow
anyhow = "1.0.98"
log = "0.4.27"
mdbook-core = { path = "crates/mdbook-core" }
mdbook-markdown = { path = "crates/mdbook-markdown" }
mdbook-preprocessor = { path = "crates/mdbook-preprocessor" }
mdbook-renderer = { path = "crates/mdbook-renderer" }
mdbook-summary = { path = "crates/mdbook-summary" }
@@ -63,6 +64,7 @@ handlebars = "6.0"
hex = "0.4.3"
log.workspace = true
mdbook-core.workspace = true
mdbook-markdown.workspace = true
mdbook-preprocessor.workspace = true
mdbook-renderer.workspace = true
mdbook-summary.workspace = true

View File

@@ -2,12 +2,9 @@
use anyhow::Error;
use log::error;
use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag, TagEnd, html};
use regex::Regex;
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Write;
use std::path::Path;
use std::sync::LazyLock;
pub mod fs;
@@ -83,338 +80,6 @@ pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, us
unique_id
}
/// Fix links to the correct location.
///
/// This adjusts links, such as turning `.md` extensions to `.html`. Link and
/// image destinations are rewritten directly; raw HTML events have the
/// `src`/`href` attribute of `<a>` and `<img>` tags rewritten.
///
/// `path` is the path to the page being rendered relative to the root of the
/// book. This is used for the `print.html` page so that links on the print
/// page go to the original location. Normal page rendering sets `path` to
/// None. Ideally, print page links would link to anchors on the print page,
/// but that is very difficult.
///
/// # Panics
///
/// Panics when a relative link is adjusted and `path` is set but has no
/// parent or is not valid UTF-8.
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
    // A leading URL scheme such as `https:` or `mailto:`.
    static SCHEME_LINK: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap());
    // A destination whose path part ends in `.md`, optionally followed by a
    // query and/or fragment. The pattern is anchored on both ends so that a
    // `.md` in the middle of a name (`foo.md.bak`) or inside a fragment is
    // left alone, and so that nothing after `.md` is silently dropped.
    static MD_LINK: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?s)^(?P<link>[^?#]*)\.md(?P<anchor>[?#].*)?$").unwrap()
    });

    /// Rewrites a single link destination.
    fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
        if dest.starts_with('#') {
            // Fragment-only link.
            if let Some(path) = path {
                let mut base = path.display().to_string();
                if base.ends_with(".md") {
                    base.replace_range(base.len() - 3.., ".html");
                }
                return format!("{base}{dest}").into();
            } else {
                return dest;
            }
        }
        // Don't modify links with schemes like `https`.
        if !SCHEME_LINK.is_match(&dest) {
            // This is a relative link, adjust it as necessary.
            let mut fixed_link = String::new();
            if let Some(path) = path {
                let base = path
                    .parent()
                    .expect("path can't be empty")
                    .to_str()
                    .expect("utf-8 paths only");
                if !base.is_empty() {
                    write!(fixed_link, "{base}/").unwrap();
                }
            }

            if let Some(caps) = MD_LINK.captures(&dest) {
                fixed_link.push_str(&caps["link"]);
                fixed_link.push_str(".html");
                // Re-attach whatever followed `.md` (query and/or fragment).
                if let Some(anchor) = caps.name("anchor") {
                    fixed_link.push_str(anchor.as_str());
                }
            } else {
                fixed_link.push_str(&dest);
            };
            return CowStr::from(fixed_link);
        }
        dest
    }

    /// Rewrites the `src`/`href` attributes found in a raw HTML fragment.
    fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
        // This is a terrible hack, but should be reasonably reliable. Nobody
        // should ever parse a tag with a regex. However, there isn't anything
        // in Rust that I know of that is suitable for handling partial html
        // fragments like those generated by pulldown_cmark.
        //
        // There are dozens of HTML tags/attributes that contain paths, so
        // feel free to add more tags if desired; these are the only ones I
        // care about right now.
        static HTML_LINK: LazyLock<Regex> =
            LazyLock::new(|| Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap());

        HTML_LINK
            .replace_all(&html, |caps: &regex::Captures<'_>| {
                let fixed = fix(caps[2].into(), path);
                format!("{}{}\"", &caps[1], fixed)
            })
            .into_owned()
            .into()
    }

    match event {
        Event::Start(Tag::Link {
            link_type,
            dest_url,
            title,
            id,
        }) => Event::Start(Tag::Link {
            link_type,
            dest_url: fix(dest_url, path),
            title,
            id,
        }),
        Event::Start(Tag::Image {
            link_type,
            dest_url,
            title,
            id,
        }) => Event::Start(Tag::Image {
            link_type,
            dest_url: fix(dest_url, path),
            title,
            id,
        }),
        Event::Html(html) => Event::Html(fix_html(html, path)),
        Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
        _ => event,
    }
}
/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
pub fn render_markdown(text: &str, smart_punctuation: bool) -> String {
render_markdown_with_path(text, smart_punctuation, None)
}
/// Builds a pulldown-cmark [`Parser`] over `text`.
///
/// Tables, footnotes, strikethrough, task lists and heading attributes are
/// always enabled; smart punctuation is enabled only when
/// `smart_punctuation` is `true`.
pub fn new_cmark_parser(text: &str, smart_punctuation: bool) -> Parser<'_> {
    let mut extensions = Options::ENABLE_TABLES
        | Options::ENABLE_FOOTNOTES
        | Options::ENABLE_STRIKETHROUGH
        | Options::ENABLE_TASKLISTS
        | Options::ENABLE_HEADING_ATTRIBUTES;
    extensions.set(Options::ENABLE_SMART_PUNCTUATION, smart_punctuation);
    Parser::new_ext(text, extensions)
}
/// Renders markdown to HTML.
///
/// The event stream from [`new_cmark_parser`] is post-processed to clean up
/// code block info strings, adjust links, wrap tables, and pull footnote
/// definitions out of the flow so they can be emitted as a numbered list at
/// the end of the output.
///
/// `path` should only be set if this is being generated for the consolidated
/// print page. It should point to the page being rendered relative to the
/// root of the book.
pub fn render_markdown_with_path(
    text: &str,
    smart_punctuation: bool,
    path: Option<&Path>,
) -> String {
    let mut body = String::with_capacity(text.len() * 3 / 2);

    // Based on
    // https://github.com/pulldown-cmark/pulldown-cmark/blob/master/pulldown-cmark/examples/footnote-rewrite.rs

    // This handling of footnotes is a two-pass process. This is done to
    // support linkbacks, little arrows that allow you to jump back to the
    // footnote reference. The first pass collects the footnote definitions.
    // The second pass modifies those definitions to include the linkbacks,
    // and inserts the definitions back into the `events` list.

    // This is a map of name -> (number, count)
    // `name` is the name of the footnote.
    // `number` is the footnote number displayed in the output.
    // `count` is the number of references to this footnote (used for multiple
    // linkbacks, and checking for unused footnotes).
    let mut footnote_numbers = HashMap::new();
    // This is a map of name -> Vec<Event>
    // `name` is the name of the footnote.
    // The events list is the list of events needed to build the footnote definition.
    let mut footnote_defs = HashMap::new();

    // The following are used when currently processing a footnote definition.
    //
    // This is the name of the footnote (escaped).
    let mut in_footnote_name = String::new();
    // This is the list of events to build the footnote definition.
    let mut in_footnote = Vec::new();

    let events = new_cmark_parser(text, smart_punctuation)
        .map(clean_codeblock_headers)
        .map(|event| adjust_links(event, path))
        .flat_map(|event| {
            let (a, b) = wrap_tables(event);
            a.into_iter().chain(b)
        })
        // Footnote rewriting must go last to ensure inner definition contents
        // are processed (since they get pulled out of the initial stream).
        .filter_map(|event| {
            match event {
                Event::Start(Tag::FootnoteDefinition(name)) => {
                    if !in_footnote.is_empty() {
                        log::warn!("internal bug: nested footnote not expected in {path:?}");
                    }
                    in_footnote_name = special_escape(&name);
                    None
                }
                Event::End(TagEnd::FootnoteDefinition) => {
                    let def_events = std::mem::take(&mut in_footnote);
                    let name = std::mem::take(&mut in_footnote_name);
                    // One map lookup: the first definition wins, later ones
                    // only produce a warning.
                    match footnote_defs.entry(name) {
                        std::collections::hash_map::Entry::Occupied(existing) => {
                            log::warn!(
                                "footnote `{name}` in {} defined multiple times - \
                                 not updating to new definition",
                                path.map_or_else(|| Cow::from("<unknown>"), |p| p.to_string_lossy()),
                                name = existing.key(),
                            );
                        }
                        std::collections::hash_map::Entry::Vacant(slot) => {
                            slot.insert(def_events);
                        }
                    }
                    None
                }
                Event::FootnoteReference(name) => {
                    let name = special_escape(&name);
                    // Numbers are handed out in order of first reference.
                    let len = footnote_numbers.len() + 1;
                    let (n, count) = footnote_numbers.entry(name.clone()).or_insert((len, 0));
                    *count += 1;
                    let html = Event::Html(
                        format!(
                            "<sup class=\"footnote-reference\" id=\"fr-{name}-{count}\">\
                             <a href=\"#footnote-{name}\">{n}</a>\
                             </sup>"
                        )
                        .into(),
                    );
                    if in_footnote_name.is_empty() {
                        Some(html)
                    } else {
                        // While inside a footnote, we need to accumulate.
                        in_footnote.push(html);
                        None
                    }
                }
                // While inside a footnote, accumulate all events into a local.
                _ if !in_footnote_name.is_empty() => {
                    in_footnote.push(event);
                    None
                }
                _ => Some(event),
            }
        });

    html::push_html(&mut body, events);

    if !footnote_defs.is_empty() {
        add_footnote_defs(
            &mut body,
            path,
            footnote_defs.into_iter().collect(),
            &footnote_numbers,
        );
    }

    body
}
/// Adds all footnote definitions into `body`.
fn add_footnote_defs(
body: &mut String,
path: Option<&Path>,
mut defs: Vec<(String, Vec<Event<'_>>)>,
numbers: &HashMap<String, (usize, u32)>,
) {
// Remove unused.
defs.retain(|(name, _)| {
if !numbers.contains_key(name) {
log::warn!(
"footnote `{name}` in `{}` is defined but not referenced",
path.map_or_else(|| Cow::from("<unknown>"), |p| p.to_string_lossy())
);
false
} else {
true
}
});
defs.sort_by_cached_key(|(name, _)| numbers[name].0);
body.push_str(
"<hr>\n\
<ol class=\"footnote-definition\">",
);
// Insert the backrefs to the definition, and put the definitions in the output.
for (name, mut fn_events) in defs {
let count = numbers[&name].1;
fn_events.insert(
0,
Event::Html(format!("<li id=\"footnote-{name}\">").into()),
);
// Generate the linkbacks.
for usage in 1..=count {
let nth = if usage == 1 {
String::new()
} else {
usage.to_string()
};
let backlink =
Event::Html(format!(" <a href=\"#fr-{name}-{usage}\">↩{nth}</a>").into());
if matches!(fn_events.last(), Some(Event::End(TagEnd::Paragraph))) {
// Put the linkback at the end of the last paragraph instead
// of on a line by itself.
fn_events.insert(fn_events.len() - 1, backlink);
} else {
// Not a clear place to put it in this circumstance, so put it
// at the end.
fn_events.push(backlink);
}
}
fn_events.push(Event::Html("</li>\n".into()));
html::push_html(body, fn_events.into_iter());
}
body.push_str("</ol>");
}
/// Wraps tables in a `.table-wrapper` class to apply overflow-x rules to.
fn wrap_tables(event: Event<'_>) -> (Option<Event<'_>>, Option<Event<'_>>) {
match event {
Event::Start(Tag::Table(_)) => (
Some(Event::Html(r#"<div class="table-wrapper">"#.into())),
Some(event),
),
Event::End(TagEnd::Table) => (Some(event), Some(Event::Html(r#"</div>"#.into()))),
_ => (Some(event), None),
}
}
/// Normalizes the info string of a fenced code block.
///
/// Spaces and tabs in the info string become commas and any other whitespace
/// character is dropped. Every other event is returned unchanged.
fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
    if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref header))) = event {
        let mut cleaned = String::new();
        for ch in header.chars() {
            match ch {
                ' ' | '\t' => cleaned.push(','),
                ws if ws.is_whitespace() => {}
                other => cleaned.push(other),
            }
        }
        let fence = CodeBlockKind::Fenced(CowStr::from(cleaned));
        return Event::Start(Tag::CodeBlock(fence));
    }
    event
}
/// Prints a "backtrace" of some `Error`.
pub fn log_backtrace(e: &Error) {
error!("Error: {}", e);
@@ -424,27 +89,6 @@ pub fn log_backtrace(e: &Error) {
}
}
/// Escapes `<`, `>`, `'`, `"`, `\` and `&` as HTML entities.
///
/// All other characters are copied to the result unchanged.
pub fn special_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '\'' => out.push_str("&#39;"),
            '"' => out.push_str("&quot;"),
            '\\' => out.push_str("&#92;"),
            '&' => out.push_str("&amp;"),
            other => out.push(other),
        }
    }
    out
}
/// Escape `<` and `>` for HTML.
pub fn bracket_escape(mut s: &str) -> String {
let mut escaped = String::with_capacity(s.len());
@@ -464,143 +108,7 @@ pub fn bracket_escape(mut s: &str) -> String {
#[cfg(test)]
mod tests {
use super::{bracket_escape, special_escape};
mod render_markdown {
use super::super::render_markdown;
#[test]
fn preserves_external_links() {
assert_eq!(
render_markdown("[example](https://www.rust-lang.org/)", false),
"<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
);
}
#[test]
fn it_can_adjust_markdown_links() {
assert_eq!(
render_markdown("[example](example.md)", false),
"<p><a href=\"example.html\">example</a></p>\n"
);
assert_eq!(
render_markdown("[example_anchor](example.md#anchor)", false),
"<p><a href=\"example.html#anchor\">example_anchor</a></p>\n"
);
// this anchor contains 'md' inside of it
assert_eq!(
render_markdown("[phantom data](foo.html#phantomdata)", false),
"<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n"
);
}
#[test]
fn it_can_wrap_tables() {
let src = r#"
| Original | Punycode | Punycode + Encoding |
|-----------------|-----------------|---------------------|
| føø | f-5gaa | f_5gaa |
"#;
let out = r#"
<div class="table-wrapper"><table><thead><tr><th>Original</th><th>Punycode</th><th>Punycode + Encoding</th></tr></thead><tbody>
<tr><td>føø</td><td>f-5gaa</td><td>f_5gaa</td></tr>
</tbody></table>
</div>
"#.trim();
assert_eq!(render_markdown(src, false), out);
}
#[test]
fn it_can_keep_quotes_straight() {
assert_eq!(render_markdown("'one'", false), "<p>'one'</p>\n");
}
#[test]
fn it_can_make_quotes_curly_except_when_they_are_in_code() {
let input = r#"
'one'
```
'two'
```
`'three'` 'four'"#;
let expected = r#"<p>one</p>
<pre><code>'two'
</code></pre>
<p><code>'three'</code> four</p>
"#;
assert_eq!(render_markdown(input, true), expected);
}
#[test]
fn whitespace_outside_of_codeblock_header_is_preserved() {
let input = r#"
some text with spaces
```rust
fn main() {
// code inside is unchanged
}
```
more text with spaces
"#;
let expected = r#"<p>some text with spaces</p>
<pre><code class="language-rust">fn main() {
// code inside is unchanged
}
</code></pre>
<p>more text with spaces</p>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
#[test]
fn rust_code_block_properties_are_passed_as_space_delimited_class() {
let input = r#"
```rust,no_run,should_panic,property_3
```
"#;
let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
#[test]
fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
let input = r#"
```rust, no_run,,,should_panic , ,property_3
```
"#;
let expected = r#"<pre><code class="language-rust,,,,,no_run,,,should_panic,,,,property_3"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
#[test]
fn rust_code_block_without_properties_has_proper_html_class() {
let input = r#"
```rust
```
"#;
let expected = r#"<pre><code class="language-rust"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
let input = r#"
```rust
```
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
}
use super::bracket_escape;
#[allow(deprecated)]
mod id_from_content {
@@ -690,17 +198,4 @@ more text with spaces
assert_eq!(bracket_escape("'"), "'");
assert_eq!(bracket_escape("\\"), "\\");
}
#[test]
fn escaped_special() {
assert_eq!(special_escape(""), "");
assert_eq!(special_escape("<"), "&lt;");
assert_eq!(special_escape(">"), "&gt;");
assert_eq!(special_escape("<>"), "&lt;&gt;");
assert_eq!(special_escape("<test>"), "&lt;test&gt;");
assert_eq!(special_escape("a<test>b"), "a&lt;test&gt;b");
assert_eq!(special_escape("'"), "&#39;");
assert_eq!(special_escape("\\"), "&#92;");
assert_eq!(special_escape("&"), "&amp;");
}
}

View File

@@ -0,0 +1,16 @@
[package]
name = "mdbook-markdown"
version = "0.5.0-alpha.1"
description = "Markdown processing used in mdBook"
edition.workspace = true
license.workspace = true
repository.workspace = true
rust-version.workspace = true
[dependencies]
log.workspace = true
pulldown-cmark.workspace = true
regex.workspace = true
[lints]
workspace = true

View File

@@ -0,0 +1,367 @@
//! Markdown processing used in mdBook.
use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag, TagEnd, html};
use regex::Regex;
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Write;
use std::path::Path;
use std::sync::LazyLock;
pub use pulldown_cmark;
#[cfg(test)]
mod tests;
/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
pub fn render_markdown(text: &str, smart_punctuation: bool) -> String {
render_markdown_with_path(text, smart_punctuation, None)
}
/// Builds a pulldown-cmark [`Parser`] over `text`.
///
/// Tables, footnotes, strikethrough, task lists and heading attributes are
/// always enabled; smart punctuation is enabled only when
/// `smart_punctuation` is `true`.
pub fn new_cmark_parser(text: &str, smart_punctuation: bool) -> Parser<'_> {
    let mut extensions = Options::ENABLE_TABLES
        | Options::ENABLE_FOOTNOTES
        | Options::ENABLE_STRIKETHROUGH
        | Options::ENABLE_TASKLISTS
        | Options::ENABLE_HEADING_ATTRIBUTES;
    extensions.set(Options::ENABLE_SMART_PUNCTUATION, smart_punctuation);
    Parser::new_ext(text, extensions)
}
/// Renders markdown to HTML.
///
/// The event stream from [`new_cmark_parser`] is post-processed to clean up
/// code block info strings, adjust links, wrap tables, and pull footnote
/// definitions out of the flow so they can be emitted as a numbered list at
/// the end of the output.
///
/// `path` should only be set if this is being generated for the consolidated
/// print page. It should point to the page being rendered relative to the
/// root of the book.
pub fn render_markdown_with_path(
    text: &str,
    smart_punctuation: bool,
    path: Option<&Path>,
) -> String {
    let mut body = String::with_capacity(text.len() * 3 / 2);

    // Based on
    // https://github.com/pulldown-cmark/pulldown-cmark/blob/master/pulldown-cmark/examples/footnote-rewrite.rs

    // This handling of footnotes is a two-pass process. This is done to
    // support linkbacks, little arrows that allow you to jump back to the
    // footnote reference. The first pass collects the footnote definitions.
    // The second pass modifies those definitions to include the linkbacks,
    // and inserts the definitions back into the `events` list.

    // This is a map of name -> (number, count)
    // `name` is the name of the footnote.
    // `number` is the footnote number displayed in the output.
    // `count` is the number of references to this footnote (used for multiple
    // linkbacks, and checking for unused footnotes).
    let mut footnote_numbers = HashMap::new();
    // This is a map of name -> Vec<Event>
    // `name` is the name of the footnote.
    // The events list is the list of events needed to build the footnote definition.
    let mut footnote_defs = HashMap::new();

    // The following are used when currently processing a footnote definition.
    //
    // This is the name of the footnote (escaped).
    let mut in_footnote_name = String::new();
    // This is the list of events to build the footnote definition.
    let mut in_footnote = Vec::new();

    let events = new_cmark_parser(text, smart_punctuation)
        .map(clean_codeblock_headers)
        .map(|event| adjust_links(event, path))
        .flat_map(|event| {
            let (a, b) = wrap_tables(event);
            a.into_iter().chain(b)
        })
        // Footnote rewriting must go last to ensure inner definition contents
        // are processed (since they get pulled out of the initial stream).
        .filter_map(|event| {
            match event {
                Event::Start(Tag::FootnoteDefinition(name)) => {
                    if !in_footnote.is_empty() {
                        log::warn!("internal bug: nested footnote not expected in {path:?}");
                    }
                    in_footnote_name = special_escape(&name);
                    None
                }
                Event::End(TagEnd::FootnoteDefinition) => {
                    let def_events = std::mem::take(&mut in_footnote);
                    let name = std::mem::take(&mut in_footnote_name);
                    // One map lookup: the first definition wins, later ones
                    // only produce a warning.
                    match footnote_defs.entry(name) {
                        std::collections::hash_map::Entry::Occupied(existing) => {
                            log::warn!(
                                "footnote `{name}` in {} defined multiple times - \
                                 not updating to new definition",
                                path.map_or_else(|| Cow::from("<unknown>"), |p| p.to_string_lossy()),
                                name = existing.key(),
                            );
                        }
                        std::collections::hash_map::Entry::Vacant(slot) => {
                            slot.insert(def_events);
                        }
                    }
                    None
                }
                Event::FootnoteReference(name) => {
                    let name = special_escape(&name);
                    // Numbers are handed out in order of first reference.
                    let len = footnote_numbers.len() + 1;
                    let (n, count) = footnote_numbers.entry(name.clone()).or_insert((len, 0));
                    *count += 1;
                    let html = Event::Html(
                        format!(
                            "<sup class=\"footnote-reference\" id=\"fr-{name}-{count}\">\
                             <a href=\"#footnote-{name}\">{n}</a>\
                             </sup>"
                        )
                        .into(),
                    );
                    if in_footnote_name.is_empty() {
                        Some(html)
                    } else {
                        // While inside a footnote, we need to accumulate.
                        in_footnote.push(html);
                        None
                    }
                }
                // While inside a footnote, accumulate all events into a local.
                _ if !in_footnote_name.is_empty() => {
                    in_footnote.push(event);
                    None
                }
                _ => Some(event),
            }
        });

    html::push_html(&mut body, events);

    if !footnote_defs.is_empty() {
        add_footnote_defs(
            &mut body,
            path,
            footnote_defs.into_iter().collect(),
            &footnote_numbers,
        );
    }

    body
}
/// Adds all footnote definitions into `body`.
fn add_footnote_defs(
body: &mut String,
path: Option<&Path>,
mut defs: Vec<(String, Vec<Event<'_>>)>,
numbers: &HashMap<String, (usize, u32)>,
) {
// Remove unused.
defs.retain(|(name, _)| {
if !numbers.contains_key(name) {
log::warn!(
"footnote `{name}` in `{}` is defined but not referenced",
path.map_or_else(|| Cow::from("<unknown>"), |p| p.to_string_lossy())
);
false
} else {
true
}
});
defs.sort_by_cached_key(|(name, _)| numbers[name].0);
body.push_str(
"<hr>\n\
<ol class=\"footnote-definition\">",
);
// Insert the backrefs to the definition, and put the definitions in the output.
for (name, mut fn_events) in defs {
let count = numbers[&name].1;
fn_events.insert(
0,
Event::Html(format!("<li id=\"footnote-{name}\">").into()),
);
// Generate the linkbacks.
for usage in 1..=count {
let nth = if usage == 1 {
String::new()
} else {
usage.to_string()
};
let backlink =
Event::Html(format!(" <a href=\"#fr-{name}-{usage}\">↩{nth}</a>").into());
if matches!(fn_events.last(), Some(Event::End(TagEnd::Paragraph))) {
// Put the linkback at the end of the last paragraph instead
// of on a line by itself.
fn_events.insert(fn_events.len() - 1, backlink);
} else {
// Not a clear place to put it in this circumstance, so put it
// at the end.
fn_events.push(backlink);
}
}
fn_events.push(Event::Html("</li>\n".into()));
html::push_html(body, fn_events.into_iter());
}
body.push_str("</ol>");
}
/// Wraps tables in a `.table-wrapper` class to apply overflow-x rules to.
fn wrap_tables(event: Event<'_>) -> (Option<Event<'_>>, Option<Event<'_>>) {
match event {
Event::Start(Tag::Table(_)) => (
Some(Event::Html(r#"<div class="table-wrapper">"#.into())),
Some(event),
),
Event::End(TagEnd::Table) => (Some(event), Some(Event::Html(r#"</div>"#.into()))),
_ => (Some(event), None),
}
}
/// Normalizes the info string of a fenced code block.
///
/// Spaces and tabs in the info string become commas and any other whitespace
/// character is dropped. Every other event is returned unchanged.
fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
    if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref header))) = event {
        let mut cleaned = String::new();
        for ch in header.chars() {
            match ch {
                ' ' | '\t' => cleaned.push(','),
                ws if ws.is_whitespace() => {}
                other => cleaned.push(other),
            }
        }
        let fence = CodeBlockKind::Fenced(CowStr::from(cleaned));
        return Event::Start(Tag::CodeBlock(fence));
    }
    event
}
/// Fix links to the correct location.
///
/// This adjusts links, such as turning `.md` extensions to `.html`. Link and
/// image destinations are rewritten directly; raw HTML events have the
/// `src`/`href` attribute of `<a>` and `<img>` tags rewritten.
///
/// `path` is the path to the page being rendered relative to the root of the
/// book. This is used for the `print.html` page so that links on the print
/// page go to the original location. Normal page rendering sets `path` to
/// None. Ideally, print page links would link to anchors on the print page,
/// but that is very difficult.
///
/// # Panics
///
/// Panics when a relative link is adjusted and `path` is set but has no
/// parent or is not valid UTF-8.
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
    // A leading URL scheme such as `https:` or `mailto:`.
    static SCHEME_LINK: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap());
    // A destination whose path part ends in `.md`, optionally followed by a
    // query and/or fragment. The pattern is anchored on both ends so that a
    // `.md` in the middle of a name (`foo.md.bak`) or inside a fragment is
    // left alone, and so that nothing after `.md` is silently dropped.
    static MD_LINK: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?s)^(?P<link>[^?#]*)\.md(?P<anchor>[?#].*)?$").unwrap()
    });

    /// Rewrites a single link destination.
    fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
        if dest.starts_with('#') {
            // Fragment-only link.
            if let Some(path) = path {
                let mut base = path.display().to_string();
                if base.ends_with(".md") {
                    base.replace_range(base.len() - 3.., ".html");
                }
                return format!("{base}{dest}").into();
            } else {
                return dest;
            }
        }
        // Don't modify links with schemes like `https`.
        if !SCHEME_LINK.is_match(&dest) {
            // This is a relative link, adjust it as necessary.
            let mut fixed_link = String::new();
            if let Some(path) = path {
                let base = path
                    .parent()
                    .expect("path can't be empty")
                    .to_str()
                    .expect("utf-8 paths only");
                if !base.is_empty() {
                    write!(fixed_link, "{base}/").unwrap();
                }
            }

            if let Some(caps) = MD_LINK.captures(&dest) {
                fixed_link.push_str(&caps["link"]);
                fixed_link.push_str(".html");
                // Re-attach whatever followed `.md` (query and/or fragment).
                if let Some(anchor) = caps.name("anchor") {
                    fixed_link.push_str(anchor.as_str());
                }
            } else {
                fixed_link.push_str(&dest);
            };
            return CowStr::from(fixed_link);
        }
        dest
    }

    /// Rewrites the `src`/`href` attributes found in a raw HTML fragment.
    fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
        // This is a terrible hack, but should be reasonably reliable. Nobody
        // should ever parse a tag with a regex. However, there isn't anything
        // in Rust that I know of that is suitable for handling partial html
        // fragments like those generated by pulldown_cmark.
        //
        // There are dozens of HTML tags/attributes that contain paths, so
        // feel free to add more tags if desired; these are the only ones I
        // care about right now.
        static HTML_LINK: LazyLock<Regex> =
            LazyLock::new(|| Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap());

        HTML_LINK
            .replace_all(&html, |caps: &regex::Captures<'_>| {
                let fixed = fix(caps[2].into(), path);
                format!("{}{}\"", &caps[1], fixed)
            })
            .into_owned()
            .into()
    }

    match event {
        Event::Start(Tag::Link {
            link_type,
            dest_url,
            title,
            id,
        }) => Event::Start(Tag::Link {
            link_type,
            dest_url: fix(dest_url, path),
            title,
            id,
        }),
        Event::Start(Tag::Image {
            link_type,
            dest_url,
            title,
            id,
        }) => Event::Start(Tag::Image {
            link_type,
            dest_url: fix(dest_url, path),
            title,
            id,
        }),
        Event::Html(html) => Event::Html(fix_html(html, path)),
        Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
        _ => event,
    }
}
/// Escapes `<`, `>`, `'`, `"`, `\` and `&` as HTML entities.
///
/// All other characters are copied to the result unchanged.
pub fn special_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '\'' => out.push_str("&#39;"),
            '"' => out.push_str("&quot;"),
            '\\' => out.push_str("&#92;"),
            '&' => out.push_str("&amp;"),
            other => out.push(other),
        }
    }
    out
}

View File

@@ -0,0 +1,147 @@
use super::render_markdown;
use super::*;
#[test]
fn escaped_special() {
assert_eq!(special_escape(""), "");
assert_eq!(special_escape("<"), "&lt;");
assert_eq!(special_escape(">"), "&gt;");
assert_eq!(special_escape("<>"), "&lt;&gt;");
assert_eq!(special_escape("<test>"), "&lt;test&gt;");
assert_eq!(special_escape("a<test>b"), "a&lt;test&gt;b");
assert_eq!(special_escape("'"), "&#39;");
assert_eq!(special_escape("\\"), "&#92;");
assert_eq!(special_escape("&"), "&amp;");
}
/// A link whose destination carries a URL scheme is rendered untouched.
#[test]
fn preserves_external_links() {
    let rendered = render_markdown("[example](https://www.rust-lang.org/)", false);
    let expected = "<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n";
    assert_eq!(rendered, expected);
}
/// `.md` destinations are rewritten to `.html`, with or without an anchor,
/// while a destination that merely contains "md" is left alone.
#[test]
fn it_can_adjust_markdown_links() {
    let cases = [
        (
            "[example](example.md)",
            "<p><a href=\"example.html\">example</a></p>\n",
        ),
        (
            "[example_anchor](example.md#anchor)",
            "<p><a href=\"example.html#anchor\">example_anchor</a></p>\n",
        ),
        // this anchor contains 'md' inside of it
        (
            "[phantom data](foo.html#phantomdata)",
            "<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n",
        ),
    ];
    for (markdown, html) in cases {
        assert_eq!(render_markdown(markdown, false), html);
    }
}
// A markdown table must be rendered inside a `<div class="table-wrapper">`
// element; the expected output is trimmed, so it has no leading or trailing
// newline.
#[test]
fn it_can_wrap_tables() {
let src = r#"
| Original | Punycode | Punycode + Encoding |
|-----------------|-----------------|---------------------|
| føø | f-5gaa | f_5gaa |
"#;
let out = r#"
<div class="table-wrapper"><table><thead><tr><th>Original</th><th>Punycode</th><th>Punycode + Encoding</th></tr></thead><tbody>
<tr><td>føø</td><td>f-5gaa</td><td>f_5gaa</td></tr>
</tbody></table>
</div>
"#.trim();
assert_eq!(render_markdown(src, false), out);
}
/// With smart punctuation disabled, straight quotes stay straight.
#[test]
fn it_can_keep_quotes_straight() {
    let rendered = render_markdown("'one'", false);
    assert_eq!(rendered, "<p>'one'</p>\n");
}
/// With smart punctuation enabled, quotes in prose become curly while quotes
/// inside fenced code blocks and inline code stay straight.
#[test]
fn it_can_make_quotes_curly_except_when_they_are_in_code() {
    let input = r#"
'one'
```
'two'
```
`'three'` 'four'"#;
    // The prose quotes must come out as `‘…’`; without them the expectation
    // would describe output with the quotes removed entirely.
    let expected = r#"<p>‘one’</p>
<pre><code>'two'
</code></pre>
<p><code>'three'</code> ‘four’</p>
"#;
    assert_eq!(render_markdown(input, true), expected);
}
// Text outside a fenced code block header, and the code inside the block,
// must render the same with smart punctuation off and on.
#[test]
fn whitespace_outside_of_codeblock_header_is_preserved() {
let input = r#"
some text with spaces
```rust
fn main() {
// code inside is unchanged
}
```
more text with spaces
"#;
let expected = r#"<p>some text with spaces</p>
<pre><code class="language-rust">fn main() {
// code inside is unchanged
}
</code></pre>
<p>more text with spaces</p>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
// Comma-separated properties in a code block info string are carried into the
// `language-…` class unchanged. Note that, despite the test name, the
// expected class value is comma-delimited.
#[test]
fn rust_code_block_properties_are_passed_as_space_delimited_class() {
let input = r#"
```rust,no_run,should_panic,property_3
```
"#;
let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}
/// Every space in a code block info string is turned into a comma, so runs
/// of spaces show up as runs of commas in the `language-…` class.
#[test]
fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
    // `rust,` followed by four spaces yields the five commas the expected
    // class has between `rust` and `no_run`.
    let input = r#"
```rust,    no_run,,,should_panic , ,property_3
```
"#;
    let expected = r#"<pre><code class="language-rust,,,,,no_run,,,should_panic,,,,property_3"></code></pre>
"#;
    assert_eq!(render_markdown(input, false), expected);
    assert_eq!(render_markdown(input, true), expected);
}
// A bare `rust` info string must produce exactly `class="language-rust"`,
// with smart punctuation off and on.
#[test]
fn rust_code_block_without_properties_has_proper_html_class() {
let input = r#"
```rust
```
"#;
let expected = r#"<pre><code class="language-rust"></code></pre>
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
// NOTE(review): this second input reads identically to the first as shown
// here; it may originally have differed only in whitespace — confirm.
let input = r#"
```rust
```
"#;
assert_eq!(render_markdown(input, false), expected);
assert_eq!(render_markdown(input, true), expected);
}

View File

@@ -17,6 +17,8 @@ use log::{debug, info, trace, warn};
use mdbook_core::config::{BookConfig, Code, Config, HtmlConfig, Playground, RustEdition};
use mdbook_core::utils;
use mdbook_core::utils::fs::get_404_output_file;
use mdbook_markdown::{render_markdown, render_markdown_with_path};
use regex::{Captures, Regex};
use serde_json::json;
@@ -57,13 +59,10 @@ impl HtmlHandlebars {
.insert("git_repository_edit_url".to_owned(), json!(edit_url));
}
let content = utils::render_markdown(&ch.content, ctx.html_config.smart_punctuation());
let content = render_markdown(&ch.content, ctx.html_config.smart_punctuation());
let fixed_content = utils::render_markdown_with_path(
&ch.content,
ctx.html_config.smart_punctuation(),
Some(path),
);
let fixed_content =
render_markdown_with_path(&ch.content, ctx.html_config.smart_punctuation(), Some(path));
if !ctx.is_index && ctx.html_config.print.page_break {
// Add page break between chapters
// See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before
@@ -178,8 +177,7 @@ impl HtmlHandlebars {
.to_string()
}
};
let html_content_404 =
utils::render_markdown(&content_404, html_config.smart_punctuation());
let html_content_404 = render_markdown(&content_404, html_config.smart_punctuation());
let mut data_404 = data.clone();
let base_url = if let Some(site_url) = &html_config.site_url {

View File

@@ -1,11 +1,10 @@
use std::path::Path;
use std::{cmp::Ordering, collections::BTreeMap};
use mdbook_core::utils::special_escape;
use handlebars::{
Context, Handlebars, Helper, HelperDef, Output, RenderContext, RenderError, RenderErrorReason,
};
use mdbook_markdown::special_escape;
// Handlebars helper to construct TOC
#[derive(Clone, Copy)]

View File

@@ -8,6 +8,7 @@ use elasticlunr::{Index, IndexBuilder};
use log::{debug, warn};
use mdbook_core::config::{Search, SearchChapterSettings};
use mdbook_core::utils;
use mdbook_markdown::new_cmark_parser;
use pulldown_cmark::*;
use serde::Serialize;
@@ -134,7 +135,7 @@ fn render_item(
.with_context(|| "Could not convert HTML path to str")?;
let anchor_base = utils::fs::normalize_path(filepath);
let mut p = utils::new_cmark_parser(&chapter.content, false).peekable();
let mut p = new_cmark_parser(&chapter.content, false).peekable();
let mut in_heading = false;
let max_section_depth = u32::from(search_config.heading_split_level);

View File

@@ -22,10 +22,10 @@ fn footnotes() {
cmd.expect_stderr(str![[r#"
[TIMESTAMP] [INFO] (mdbook::book): Book building has started
[TIMESTAMP] [INFO] (mdbook::book): Running the html backend
[TIMESTAMP] [WARN] (mdbook_core::utils): footnote `multiple-definitions` in <unknown> defined multiple times - not updating to new definition
[TIMESTAMP] [WARN] (mdbook_core::utils): footnote `unused` in `<unknown>` is defined but not referenced
[TIMESTAMP] [WARN] (mdbook_core::utils): footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition
[TIMESTAMP] [WARN] (mdbook_core::utils): footnote `unused` in `footnotes.md` is defined but not referenced
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in <unknown> defined multiple times - not updating to new definition
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `<unknown>` is defined but not referenced
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition
[TIMESTAMP] [WARN] (mdbook_markdown): footnote `unused` in `footnotes.md` is defined but not referenced
[TIMESTAMP] [INFO] (mdbook::renderer::html_handlebars::hbs_renderer): HTML book written to `[ROOT]/book`
"#]]);