Merge pull request #2844 from ehuss/html-tokenize

Add a new HTML rendering pipeline
This commit is contained in:
Eric Huss
2025-09-17 03:36:21 +00:00
committed by GitHub
43 changed files with 2304 additions and 2061 deletions

View File

@@ -40,7 +40,7 @@ jobs:
- name: msrv
os: ubuntu-22.04
# sync MSRV with docs: guide/src/guide/installation.md and Cargo.toml
rust: 1.85.0
rust: 1.88.0
target: x86_64-unknown-linux-gnu
name: ${{ matrix.name }}
steps:

365
Cargo.lock generated
View File

@@ -26,19 +26,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "ammonia"
version = "4.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6b346764dd0814805de8abf899fe03065bcee69bb1a4771c785817e39f3978f"
dependencies = [
"cssparser",
"html5ever 0.35.0",
"maplit",
"tendril",
"url",
]
[[package]]
name = "anstream"
version = "0.6.19"
@@ -356,29 +343,6 @@ dependencies = [
"typenum",
]
[[package]]
name = "cssparser"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e901edd733a1472f944a45116df3f846f54d37e67e68640ac8bb69689aca2aa"
dependencies = [
"cssparser-macros",
"dtoa-short",
"itoa",
"phf 0.11.3",
"smallvec",
]
[[package]]
name = "cssparser-macros"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [
"quote",
"syn 2.0.104",
]
[[package]]
name = "darling"
version = "0.20.11"
@@ -451,12 +415,6 @@ dependencies = [
"syn 2.0.104",
]
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "digest"
version = "0.10.7"
@@ -467,38 +425,18 @@ dependencies = [
"crypto-common",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]]
name = "dtoa"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04"
[[package]]
name = "dtoa-short"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
dependencies = [
"dtoa",
]
[[package]]
name = "dunce"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
[[package]]
name = "ego-tree"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8"
[[package]]
name = "elasticlunr-rs"
version = "3.0.2"
@@ -855,119 +793,12 @@ dependencies = [
"tower-service",
]
[[package]]
name = "icu_collections"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47"
dependencies = [
"displaydoc",
"potential_utf",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_locale_core"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a"
dependencies = [
"displaydoc",
"litemap",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_normalizer"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979"
dependencies = [
"displaydoc",
"icu_collections",
"icu_normalizer_data",
"icu_properties",
"icu_provider",
"smallvec",
"zerovec",
]
[[package]]
name = "icu_normalizer_data"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3"
[[package]]
name = "icu_properties"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b"
dependencies = [
"displaydoc",
"icu_collections",
"icu_locale_core",
"icu_properties_data",
"icu_provider",
"potential_utf",
"zerotrie",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632"
[[package]]
name = "icu_provider"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af"
dependencies = [
"displaydoc",
"icu_locale_core",
"stable_deref_trait",
"tinystr",
"writeable",
"yoke",
"zerofrom",
"zerotrie",
"zerovec",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
dependencies = [
"idna_adapter",
"smallvec",
"utf8_iter",
]
[[package]]
name = "idna_adapter"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344"
dependencies = [
"icu_normalizer",
"icu_properties",
]
[[package]]
name = "ignore"
version = "0.4.23"
@@ -1086,12 +917,6 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
[[package]]
name = "litemap"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
[[package]]
name = "lock_api"
version = "0.4.13"
@@ -1114,12 +939,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "maplit"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "markup5ever"
version = "0.11.0"
@@ -1262,16 +1081,17 @@ dependencies = [
name = "mdbook-html"
version = "0.5.0-alpha.1"
dependencies = [
"ammonia",
"anyhow",
"ego-tree",
"elasticlunr-rs",
"font-awesome-as-a-crate",
"handlebars",
"hex",
"html5ever 0.35.0",
"indexmap",
"mdbook-core",
"mdbook-markdown",
"mdbook-renderer",
"pretty_assertions",
"pulldown-cmark",
"regex",
"serde",
@@ -1583,7 +1403,6 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
"phf_macros",
"phf_shared 0.11.3",
]
@@ -1627,19 +1446,6 @@ dependencies = [
"rand 0.8.5",
]
[[package]]
name = "phf_macros"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]]
name = "phf_shared"
version = "0.10.0"
@@ -1670,15 +1476,6 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "potential_utf"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585"
dependencies = [
"zerovec",
]
[[package]]
name = "ppv-lite86"
version = "0.2.21"
@@ -1694,16 +1491,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "pretty_assertions"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d"
dependencies = [
"diff",
"yansi",
]
[[package]]
name = "proc-macro2"
version = "1.0.95"
@@ -2086,12 +1873,6 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "string_cache"
version = "0.8.9"
@@ -2151,17 +1932,6 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
[[package]]
name = "synstructure"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]]
name = "tempfile"
version = "3.20.0"
@@ -2225,16 +1995,6 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "tinystr"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b"
dependencies = [
"displaydoc",
"zerovec",
]
[[package]]
name = "tokio"
version = "1.46.1"
@@ -2497,17 +2257,6 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c"
[[package]]
name = "url"
version = "2.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60"
dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
]
[[package]]
name = "utf-8"
version = "0.7.6"
@@ -2520,12 +2269,6 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3"
[[package]]
name = "utf8_iter"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "utf8parse"
version = "0.2.2"
@@ -2760,12 +2503,6 @@ dependencies = [
"bitflags 2.9.1",
]
[[package]]
name = "writeable"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb"
[[package]]
name = "xml5ever"
version = "0.17.0"
@@ -2777,36 +2514,6 @@ dependencies = [
"markup5ever 0.11.0",
]
[[package]]
name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]]
name = "yoke"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc"
dependencies = [
"serde",
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.104",
"synstructure",
]
[[package]]
name = "zerocopy"
version = "0.8.26"
@@ -2826,57 +2533,3 @@ dependencies = [
"quote",
"syn 2.0.104",
]
[[package]]
name = "zerofrom"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.104",
"synstructure",
]
[[package]]
name = "zerotrie"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595"
dependencies = [
"displaydoc",
"yoke",
"zerofrom",
]
[[package]]
name = "zerovec"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428"
dependencies = [
"yoke",
"zerofrom",
"zerovec-derive",
]
[[package]]
name = "zerovec-derive"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.104",
]

View File

@@ -22,20 +22,21 @@ unreachable_pub = "warn"
edition = "2024"
license = "MPL-2.0"
repository = "https://github.com/rust-lang/mdBook"
rust-version = "1.85.0" # Keep in sync with installation.md and .github/workflows/main.yml
rust-version = "1.88.0" # Keep in sync with installation.md and .github/workflows/main.yml
[workspace.dependencies]
ammonia = "4.1.1"
anyhow = "1.0.98"
axum = "0.8.4"
clap = { version = "4.5.41", features = ["cargo", "wrap_help"] }
clap_complete = "4.5.55"
ego-tree = "0.10.0"
elasticlunr-rs = "3.0.2"
font-awesome-as-a-crate = "0.3.0"
futures-util = "0.3.31"
glob = "0.3.3"
handlebars = "6.3.2"
hex = "0.4.3"
html5ever = "0.35.0"
indexmap = "2.10.0"
ignore = "0.4.23"
mdbook-core = { path = "crates/mdbook-core" }
@@ -50,7 +51,6 @@ notify = "8.1.0"
notify-debouncer-mini = "0.6.0"
opener = "0.8.2"
pathdiff = "0.2.3"
pretty_assertions = "1.4.1"
pulldown-cmark = { version = "0.13.0", default-features = false, features = ["html"] } # Do not update, part of the public api.
regex = "1.11.1"
select = "0.6.1"

View File

@@ -1135,4 +1135,11 @@ mod tests {
assert!(html_config.print.enable);
assert!(!html_config.print.page_break);
}
#[test]
fn test_json_direction() {
    use serde_json::json;

    // Each text direction must serialize to its short lowercase code.
    let expectations = [
        (TextDirection::RightToLeft, "rtl"),
        (TextDirection::LeftToRight, "ltr"),
    ];
    for (direction, code) in expectations {
        assert_eq!(json!(direction), json!(code));
    }
}
}

View File

@@ -0,0 +1,78 @@
//! Utilities for dealing with HTML.
use std::borrow::Cow;
/// Escapes the characters that are unsafe inside an HTML attribute value.
///
/// The characters `<`, `>`, `'`, `"`, `\` and `&` are replaced with their
/// entity equivalents. When `text` contains none of them, it is returned
/// borrowed and no allocation happens.
pub fn escape_html_attribute(text: &str) -> Cow<'_, str> {
    // The output buffer is only created once the first escapable character is seen.
    let mut escaped: Option<String> = None;
    // Byte offset of the first input byte that has not been copied yet.
    let mut copied_up_to = 0;
    for (idx, ch) in text.char_indices() {
        let entity = match ch {
            '<' => "&lt;",
            '>' => "&gt;",
            '\'' => "&#39;",
            '"' => "&quot;",
            '\\' => "&#92;",
            '&' => "&amp;",
            _ => continue,
        };
        let buf = escaped.get_or_insert_with(String::new);
        buf.push_str(&text[copied_up_to..idx]);
        buf.push_str(entity);
        // Every escaped character is ASCII, so it occupies exactly one byte.
        copied_up_to = idx + 1;
    }
    match escaped {
        Some(mut buf) => {
            buf.push_str(&text[copied_up_to..]);
            Cow::Owned(buf)
        }
        None => Cow::Borrowed(text),
    }
}
/// Escapes `<`, `>`, and `&` for use in HTML text content.
///
/// Quotes and backslashes are left untouched. When `text` contains none of
/// the three characters, it is returned borrowed and no allocation happens.
pub fn escape_html(text: &str) -> Cow<'_, str> {
    // The output buffer is only created once the first escapable character is seen.
    let mut escaped: Option<String> = None;
    // Byte offset of the first input byte that has not been copied yet.
    let mut copied_up_to = 0;
    for (idx, ch) in text.char_indices() {
        let entity = match ch {
            '<' => "&lt;",
            '>' => "&gt;",
            '&' => "&amp;",
            _ => continue,
        };
        let buf = escaped.get_or_insert_with(String::new);
        buf.push_str(&text[copied_up_to..idx]);
        buf.push_str(entity);
        // Every escaped character is ASCII, so it occupies exactly one byte.
        copied_up_to = idx + 1;
    }
    match escaped {
        Some(mut buf) => {
            buf.push_str(&text[copied_up_to..]);
            Cow::Owned(buf)
        }
        None => Cow::Borrowed(text),
    }
}
#[test]
fn attributes_are_escaped() {
assert_eq!(escape_html_attribute(""), "");
assert_eq!(escape_html_attribute("<"), "&lt;");
assert_eq!(escape_html_attribute(">"), "&gt;");
assert_eq!(escape_html_attribute("<>"), "&lt;&gt;");
assert_eq!(escape_html_attribute("<test>"), "&lt;test&gt;");
assert_eq!(escape_html_attribute("a<test>b"), "a&lt;test&gt;b");
assert_eq!(escape_html_attribute("'"), "&#39;");
assert_eq!(escape_html_attribute("\\"), "&#92;");
assert_eq!(escape_html_attribute("&"), "&amp;");
}
#[test]
fn html_is_escaped() {
assert_eq!(escape_html(""), "");
assert_eq!(escape_html("<"), "&lt;");
assert_eq!(escape_html(">"), "&gt;");
assert_eq!(escape_html("&"), "&amp;");
assert_eq!(escape_html("<>"), "&lt;&gt;");
assert_eq!(escape_html("<test>"), "&lt;test&gt;");
assert_eq!(escape_html("a<test>b"), "a&lt;test&gt;b");
assert_eq!(escape_html("'"), "'");
assert_eq!(escape_html("\\"), "\\");
}

View File

@@ -1,17 +1,17 @@
//! Various helpers and utilities.
use anyhow::Error;
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Write;
use tracing::error;
pub mod fs;
mod html;
mod string;
mod toml_ext;
pub(crate) use self::toml_ext::TomlExt;
pub use self::html::{escape_html, escape_html_attribute};
pub use self::string::{
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
take_rustdoc_include_lines,
@@ -30,65 +30,6 @@ macro_rules! static_regex {
};
}
/// Replaces multiple consecutive whitespace characters with a single space character.
///
/// Only runs of two or more whitespace characters are replaced; a lone
/// whitespace character (for example a single tab or newline) is left as-is.
pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
// `\s\s+` requires at least two whitespace characters in a row.
static_regex!(WS, r"\s\s+");
WS.replace_all(text, " ")
}
/// Converts the given string into a valid HTML element ID.
///
/// Alphanumeric characters, `_` and `-` are kept (ASCII letters are
/// lowercased), each whitespace character becomes `-`, and every other
/// character is dropped. The only hard restriction on an ID is that it must
/// not contain any ASCII whitespace.
pub fn normalize_id(content: &str) -> String {
    let mut id = String::new();
    for ch in content.chars() {
        if ch.is_alphanumeric() || matches!(ch, '_' | '-') {
            // Only ASCII letters change case; other letters are kept verbatim.
            id.push(ch.to_ascii_lowercase());
        } else if ch.is_whitespace() {
            id.push('-');
        }
    }
    id
}
/// Derives an anchor ID from heading content by "normalising" it.
///
/// HTML tags and a fixed set of HTML entities are removed, surrounding
/// whitespace and leading `#` markers are trimmed, and the remainder is
/// passed through [`normalize_id`].
fn id_from_content(content: &str) -> String {
    // Skip any tags or html-encoded stuff
    static_regex!(HTML, r"(<.*?>)");
    const REPL_SUB: &[&str] = &["&lt;", "&gt;", "&amp;", "&#39;", "&quot;"];

    let without_tags = HTML.replace_all(content, "").into_owned();
    let without_entities = REPL_SUB
        .iter()
        .fold(without_tags, |text, entity| text.replace(*entity, ""));

    // Remove spaces and hashes indicating a header
    let heading_text = without_entities.trim().trim_start_matches('#').trim();
    normalize_id(heading_text)
}
/// Derives an anchor ID from heading content by "normalising" it, making it
/// unique with respect to the IDs generated so far.
///
/// Every returned ID is unique as long as the same `id_counter` is passed on
/// each call: the first occurrence of an ID is returned unchanged, and later
/// occurrences get a `-N` suffix where `N` counts the previous occurrences.
pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
    let base = id_from_content(content);

    // If we have headers with the same normalized id, append an incrementing counter
    let seen = id_counter.entry(base.clone()).or_insert(0);
    let unique = if *seen == 0 {
        base
    } else {
        format!("{base}-{seen}")
    };
    *seen += 1;
    unique
}
/// Prints a "backtrace" of some `Error`.
pub fn log_backtrace(e: &Error) {
let mut message = format!("{e}");
@@ -99,114 +40,3 @@ pub fn log_backtrace(e: &Error) {
error!("{message}");
}
/// Escapes `<` and `>` for HTML, leaving every other character unchanged.
pub fn bracket_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
// Unit tests for the ID-generation helpers and `bracket_escape`.
#[cfg(test)]
mod tests {
use super::bracket_escape;
// Tests for the private `id_from_content` helper.
#[allow(deprecated)]
mod id_from_content {
use super::super::id_from_content;
// Markdown heading markers, inline markup characters and HTML tags are
// stripped before the ID is normalized.
#[test]
fn it_generates_anchors() {
assert_eq!(
id_from_content("## Method-call expressions"),
"method-call-expressions"
);
assert_eq!(id_from_content("## **Bold** title"), "bold-title");
assert_eq!(id_from_content("## `Code` title"), "code-title");
assert_eq!(
id_from_content("## title <span dir=rtl>foo</span>"),
"title-foo"
);
}
// Non-ASCII letters are kept; only ASCII letters are lowercased, which is
// why "Über" keeps its capital "Ü".
#[test]
fn it_generates_anchors_from_non_ascii_initial() {
assert_eq!(
id_from_content("## `--passes`: add more rustdoc passes"),
"--passes-add-more-rustdoc-passes"
);
assert_eq!(
id_from_content("## 中文標題 CJK title"),
"中文標題-cjk-title"
);
assert_eq!(id_from_content("## Über"), "Über");
}
}
// Tests for `normalize_id` and `unique_id_from_content`.
mod html_munging {
use super::super::{normalize_id, unique_id_from_content};
// Symbols such as emoji are dropped while the whitespace around them still
// becomes `-`, hence the doubled and trailing dashes below.
#[test]
fn it_normalizes_ids() {
assert_eq!(
normalize_id("`--passes`: add more rustdoc passes"),
"--passes-add-more-rustdoc-passes"
);
assert_eq!(
normalize_id("Method-call 🐙 expressions \u{1f47c}"),
"method-call--expressions-"
);
assert_eq!(normalize_id("_-_12345"), "_-_12345");
assert_eq!(normalize_id("12345"), "12345");
assert_eq!(normalize_id("中文"), "中文");
assert_eq!(normalize_id("にほんご"), "にほんご");
assert_eq!(normalize_id("한국어"), "한국어");
assert_eq!(normalize_id(""), "");
}
// Uniqueness is tracked per counter map: fresh maps yield identical IDs,
// a shared map yields `-1`, `-2`, ... suffixes for repeats.
#[test]
fn it_generates_unique_ids_from_content() {
// Same id if not given shared state
assert_eq!(
unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
"中文標題-cjk-title"
);
assert_eq!(
unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
"中文標題-cjk-title"
);
// Different id if given shared state
let mut id_counter = Default::default();
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über");
assert_eq!(
unique_id_from_content("## 中文標題 CJK title", &mut id_counter),
"中文標題-cjk-title"
);
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1");
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2");
}
}
// Only angle brackets are escaped; quotes and backslashes pass through.
#[test]
fn escaped_brackets() {
assert_eq!(bracket_escape(""), "");
assert_eq!(bracket_escape("<"), "&lt;");
assert_eq!(bracket_escape(">"), "&gt;");
assert_eq!(bracket_escape("<>"), "&lt;&gt;");
assert_eq!(bracket_escape("<test>"), "&lt;test&gt;");
assert_eq!(bracket_escape("a<test>b"), "a&lt;test&gt;b");
assert_eq!(bracket_escape("'"), "'");
assert_eq!(bracket_escape("\\"), "\\");
}
}

View File

@@ -1,7 +1,7 @@
use anyhow::{Context, Result};
use mdbook_core::book::{Book, BookItem, Chapter};
use mdbook_core::config::BuildConfig;
use mdbook_core::utils::bracket_escape;
use mdbook_core::utils::escape_html;
use mdbook_summary::{Link, Summary, SummaryItem, parse_summary};
use std::fs::{self, File};
use std::io::{Read, Write};
@@ -51,7 +51,8 @@ fn create_missing(src_dir: &Path, summary: &Summary) -> Result<()> {
let mut f = File::create(&filename).with_context(|| {
format!("Unable to create missing file: {}", filename.display())
})?;
writeln!(f, "# {}", bracket_escape(&link.name))?;
let title = escape_html(&link.name);
writeln!(f, "# {title}")?;
}
}

View File

@@ -8,12 +8,14 @@ repository.workspace = true
rust-version.workspace = true
[dependencies]
ammonia = { workspace = true, optional = true }
anyhow.workspace = true
ego-tree.workspace = true
elasticlunr-rs = { workspace = true, optional = true }
font-awesome-as-a-crate.workspace = true
handlebars.workspace = true
hex.workspace = true
html5ever.workspace = true
indexmap.workspace = true
mdbook-core.workspace = true
mdbook-markdown.workspace = true
mdbook-renderer.workspace = true
@@ -25,7 +27,6 @@ sha2.workspace = true
tracing.workspace = true
[dev-dependencies]
pretty_assertions.workspace = true
tempfile.workspace = true
toml.workspace = true
@@ -33,4 +34,4 @@ toml.workspace = true
workspace = true
[features]
search = ["dep:ammonia", "dep:elasticlunr-rs"]
search = ["dep:elasticlunr-rs"]

View File

@@ -0,0 +1,182 @@
//! Support for hiding code lines.
use crate::html::{Element, Node};
use ego_tree::{NodeId, Tree};
use html5ever::tendril::StrTendril;
use mdbook_core::static_regex;
use std::collections::HashMap;
/// Wraps hidden lines in a `<span>` for the given code block.
///
/// The language and an optional `hidelines=<prefix>` override are read from
/// the `class` attribute of the element at `code_id`. Rust blocks use the
/// built-in `#` hiding rules; other languages use the prefix from the code
/// block or, failing that, the prefix configured for the language in
/// `hidelines`. If the element's first child is not a text node, or no prefix
/// applies, the tree is left untouched.
pub(crate) fn hide_lines(
    tree: &mut Tree<Node>,
    code_id: NodeId,
    hidelines: &HashMap<String, String>,
) {
    let mut node = tree.get_mut(code_id).unwrap();
    let el = node.value().as_element().unwrap();
    let classes: Vec<_> = el.attr("class").unwrap_or_default().split(' ').collect();
    let language = classes
        .iter()
        .find_map(|cls| cls.strip_prefix("language-"))
        .unwrap_or_default()
        .to_string();
    let hideline_info = classes
        .iter()
        .find_map(|cls| cls.strip_prefix("hidelines="))
        .map(String::from);

    let Some(mut child) = node.first_child() else {
        return;
    };
    let Node::Text(text) = child.value() else {
        return;
    };

    let new_nodes = if language == "rust" {
        hide_lines_rust(text)
    } else {
        // Use the prefix from the code block, else the prefix from config.
        let hidelines_prefix = hideline_info
            .as_deref()
            .or_else(|| hidelines.get(&language).map(String::as_str));
        match hidelines_prefix {
            Some(prefix) => hide_lines_with_prefix(text, prefix),
            None => return,
        }
    };

    // Swap the original text node for the freshly built fragment's children.
    child.detach();
    let root_id = tree.extend_tree(new_nodes).id();
    tree.get_mut(code_id)
        .unwrap()
        .reparent_from_id_append(root_id);
}
/// Wraps hidden lines in a `<span>` specifically for Rust code blocks.
///
/// A line whose first non-whitespace character is `#` is handled as follows:
/// `##` is an escape and is emitted as a visible line with a single `#`;
/// a `#` followed by a space or by nothing marks a hidden line, which is
/// emitted without the `#` inside a `<span class="boring">`. Every other line
/// is emitted unchanged. The final line gets no trailing newline.
fn hide_lines_rust(text: &StrTendril) -> Tree<Node> {
    static_regex!(BORING_LINES_REGEX, r"^(\s*)#(.?)(.*)$");
    let mut tree = Tree::new(Node::Fragment);
    let mut root = tree.root_mut();
    let mut remaining = text.lines().peekable();
    while let Some(line) = remaining.next() {
        // Don't include newline on the last line.
        let newline = if remaining.peek().is_some() { "\n" } else { "" };
        let caps = BORING_LINES_REGEX.captures(line);
        match caps.as_ref().map(|c| (&c[1], &c[2], &c[3])) {
            Some((indent, "#", rest)) => {
                root.append(Node::Text(format!("{indent}#{rest}{newline}").into()));
            }
            Some((indent, "" | " ", rest)) => {
                let mut boring = Element::new("span");
                boring.insert_attr("class", "boring".into());
                root.append(Node::Element(boring))
                    .append(Node::Text(format!("{indent}{rest}{newline}").into()));
            }
            _ => {
                root.append(Node::Text(format!("{line}{newline}").into()));
            }
        }
    }
    tree
}
/// Wraps hidden lines in a `<span>` tag for lines starting with the given prefix.
///
/// Leading whitespace before the prefix is preserved and the prefix itself
/// is removed. Every emitted line, including the last one, ends with a newline.
fn hide_lines_with_prefix(content: &str, prefix: &str) -> Tree<Node> {
    let mut tree = Tree::new(Node::Fragment);
    let mut root = tree.root_mut();
    for line in content.lines() {
        let unindented = line.trim_start();
        match unindented.strip_prefix(prefix) {
            Some(rest) => {
                // Whatever `trim_start` removed is the line's indentation.
                let ws = &line[..line.len() - unindented.len()];
                let mut boring = Element::new("span");
                boring.insert_attr("class", "boring".into());
                root.append(Node::Element(boring))
                    .append(Node::Text(format!("{ws}{rest}\n").into()));
            }
            None => {
                root.append(Node::Text(format!("{line}\n").into()));
            }
        }
    }
    tree
}
/// If this code text is missing an `fn main`, then wrap it with `fn main` in
/// a fashion similar to rustdoc, with the wrapper hidden.
///
/// Returns `None` when the text already contains `fn main` or `quick_main!`.
/// Otherwise leading inner attributes stay outside the generated `main`, and
/// every wrapper line is prefixed with `# ` so that it is treated as hidden.
pub(crate) fn wrap_rust_main(text: &str) -> Option<String> {
    if text.contains("fn main") || text.contains("quick_main!") {
        return None;
    }
    let (attrs, code) = partition_rust_source(text);
    // Make sure the closing brace of the wrapper starts on its own line.
    let needs_newline = !code.is_empty() && !code.ends_with('\n');
    let newline = if needs_newline { "\n" } else { "" };
    Some(format!(
        "# #![allow(unused)]\n{attrs}# fn main() {{\n{code}{newline}# }}"
    ))
}
/// Splits leading Rust inner attributes from the given source string.
///
/// The header is the longest prefix made up of `#![...]` attribute lines and
/// blank lines.
///
/// Returns `(inner_attrs, rest_of_code)`.
fn partition_rust_source(s: &str) -> (&str, &str) {
    static_regex!(
        HEADER_RE,
        r"^(?mx)
(
(?:
^[ \t]*\#!\[.* (?:\r?\n)?
|
^\s* (?:\r?\n)?
)*
)"
    );
    // The length of the captured header is the byte offset to split at.
    let split_idx = HEADER_RE.captures(s).map_or(0, |caps| caps[1].len());
    s.split_at(split_idx)
}
/// Checks the attribute/code split for empty input, code-only input,
/// attribute-only input, and attributes interleaved with blank lines.
#[test]
fn it_partitions_rust_source() {
    let cases: &[(&str, (&str, &str))] = &[
        ("", ("", "")),
        ("let x = 1;", ("", "let x = 1;")),
        (
            "fn main()\n{ let x = 1; }\n",
            ("", "fn main()\n{ let x = 1; }\n"),
        ),
        ("#![allow(foo)]", ("#![allow(foo)]", "")),
        ("#![allow(foo)]\n", ("#![allow(foo)]\n", "")),
        (
            "#![allow(foo)]\nlet x = 1;",
            ("#![allow(foo)]\n", "let x = 1;"),
        ),
        (
            "\n#![allow(foo)]\n\n#![allow(bar)]\n\nlet x = 1;",
            ("\n#![allow(foo)]\n\n#![allow(bar)]\n\n", "let x = 1;"),
        ),
    ];
    for &(source, expected) in cases {
        assert_eq!(partition_rust_source(source), expected);
    }
}

View File

@@ -0,0 +1,105 @@
//! HTML rendering support.
//!
//! This module's primary entry point is [`render_markdown`] which will take
//! markdown text and render it to HTML. In summary, the general procedure of
//! that function is:
//!
//! 1. Use [`pulldown_cmark`] to parse the markdown and generate events.
//! 2. [`tree`] converts those events to a tree data structure.
//!    1. Parse HTML inside the markdown using [`tokenizer`].
//!    2. Apply various transformations to the tree data structure, such as adding header links.
//! 3. Serialize the tree to HTML in [`serialize()`].
use ego_tree::Tree;
use mdbook_core::book::{Book, Chapter};
use mdbook_core::config::{HtmlConfig, RustEdition};
use mdbook_markdown::{MarkdownOptions, new_cmark_parser};
use std::path::{Path, PathBuf};
mod hide_lines;
mod print;
mod serialize;
#[cfg(test)]
mod tests;
mod tokenizer;
mod tree;
pub(crate) use hide_lines::{hide_lines, wrap_rust_main};
pub(crate) use print::render_print_page;
pub(crate) use serialize::serialize;
pub(crate) use tree::{Element, Node};
/// Options for converting a single chapter's markdown to HTML.
///
/// The lifetime `'a` is that of the borrowed chapter path and HTML config.
pub(crate) struct HtmlRenderOptions<'a> {
/// Options for parsing markdown.
pub markdown_options: MarkdownOptions,
/// The chapter's location, relative to the `SUMMARY.md` file.
pub path: &'a Path,
/// The default Rust edition, used to set the proper class on the code blocks.
pub edition: Option<RustEdition>,
/// The [`HtmlConfig`], whose options affect how the HTML is generated.
pub config: &'a HtmlConfig,
}
impl<'a> HtmlRenderOptions<'a> {
    /// Creates a new [`HtmlRenderOptions`].
    ///
    /// The markdown options start from their defaults, with smart
    /// punctuation taken from `config`.
    pub(crate) fn new(
        path: &'a Path,
        config: &'a HtmlConfig,
        edition: Option<RustEdition>,
    ) -> HtmlRenderOptions<'a> {
        let markdown_options = {
            let mut opts = MarkdownOptions::default();
            opts.smart_punctuation = config.smart_punctuation;
            opts
        };
        Self {
            markdown_options,
            path,
            edition,
            config,
        }
    }
}
/// Renders markdown to HTML.
///
/// The markdown is first converted to a tree, which is then serialized into
/// the returned string.
pub(crate) fn render_markdown(text: &str, options: &HtmlRenderOptions<'_>) -> String {
    let mut html = String::new();
    let parsed = build_tree(text, options);
    serialize::serialize(&parsed, &mut html);
    html
}
/// Renders markdown to a [`Tree`].
///
/// The text is parsed into markdown events using the options' markdown
/// settings, and the events are handed to the tree builder.
fn build_tree(text: &str, options: &HtmlRenderOptions<'_>) -> Tree<Node> {
    tree::MarkdownTreeBuilder::build(
        options,
        new_cmark_parser(text, &options.markdown_options),
    )
}
/// The parsed chapter, and some information about the chapter.
pub(crate) struct ChapterTree<'book> {
/// The book chapter that this tree was built from.
pub(crate) chapter: &'book Chapter,
/// The path to the chapter relative to the root with the `.html` extension.
pub(crate) html_path: PathBuf,
/// The chapter tree.
pub(crate) tree: Tree<Node>,
}
/// Creates all of the [`ChapterTree`]s for the book.
///
/// Each chapter yielded by `book.chapters()` is rendered to a tree using
/// options derived from `html_config` and `edition`.
///
/// # Panics
///
/// Panics if a chapter yielded by `book.chapters()` has no path.
pub(crate) fn build_trees<'book>(
    book: &'book Book,
    html_config: &HtmlConfig,
    edition: Option<RustEdition>,
) -> Vec<ChapterTree<'book>> {
    let mut trees = Vec::new();
    for chapter in book.chapters() {
        let path = chapter.path.as_ref().unwrap();
        let options = HtmlRenderOptions::new(path, html_config, edition);
        trees.push(ChapterTree {
            chapter,
            html_path: path.with_extension("html"),
            tree: build_tree(&chapter.content, &options),
        });
    }
    trees
}

View File

@@ -0,0 +1,217 @@
//! Support for generating the print page.
//!
//! The print page takes all the individual chapters (as `Tree<Node>`
//! elements) and modifies the chapters so that they work on a consolidated
//! print page, and then serializes it all as one HTML page.
use super::Node;
use crate::html::{ChapterTree, Element, serialize};
use crate::utils::{ToUrlPath, id_from_content, normalize_path, unique_id};
use mdbook_core::static_regex;
use std::collections::HashMap;
use std::path::{Component, PathBuf};
/// Takes all the chapter trees, modifies them to be suitable to render for
/// the print page, and returns a string of all the chapters rendered to a
/// single HTML page.
///
/// IDs are first made unique across chapters, links are then rewritten to
/// match, and finally the chapters are serialized in order with a page-break
/// element between them.
pub(crate) fn render_print_page(mut chapter_trees: Vec<ChapterTree<'_>>) -> String {
    // Add page break between chapters
    // See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before
    // Both CSS properties are emitted for browser compatibility.
    const PAGE_BREAK: &str =
        r#"<div style="break-before: page; page-break-before: always;"></div>"#;

    let (id_remap, mut id_counter) = make_ids_unique(&mut chapter_trees);
    let path_to_root_id = make_root_id_map(&mut chapter_trees, &mut id_counter);
    rewrite_links(&mut chapter_trees, &id_remap, &path_to_root_id);

    chapter_trees
        .into_iter()
        .fold(String::new(), |mut page, chapter_tree| {
            // Nothing has been written yet for the first chapter, so it gets no break.
            if !page.is_empty() {
                page.push_str(PAGE_BREAK);
            }
            serialize(&chapter_tree.tree, &mut page);
            page
        })
}
/// Make all IDs unique, and create a map from old to new IDs.
///
/// The first map is a map of the chapter path to the IDs that were rewritten
/// in that chapter (old ID to new ID).
///
/// The second map is a map of every ID seen to the number of times it has
/// been seen. This is used to generate unique IDs.
///
/// Elements whose `id` had to change are updated in place in the trees.
fn make_ids_unique(
chapter_trees: &mut [ChapterTree<'_>],
) -> (
HashMap<PathBuf, HashMap<String, String>>,
HashMap<String, u32>,
) {
let mut id_remap = HashMap::new();
// One counter is shared by all chapters so IDs are unique across the whole
// print page rather than only within a single chapter.
let mut id_counter = HashMap::new();
for ChapterTree {
html_path, tree, ..
} in chapter_trees
{
for value in tree.values_mut() {
if let Node::Element(el) = value
&& let Some(id) = el.attr("id")
{
let new_id = unique_id(id, &mut id_counter);
// Only record (and rewrite) IDs that actually had to change.
if new_id != id {
// Copy the old ID out before mutating the element it was borrowed from.
let id = id.to_string();
el.insert_attr("id", new_id.clone().into());
let map: &mut HashMap<_, _> = id_remap.entry(html_path.clone()).or_default();
map.insert(id, new_id);
}
}
}
}
(id_remap, id_counter)
}
/// Generates a map of a chapter path to the ID of the top of the chapter.
///
/// If a chapter is missing an `h1` tag, then one is synthesized so that the
/// print output has something to link to.
///
/// `id_counter` is the counter produced by `make_ids_unique`; it is used (and
/// updated) so that synthesized IDs do not collide with existing ones.
fn make_root_id_map(
chapter_trees: &mut [ChapterTree<'_>],
id_counter: &mut HashMap<String, u32>,
) -> HashMap<PathBuf, String> {
let mut path_to_root_id = HashMap::new();
for ChapterTree {
chapter,
html_path,
tree,
..
} in chapter_trees
{
let mut h1_found = false;
// Only the first heading element is inspected: the scan stops at the
// first h1..h6 it encounters.
for value in tree.values_mut() {
if let Node::Element(el) = value {
if el.name() == "h1" {
// An h1 without an `id` leaves `h1_found` false, so a new h1 is
// synthesized below in addition to the existing one.
if let Some(id) = el.attr("id") {
h1_found = true;
path_to_root_id.insert(html_path.clone(), id.to_string());
}
break;
} else if matches!(el.name(), "h2" | "h3" | "h4" | "h5" | "h6") {
// h1 not found.
break;
}
}
}
if !h1_found {
// Synthesize a root id to be able to link to the start of the page.
// TODO: This might want to be a warning? Chapters generally
// should start with an h1.
let mut h1 = Element::new("h1");
let id = id_from_content(&chapter.name);
let id = unique_id(&id, id_counter);
h1.insert_attr("id", id.clone().into());
// Build `<h1 id=ID><a href="#ID" class="header">chapter name</a></h1>`
// as the first child of the tree root.
let mut root = tree.root_mut();
let mut h1 = root.prepend(Node::Element(h1));
let mut a = Element::new("a");
a.insert_attr("href", format!("#{id}").into());
a.insert_attr("class", "header".into());
let mut a = h1.append(Node::Element(a));
a.append(Node::Text(chapter.name.clone().into()));
path_to_root_id.insert(html_path.clone(), id);
}
}
path_to_root_id
}
/// Rewrite links so that they point to IDs on the print page.
///
/// For every `a` and `img` element, the `href`, `src` and `xlink:href`
/// attributes are inspected:
///
/// - Links with a scheme (e.g. `https:`) are left untouched.
/// - Links whose path points outside of the book, or does not end with
///   `.html`, are rewritten to be relative to the print page.
/// - Other links are replaced with a `#id` fragment: the (possibly remapped)
///   anchor when the link has one, otherwise the root ID of the target
///   chapter. Links without an anchor whose target has no root ID are left
///   unchanged.
///
/// `id_remap` and `path_to_root_id` are the maps produced by
/// `make_ids_unique` and `make_root_id_map`.
fn rewrite_links(
chapter_trees: &mut [ChapterTree<'_>],
id_remap: &HashMap<PathBuf, HashMap<String, String>>,
path_to_root_id: &HashMap<PathBuf, String>,
) {
// Splits a link into an optional scheme, an optional path (everything up to
// the first `#`) and an optional anchor (everything after the first `#`).
static_regex!(
LINK,
r"(?x)
(?P<scheme>^[a-z][a-z0-9+.-]*:)?
(?P<path>[^\#]+)?
(?:\#(?P<anchor>.*))?"
);
// Rewrite path links to go to the appropriate place.
for ChapterTree {
html_path, tree, ..
} in chapter_trees
{
// Directory of the current chapter; relative links resolve against it.
let base = html_path.parent().expect("path can't be empty");
for value in tree.values_mut() {
let Node::Element(el) = value else {
continue;
};
if !matches!(el.name(), "a" | "img") {
continue;
}
for attr in ["href", "src", "xlink:href"] {
let Some(dest) = el.attr(attr) else {
continue;
};
let Some(caps) = LINK.captures(&dest) else {
continue;
};
// Links with a scheme are not rewritten.
if caps.name("scheme").is_some() {
continue;
}
// The lookup_key is the key to look up in the remap table.
// It defaults to the current chapter, which is what a bare `#anchor`
// link refers to.
let mut lookup_key = html_path.clone();
if let Some(href_path) = caps.name("path")
&& let href_path = href_path.as_str()
&& !href_path.is_empty()
{
// Resolve the link path relative to the current chapter's directory.
lookup_key.pop();
lookup_key.push(href_path);
let normalized = normalize_path(&lookup_key);
// If this points outside of the book, don't modify it.
let is_outside = matches!(
normalized.components().next(),
Some(Component::ParentDir | Component::RootDir)
);
if is_outside || !href_path.ends_with(".html") {
// Make the link relative to the print page location.
let mut rel_path = normalize_path(&base.join(href_path)).to_url_path();
if let Some(anchor) = caps.name("anchor") {
rel_path.push('#');
rel_path.push_str(anchor.as_str());
}
el.insert_attr(attr, rel_path.into());
continue;
}
}
let lookup_key = normalize_path(&lookup_key);
let anchor = caps.name("anchor");
let id = match anchor {
Some(anchor_id) => {
let anchor_id = anchor_id.as_str().to_string();
// Use the renamed ID if this anchor was remapped in the target
// chapter, otherwise keep the anchor as written.
match id_remap.get(&lookup_key) {
Some(id_map) => match id_map.get(&anchor_id) {
Some(new_id) => new_id.clone(),
None => anchor_id,
},
None => {
// Assume the anchor goes to some non-remapped
// ID that already exists.
anchor_id
}
}
}
// No anchor: link to the top of the target chapter, if it is known.
None => match path_to_root_id.get(&lookup_key) {
Some(id) => id.to_string(),
None => continue,
},
};
el.insert_attr(attr, format!("#{id}").into());
}
}
}
}

View File

@@ -0,0 +1,112 @@
//! Serializes the [`Node`] tree to an HTML string.
use super::tree::is_void_element;
use super::tree::{Element, Node};
use ego_tree::{Tree, iter::Edge};
use html5ever::{local_name, ns};
use mdbook_core::utils::{escape_html, escape_html_attribute};
use std::ops::Deref;
/// Appends the HTML serialization of `tree` to `output`.
///
/// The tree is walked depth-first. Start tags and leaf content are written
/// when a node is opened, end tags when an element is closed.
pub(crate) fn serialize(tree: &Tree<Node>, output: &mut String) {
    for edge in tree.root().traverse() {
        let opened = match edge {
            Edge::Open(node) => node,
            Edge::Close(node) => {
                // Only elements have something to emit on close.
                if let Node::Element(el) = node.value() {
                    serialize_end(el, output);
                }
                continue;
            }
        };
        match opened.value() {
            // Fragments are containers only; they produce no output themselves.
            Node::Fragment => {}
            Node::Element(el) => serialize_start(el, output),
            Node::Text(text) => output.push_str(&escape_html(text)),
            // Raw data is written verbatim, without escaping.
            Node::RawData(html) => output.push_str(html),
            Node::Comment(comment) => {
                output.push_str("<!--");
                output.push_str(comment);
                output.push_str("-->");
            }
        }
    }
}
/// Reports whether the element called `name` should be surrounded by
/// newlines so that the emitted HTML stays readable.
///
/// The comparison is exact (case-sensitive).
fn wants_pretty_html_newline(name: &str) -> bool {
    const NEWLINE_TAGS: &[&str] = &[
        "blockquote",
        "dd",
        "div",
        "dl",
        "dt",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "hr",
        "li",
        "ol",
        "p",
        "pre",
        "table",
        "tbody",
        "thead",
        "tr",
        "ul",
    ];
    NEWLINE_TAGS.contains(&name)
}
/// Emit the start tag of an element.
///
/// Writes `<name attr="value" ...>` to `output`, with attribute values
/// escaped. Elements that want pretty newlines are started on a fresh line
/// unless `output` is empty or already ends with a newline. Self-closing
/// elements are terminated with ` />`.
fn serialize_start(el: &Element, output: &mut String) {
    let el_name = el.name();
    if wants_pretty_html_newline(el_name) && !output.is_empty() && !output.ends_with('\n') {
        output.push('\n');
    }
    output.push('<');
    output.push_str(el_name);
    for (attr_name, value) in &el.attrs {
        output.push(' ');
        // Namespaced attributes are written with their conventional prefix.
        match attr_name.ns {
            ns!() => (),
            ns!(xml) => output.push_str("xml:"),
            ns!(xmlns) => {
                // An attribute in the XMLNS namespace whose own local name is
                // `xmlns` is serialized as plain `xmlns`; every other one gets
                // the `xmlns:` prefix. The check must look at the attribute's
                // name, not the element's.
                if attr_name.local != local_name!("xmlns") {
                    output.push_str("xmlns:");
                }
            }
            ns!(xlink) => output.push_str("xlink:"),
            _ => (), // TODO what should it do here?
        }
        output.push_str(attr_name.local.deref());
        output.push_str("=\"");
        output.push_str(&escape_html_attribute(&value));
        output.push('"');
    }
    if el.self_closing {
        output.push_str(" /");
    }
    output.push('>');
}
/// Writes the closing tag for `el`, if it has one.
///
/// Self-closing and void elements produce no output. Elements that want
/// pretty newlines get a newline after their end tag.
fn serialize_end(el: &Element, output: &mut String) {
    // Void elements do not have an end tag.
    let has_end_tag = !(el.self_closing || is_void_element(el.name()));
    if has_end_tag {
        let tag = el.name();
        output.push_str("</");
        output.push_str(tag);
        output.push('>');
        if wants_pretty_html_newline(tag) {
            output.push('\n');
        }
    }
}

View File

@@ -0,0 +1,53 @@
use crate::html::tokenizer::parse_html;
use html5ever::tokenizer::{Tag, TagKind, Token};
// Basic tokenizer behavior of a script.
//
// The script body contains `<` and `>`; the test checks that the text between
// the start and end tags is returned unchanged as character tokens and that no
// parse error is reported.
#[test]
fn parse_html_script() {
let script = r#"
if (3 < 5 > 10)
{
alert("The sky is falling!");
}
"#;
let t = format!("<script>{script}</script>");
let ts = parse_html(&t);
eprintln!("{ts:#?}",);
// Concatenation of all character tokens seen between a start and an end tag.
let mut output = String::new();
// Set by any start tag and cleared by any end tag (the input only has `script`).
let mut in_script = false;
for t in ts {
match t {
Token::ParseError(e) => panic!("{e:?}"),
Token::CharacterTokens(s) => {
if in_script {
output.push_str(&s)
}
}
Token::TagToken(Tag {
kind: TagKind::StartTag,
..
}) => in_script = true,
Token::TagToken(Tag {
kind: TagKind::EndTag,
..
}) => in_script = false,
_ => {}
}
}
assert_eq!(output, script);
}
// What happens if a script doesn't end.
//
// The test only requires that tokenizing does not panic and that no
// `ParseError` token is produced for the unterminated `<script>`.
#[test]
fn parse_html_script_unclosed() {
let t = r#"<script>
// Test
"#;
let ts = parse_html(t);
eprintln!("{ts:#?}",);
for t in ts {
if let Token::ParseError(e) = t {
panic!("{e:?}",);
}
}
}

View File

@@ -0,0 +1,83 @@
//! Support for parsing HTML.
//!
//! The primary entry point is [`parse_html`] which uses [`html5ever`] to
//! tokenize the input.
use html5ever::TokenizerResult;
use html5ever::tendril::ByteTendril;
use html5ever::tokenizer::states::RawKind;
use html5ever::tokenizer::{
BufferQueue, TagKind, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts,
};
use std::cell::RefCell;
/// Collector for HTML tokens.
///
/// Used as the token sink of the tokenizer in [`parse_html`].
#[derive(Default)]
struct TokenCollector {
/// Parsed HTML tokens.
///
/// Wrapped in a `RefCell` because `process_token` only receives `&self`.
tokens: RefCell<Vec<Token>>,
}
impl TokenSink for TokenCollector {
    type Handle = ();

    /// Records `token` and tells the tokenizer how to continue.
    ///
    /// Doctype, null-character and EOF tokens are dropped; every other token
    /// is stored. After a `<script>` or `<style>` start tag the tokenizer is
    /// switched to the matching raw-data state so the element's contents are
    /// not tokenized as markup.
    fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
        // Decide first what the tokenizer should do next, then store the token.
        let raw_kind = match &token {
            Token::DoctypeToken(_) | Token::NullCharacterToken | Token::EOFToken => {
                return TokenSinkResult::Continue;
            }
            // TODO: This could probably use special support for SVG and MathML.
            Token::TagToken(tag) if matches!(tag.kind, TagKind::StartTag) => {
                match tag.name.as_bytes() {
                    b"script" => Some(RawKind::ScriptData),
                    b"style" => Some(RawKind::Rawtext),
                    _ => None,
                }
            }
            Token::TagToken(_)
            | Token::CommentToken(_)
            | Token::CharacterTokens(_)
            | Token::ParseError(_) => None,
        };
        self.tokens.borrow_mut().push(token);
        match raw_kind {
            Some(kind) => TokenSinkResult::RawData(kind),
            None => TokenSinkResult::Continue,
        }
    }
}
/// Tokenizes `html` and returns the collected tokens.
///
/// # Panics
///
/// Panics if the tokenizer does not report `Done` after being fed the
/// input, or if it leaves any input unconsumed.
pub(crate) fn parse_html(html: &str) -> Vec<Token> {
    let bytes: ByteTendril = html.as_bytes().into();
    let mut input = BufferQueue::default();
    // The bytes come from a `&str`, so reinterpreting them as UTF-8 succeeds.
    input.push_back(bytes.try_reinterpret().unwrap());

    let tokenizer = Tokenizer::new(TokenCollector::default(), TokenizerOpts::default());
    let status = tokenizer.feed(&mut input);
    assert_eq!(status, TokenizerResult::Done);
    assert!(
        input.is_empty(),
        "queue wasn't empty: {:?}",
        input.pop_front()
    );
    tokenizer.end();
    tokenizer.sink.tokens.take()
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,20 +1,17 @@
use super::helpers;
use super::static_files::StaticFiles;
use crate::html::ChapterTree;
use crate::html::{build_trees, render_markdown, serialize};
use crate::theme::Theme;
use crate::utils::ToUrlPath;
use anyhow::{Context, Result, bail};
use handlebars::Handlebars;
use mdbook_core::book::{Book, BookItem, Chapter};
use mdbook_core::config::{BookConfig, Code, Config, HtmlConfig, Playground, RustEdition};
use mdbook_core::utils::fs::get_404_output_file;
use mdbook_core::{static_regex, utils};
use mdbook_markdown::render_markdown;
use mdbook_core::config::{BookConfig, Config, HtmlConfig};
use mdbook_core::utils;
use mdbook_renderer::{RenderContext, Renderer};
use regex::Captures;
use serde_json::json;
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use std::fs::{self, File};
use std::path::{Path, PathBuf};
use tracing::error;
@@ -33,15 +30,19 @@ impl HtmlHandlebars {
fn render_chapter(
&self,
ch: &Chapter,
chapter_tree: &ChapterTree<'_>,
prev_ch: Option<&Chapter>,
next_ch: Option<&Chapter>,
mut ctx: RenderChapterContext<'_>,
print_content: &mut String,
) -> Result<()> {
// FIXME: This should be made DRY-er and rely less on mutable state
let ch = chapter_tree.chapter;
let path = ch.path.as_ref().unwrap();
// "print.html" is used for the print page.
if path == Path::new("print.md") {
bail!("{} is reserved for internal use", path.display());
};
if let Some(ref edit_url_template) = ctx.html_config.edit_url_template {
let full_path = ctx.book_config.src.to_str().unwrap_or_default().to_owned()
@@ -57,30 +58,14 @@ impl HtmlHandlebars {
.insert("git_repository_edit_url".to_owned(), json!(edit_url));
}
let mut options = crate::html_render_options_from_config(path, &ctx.html_config);
let content = render_markdown(&ch.content, &options);
options.for_print = true;
let fixed_content = render_markdown(&ch.content, &options);
if prev_ch.is_some() && ctx.html_config.print.page_break {
// Add page break between chapters
// See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before
// Add both two CSS properties because of the compatibility issue
print_content
.push_str(r#"<div style="break-before: page; page-break-before: always;"></div>"#);
}
print_content.push_str(&fixed_content);
let mut content = String::new();
serialize(&chapter_tree.tree, &mut content);
// Update the context with data for this file
let ctx_path = path
.to_str()
.with_context(|| "Could not convert path to str")?;
let filepath = Path::new(&ctx_path).with_extension("html");
// "print.html" is used for the print page.
if path == Path::new("print.md") {
bail!("{} is reserved for internal use", path.display());
};
let book_title = ctx
.data
.get("book_title")
@@ -137,13 +122,6 @@ impl HtmlHandlebars {
debug!("Render template");
let rendered = ctx.handlebars.render("index", &ctx.data)?;
let rendered = self.post_process(
rendered,
&ctx.html_config.playground,
&ctx.html_config.code,
ctx.edition,
);
// Write to file
debug!("Creating {}", filepath.display());
utils::fs::write_file(&ctx.destination, &filepath, rendered.as_bytes())?;
@@ -153,12 +131,6 @@ impl HtmlHandlebars {
ctx.data.insert("path_to_root".to_owned(), json!(""));
ctx.data.insert("is_index".to_owned(), json!(true));
let rendered_index = ctx.handlebars.render("index", &ctx.data)?;
let rendered_index = self.post_process(
rendered_index,
&ctx.html_config.playground,
&ctx.html_config.code,
ctx.edition,
);
debug!("Creating index.html from {}", ctx_path);
utils::fs::write_file(&ctx.destination, "index.html", rendered_index.as_bytes())?;
}
@@ -192,7 +164,11 @@ impl HtmlHandlebars {
.to_string()
}
};
let options = crate::html_render_options_from_config(Path::new("404.md"), html_config);
let options = crate::html::HtmlRenderOptions::new(
Path::new("404.md"),
html_config,
ctx.config.rust.edition,
);
let html_content_404 = render_markdown(&content_404, &options);
let mut data_404 = data.clone();
@@ -219,43 +195,28 @@ impl HtmlHandlebars {
data_404.insert("title".to_owned(), json!(title));
let rendered = handlebars.render("index", &data_404)?;
let rendered = self.post_process(
rendered,
&html_config.playground,
&html_config.code,
ctx.config.rust.edition,
);
let output_file = get_404_output_file(&html_config.input_404);
let output_file = utils::fs::get_404_output_file(&html_config.input_404);
utils::fs::write_file(destination, output_file, rendered.as_bytes())?;
debug!("Creating 404.html ✓");
Ok(())
}
/// Runs the HTML post-processing passes over a rendered page, in order:
/// header links, code block class fix-up, playground wrapping, hidden
/// lines, and Font Awesome icon conversion.
fn post_process(
    &self,
    rendered: String,
    playground_config: &Playground,
    code_config: &Code,
    edition: Option<RustEdition>,
) -> String {
    let with_header_links = build_header_links(&rendered);
    let with_fixed_classes = fix_code_blocks(&with_header_links);
    let with_playgrounds = add_playground_pre(&with_fixed_classes, playground_config, edition);
    let with_hidden_lines = hide_lines(&with_playgrounds, code_config);
    convert_fontawesome(&with_hidden_lines)
}
/// Update the context with data for this file
fn configure_print_version(
fn render_print_page(
&self,
ctx: &RenderContext,
handlebars: &Handlebars<'_>,
data: &mut serde_json::Map<String, serde_json::Value>,
print_content: &str,
) {
// Make sure that the Print chapter does not display the title from
// the last rendered chapter by removing it from its context
data.remove("title");
chapter_trees: Vec<ChapterTree<'_>>,
) -> Result<String> {
let print_content = crate::html::render_print_page(chapter_trees);
if let Some(ref title) = ctx.config.book.title {
data.insert("title".to_owned(), json!(title));
} else {
// Make sure that the Print chapter does not display the title from
// the last rendered chapter by removing it from its context
data.remove("title");
}
data.insert("is_print".to_owned(), json!(true));
data.insert("path".to_owned(), json!("print.md"));
data.insert("content".to_owned(), json!(print_content));
@@ -263,6 +224,10 @@ impl HtmlHandlebars {
"path_to_root".to_owned(),
json!(utils::fs::path_to_root(Path::new("print.md"))),
);
debug!("Render template");
let rendered = handlebars.render("index", &data)?;
Ok(rendered)
}
fn register_hbs_helpers(&self, handlebars: &mut Handlebars<'_>, html_config: &HtmlConfig) {
@@ -401,8 +366,7 @@ impl Renderer for HtmlHandlebars {
let mut data = make_data(&ctx.root, book, &ctx.config, &html_config, &theme)?;
// Print version
let mut print_content = String::new();
let chapter_trees = build_trees(book, &html_config, ctx.config.rust.edition);
fs::create_dir_all(destination)
.with_context(|| "Unexpected error when constructing destination path")?;
@@ -415,7 +379,7 @@ impl Renderer for HtmlHandlebars {
let default = mdbook_core::config::Search::default();
let search = html_config.search.as_ref().unwrap_or(&default);
if search.enable {
super::search::create_files(&search, &mut static_files, &book)?;
super::search::create_files(&search, &mut static_files, &chapter_trees)?;
}
}
@@ -458,20 +422,18 @@ impl Renderer for HtmlHandlebars {
utils::fs::write_file(destination, "CNAME", format!("{cname}\n").as_bytes())?;
}
let chapters: Vec<_> = book.chapters().collect();
for (i, ch) in chapters.iter().enumerate() {
let previous = (i != 0).then(|| chapters[i - 1]);
let next = (i != chapters.len() - 1).then(|| chapters[i + 1]);
for (i, chapter_tree) in chapter_trees.iter().enumerate() {
let previous = (i != 0).then(|| chapter_trees[i - 1].chapter);
let next = (i != chapter_trees.len() - 1).then(|| chapter_trees[i + 1].chapter);
let ctx = RenderChapterContext {
handlebars: &handlebars,
destination: destination.to_path_buf(),
data: data.clone(),
book_config: book_config.clone(),
html_config: html_config.clone(),
edition: ctx.config.rust.edition,
chapter_titles: &ctx.chapter_titles,
};
self.render_chapter(ch, previous, next, ctx, &mut print_content)?;
self.render_chapter(chapter_tree, previous, next, ctx)?;
}
// Render 404 page
@@ -479,25 +441,12 @@ impl Renderer for HtmlHandlebars {
self.render_404(ctx, &html_config, &src_dir, &mut handlebars, &mut data)?;
}
// Print version
self.configure_print_version(&mut data, &print_content);
if let Some(ref title) = ctx.config.book.title {
data.insert("title".to_owned(), json!(title));
}
// Render the handlebars template with the data
// Render the print version.
if html_config.print.enable {
debug!("Render template");
let rendered = handlebars.render("index", &data)?;
let print_rendered =
self.render_print_page(ctx, &handlebars, &mut data, chapter_trees)?;
let rendered = self.post_process(
rendered,
&html_config.playground,
&html_config.code,
ctx.config.rust.edition,
);
utils::fs::write_file(destination, "print.html", rendered.as_bytes())?;
utils::fs::write_file(destination, "print.html", print_rendered.as_bytes())?;
debug!("Creating print.html ✓");
}
@@ -691,331 +640,12 @@ fn make_data(
Ok(data)
}
/// Wraps the contents of every header tag in the rendered HTML in an
/// anchor, so people can link to sections directly.
///
/// Headers carrying one of the ignored classes are returned unchanged.
fn build_header_links(html: &str) -> String {
    static_regex!(
        BUILD_HEADER_LINKS,
        r#"<h(\d)(?: id="([^"]+)")?(?: class="([^"]+)")?>(.*?)</h\d>"#
    );
    static IGNORE_CLASS: &[&str] = &["menu-title", "mdbook-help-title"];

    let mut id_counter = HashMap::new();
    BUILD_HEADER_LINKS
        .replace_all(html, |caps: &Captures<'_>| {
            let level = caps[1]
                .parse()
                .expect("Regex should ensure we only ever get numbers here");
            let existing_id = caps.get(2).map(|m| m.as_str().to_string());
            let class_attr = caps.get(3);

            // Ignore .menu-title because now it's getting detected by the regex.
            let is_ignored = class_attr.is_some_and(|classes| {
                classes
                    .as_str()
                    .split(" ")
                    .any(|class| IGNORE_CLASS.contains(&class))
            });
            if is_ignored {
                return caps[0].to_string();
            }

            insert_link_into_header(
                level,
                &caps[4],
                existing_id,
                class_attr.map(|m| m.as_str().to_string()),
                &mut id_counter,
            )
        })
        .into_owned()
}
/// Builds the HTML for one header with its content wrapped in a link.
///
/// An explicit `id` is used as is. Otherwise an ID is derived from
/// `content` via `utils::unique_id_from_content`, which takes `id_counter`.
/// `classes`, when present, is emitted as the header's `class` attribute.
fn insert_link_into_header(
    level: usize,
    content: &str,
    id: Option<String>,
    classes: Option<String>,
    id_counter: &mut HashMap<String, usize>,
) -> String {
    let id = match id {
        Some(explicit) => explicit,
        None => utils::unique_id_from_content(content, id_counter),
    };
    let classes = match classes {
        Some(names) => format!(" class=\"{names}\""),
        None => String::new(),
    };
    format!(
        r##"<h{level} id="{id}"{classes}><a class="header" href="#{id}">{content}</a></h{level}>"##
    )
}
// Convert fontawesome `<i>` tags to inline SVG.
//
// The `fa-NAME` class selects the icon and `fa`/`fas`/`fab` select the icon
// type; any other class is carried over to the generated `<span>`. Tags
// without an icon class, or whose icon lookup fails, are left unchanged.
fn convert_fontawesome(html: &str) -> String {
    use font_awesome_as_a_crate as fa;

    static_regex!(FA_RE, r#"<i([^>]+)class="([^"]+)"([^>]*)></i>"#);
    FA_RE
        .replace_all(html, |caps: &Captures<'_>| {
            let text = &caps[0];
            let (before, classes, after) = (&caps[1], &caps[2], &caps[3]);

            let mut icon = String::new();
            let mut type_ = fa::Type::Regular;
            let mut other_classes = String::new();
            for class in classes.split(" ") {
                match class {
                    "fa" => type_ = fa::Type::Regular,
                    "fas" => type_ = fa::Type::Solid,
                    "fab" => type_ = fa::Type::Brands,
                    _ => match class.strip_prefix("fa-") {
                        Some(name) => icon = name.to_owned(),
                        None => {
                            other_classes.push(' ');
                            other_classes.push_str(class);
                        }
                    },
                }
            }

            if icon.is_empty() {
                return text.to_owned();
            }
            match fa::svg(type_, &icon) {
                Ok(svg) => format!(
                    r#"<span{before}class="fa-svg{other_classes}"{after}>{svg}</span>"#
                ),
                Err(_) => text.to_owned(),
            }
        })
        .into_owned()
}
// The rust book uses annotations for rustdoc to test code snippets,
// like the following:
// ```rust,should_panic
// fn main() {
// // Code here
// }
// ```
// This function rewrites the `class` attribute of every `<code>` tag so
// that the commas in it become spaces.
fn fix_code_blocks(html: &str) -> String {
    static_regex!(FIX_CODE_BLOCKS, r#"<code([^>]+)class="([^"]+)"([^>]*)>"#);

    let fixed = FIX_CODE_BLOCKS.replace_all(html, |caps: &Captures<'_>| {
        let (before, after) = (&caps[1], &caps[3]);
        let classes = caps[2].replace(',', " ");
        format!(r#"<code{before}class="{classes}"{after}>"#)
    });
    fixed.into_owned()
}
// Matches a `<code ... class="...">...</code>` element.
//
// Capture 1 is the whole element, capture 2 the value of the `class`
// attribute and capture 3 the element's contents. `(?s)` lets `.` match
// newlines so multi-line code is captured. At most one character may appear
// between `<code` and `class`.
static_regex!(
CODE_BLOCK_RE,
r#"((?s)<code[^>]?class="([^"]+)".*?>(.*?)</code>)"#
);
/// Wraps runnable Rust code blocks in `<pre class="playground">`.
///
/// A `<code>` element is treated as runnable when its classes contain
/// `language-rust` and either `mdbook-runnable` is present, or the playground
/// is configured as runnable and none of `ignore`, `noplayground` or
/// `noplaypen` is present. All other code elements are returned unchanged.
///
/// For runnable blocks:
/// - The class for the configured `edition` is appended unless the block
///   already names an edition itself (`edition2015`, `edition2018`,
///   `edition2021` or `edition2024`).
/// - Unless the block is editable or already has a `fn main` / `quick_main!`,
///   the code is wrapped in a hidden `fn main`, with leading inner attributes
///   kept outside of it.
///
/// # Panics
///
/// Panics if `edition` is a variant this function does not know about.
fn add_playground_pre(
    html: &str,
    playground_config: &Playground,
    edition: Option<RustEdition>,
) -> String {
    // Edition classes a user may put on a code block to force an edition.
    const EDITION_CLASSES: [&str; 4] =
        ["edition2015", "edition2018", "edition2021", "edition2024"];

    CODE_BLOCK_RE
        .replace_all(html, |caps: &Captures<'_>| {
            let text = &caps[1];
            let classes = &caps[2];
            let code = &caps[3];

            let is_runnable = classes.contains("language-rust")
                && ((!classes.contains("ignore")
                    && !classes.contains("noplayground")
                    && !classes.contains("noplaypen")
                    && playground_config.runnable)
                    || classes.contains("mdbook-runnable"));
            if !is_runnable {
                // not language-rust (or not runnable), so no-op
                return text.to_owned();
            }

            let user_forced_edition = EDITION_CLASSES
                .iter()
                .any(|edition_class| classes.contains(*edition_class));
            let edition_class = if user_forced_edition {
                // the user forced edition, we should not overwrite it
                ""
            } else {
                match edition {
                    Some(RustEdition::E2015) => " edition2015",
                    Some(RustEdition::E2018) => " edition2018",
                    Some(RustEdition::E2021) => " edition2021",
                    Some(RustEdition::E2024) => " edition2024",
                    Some(_) => panic!("edition {edition:?} not covered"),
                    None => "",
                }
            };

            let has_main = (playground_config.editable && classes.contains("editable"))
                || text.contains("fn main")
                || text.contains("quick_main!");
            let content: Cow<'_, str> = if has_main {
                code.into()
            } else {
                // we need to inject our own main
                let (attrs, code) = partition_rust_source(code);
                let newline = if code.is_empty() || code.ends_with('\n') {
                    ""
                } else {
                    "\n"
                };
                format!("# #![allow(unused)]\n{attrs}# fn main() {{\n{code}{newline}# }}").into()
            };

            // wrap the contents in an external pre block
            format!(
                "<pre class=\"playground\"><code class=\"{classes}{edition_class}\">{content}</code></pre>"
            )
        })
        .into_owned()
}
/// Modifies all `<code>` blocks to convert "hidden" lines and to wrap them in
/// a `<span class="boring">`.
///
/// Rust blocks (`language-rust`) use the Rust-specific rules. Other blocks
/// are processed only when a hide prefix is known, either from a
/// `hidelines=PREFIX` class on the block or from the `hidelines` entry of
/// `code_config` for the block's language. Blocks without a prefix are
/// returned unchanged.
fn hide_lines(html: &str, code_config: &Code) -> String {
static_regex!(LANGUAGE_REGEX, r"\blanguage-(\w+)\b");
static_regex!(HIDELINES_REGEX, r"\bhidelines=(\S+)");
CODE_BLOCK_RE
.replace_all(html, |caps: &Captures<'_>| {
let text = &caps[1];
let classes = &caps[2];
let code = &caps[3];
if classes.contains("language-rust") {
// The rebuilt `<code>` tag carries only the class attribute.
format!(
"<code class=\"{}\">{}</code>",
classes,
hide_lines_rust(code)
)
} else {
// First try to get the prefix from the code block
let hidelines_capture = HIDELINES_REGEX.captures(classes);
let hidelines_prefix = match &hidelines_capture {
Some(capture) => Some(&capture[1]),
None => {
// Then look up the prefix by language
LANGUAGE_REGEX.captures(classes).and_then(|capture| {
code_config.hidelines.get(&capture[1]).map(|p| p.as_str())
})
}
};
match hidelines_prefix {
Some(prefix) => format!(
"<code class=\"{}\">{}</code>",
classes,
hide_lines_with_prefix(code, prefix)
),
None => text.to_owned(),
}
}
})
.into_owned()
}
/// Applies the Rust hidden-line rules to the contents of a code block.
///
/// - A line whose first non-whitespace text is `##` loses one `#` and stays
///   visible.
/// - A line that is just `#`, or starts with `# `, is wrapped in
///   `<span class="boring">` with the `#` (and the following space) removed.
/// - Every other line is copied unchanged.
///
/// Lines are joined with `\n`; no newline is added after the last line.
fn hide_lines_rust(content: &str) -> String {
    static_regex!(BORING_LINES_REGEX, r"^(\s*)#(.?)(.*)$");

    let mut out = String::with_capacity(content.len());
    let mut lines = content.lines().peekable();
    while let Some(line) = lines.next() {
        // Don't include newline on the last line.
        let newline = if lines.peek().is_some() { "\n" } else { "" };
        match BORING_LINES_REGEX.captures(line) {
            // Escaped `##`: keep the line, minus the first `#`.
            Some(caps) if &caps[2] == "#" => {
                out.push_str(&caps[1]);
                out.push_str(&caps[2]);
                out.push_str(&caps[3]);
                out.push_str(newline);
            }
            // Hidden line: `#` alone or `# ...`.
            Some(caps) if matches!(&caps[2], "" | " ") => {
                out.push_str("<span class=\"boring\">");
                out.push_str(&caps[1]);
                out.push_str(&caps[3]);
                out.push_str(newline);
                out.push_str("</span>");
            }
            _ => {
                out.push_str(line);
                out.push_str(newline);
            }
        }
    }
    out
}
/// Hides every line of `content` whose first non-whitespace text is `prefix`.
///
/// A hidden line is wrapped in `<span class="boring">` with the prefix
/// removed and its leading whitespace kept. Every output line, including the
/// last one, is terminated with `\n`; for hidden lines the newline sits
/// inside the span.
fn hide_lines_with_prefix(content: &str, prefix: &str) -> String {
    let mut out = String::with_capacity(content.len());
    for line in content.lines() {
        let trimmed = line.trim_start();
        match trimmed.strip_prefix(prefix) {
            Some(rest) => {
                // Whatever `trim_start` removed is the line's indentation.
                let indent = &line[..line.len() - trimmed.len()];
                out.push_str("<span class=\"boring\">");
                out.push_str(indent);
                out.push_str(rest);
                out.push_str("\n</span>");
            }
            None => {
                out.push_str(line);
                out.push('\n');
            }
        }
    }
    out
}
/// Splits Rust inner attributes from the given source string.
///
/// Returns `(inner_attrs, rest_of_code)`.
///
/// The first part is the leading run of lines that are either inner
/// attributes (`#![...]`) or whitespace-only; it is empty when the source
/// does not start with such lines.
fn partition_rust_source(s: &str) -> (&str, &str) {
static_regex!(
HEADER_RE,
r"^(?mx)
(
(?:
^[ \t]*\#!\[.* (?:\r?\n)?
|
^\s* (?:\r?\n)?
)*
)"
);
// The length of the captured header is the byte offset at which to split.
let split_idx = match HEADER_RE.captures(s) {
Some(caps) => caps[1].len(),
None => 0,
};
s.split_at(split_idx)
}
/// Inputs handed to `render_chapter` for rendering a single chapter.
struct RenderChapterContext<'a> {
// Template registry used to render the page.
handlebars: &'a Handlebars<'a>,
// Directory the rendered files are written into.
destination: PathBuf,
// Template data for this chapter; each chapter receives its own copy.
data: serde_json::Map<String, serde_json::Value>,
book_config: BookConfig,
html_config: HtmlConfig,
edition: Option<RustEdition>,
// NOTE(review): presumably maps chapter paths to their titles — confirm.
chapter_titles: &'a HashMap<PathBuf, String>,
}
@@ -1074,283 +704,3 @@ fn collect_redirects_for_path(
.collect();
Ok(map)
}
#[cfg(test)]
mod tests {
use super::*;
use mdbook_core::config::TextDirection;
use pretty_assertions::assert_eq;
#[test]
fn original_build_header_links() {
let inputs = vec![
(
"blah blah <h1>Foo</h1>",
r##"blah blah <h1 id="foo"><a class="header" href="#foo">Foo</a></h1>"##,
),
(
"<h1>Foo</h1>",
r##"<h1 id="foo"><a class="header" href="#foo">Foo</a></h1>"##,
),
(
"<h3>Foo^bar</h3>",
r##"<h3 id="foobar"><a class="header" href="#foobar">Foo^bar</a></h3>"##,
),
(
"<h4></h4>",
r##"<h4 id=""><a class="header" href="#"></a></h4>"##,
),
(
"<h4><em>Hï</em></h4>",
r##"<h4 id="hï"><a class="header" href="#hï"><em>Hï</em></a></h4>"##,
),
(
"<h1>Foo</h1><h3>Foo</h3>",
r##"<h1 id="foo"><a class="header" href="#foo">Foo</a></h1><h3 id="foo-1"><a class="header" href="#foo-1">Foo</a></h3>"##,
),
// id only
(
r##"<h1 id="foobar">Foo</h1>"##,
r##"<h1 id="foobar"><a class="header" href="#foobar">Foo</a></h1>"##,
),
// class only
(
r##"<h1 class="class1 class2">Foo</h1>"##,
r##"<h1 id="foo" class="class1 class2"><a class="header" href="#foo">Foo</a></h1>"##,
),
// both id and class
(
r##"<h1 id="foobar" class="class1 class2">Foo</h1>"##,
r##"<h1 id="foobar" class="class1 class2"><a class="header" href="#foobar">Foo</a></h1>"##,
),
];
for (src, should_be) in inputs {
let got = build_header_links(src);
assert_eq!(got, should_be);
}
}
#[test]
fn add_playground() {
let inputs = [
(
"<code class=\"language-rust\">x()</code>",
"<pre class=\"playground\"><code class=\"language-rust\"># #![allow(unused)]\n# fn main() {\nx()\n# }</code></pre>",
),
(
"<code class=\"language-rust\">fn main() {}</code>",
"<pre class=\"playground\"><code class=\"language-rust\">fn main() {}</code></pre>",
),
(
"<code class=\"language-rust editable\">let s = \"foo\n # bar\n\";</code>",
"<pre class=\"playground\"><code class=\"language-rust editable\">let s = \"foo\n # bar\n\";</code></pre>",
),
(
"<code class=\"language-rust editable\">let s = \"foo\n ## bar\n\";</code>",
"<pre class=\"playground\"><code class=\"language-rust editable\">let s = \"foo\n ## bar\n\";</code></pre>",
),
(
"<code class=\"language-rust editable\">let s = \"foo\n # bar\n#\n\";</code>",
"<pre class=\"playground\"><code class=\"language-rust editable\">let s = \"foo\n # bar\n#\n\";</code></pre>",
),
(
"<code class=\"language-rust ignore\">let s = \"foo\n # bar\n\";</code>",
"<code class=\"language-rust ignore\">let s = \"foo\n # bar\n\";</code>",
),
(
"<code class=\"language-rust editable\">#![no_std]\nlet s = \"foo\";\n #[some_attr]</code>",
"<pre class=\"playground\"><code class=\"language-rust editable\">#![no_std]\nlet s = \"foo\";\n #[some_attr]</code></pre>",
),
];
for (src, should_be) in &inputs {
let mut p = Playground::default();
p.editable = true;
let got = add_playground_pre(src, &p, None);
assert_eq!(&*got, *should_be);
}
}
#[test]
fn add_playground_edition2015() {
let inputs = [
(
"<code class=\"language-rust\">x()</code>",
"<pre class=\"playground\"><code class=\"language-rust edition2015\"># #![allow(unused)]\n# fn main() {\nx()\n# }</code></pre>",
),
(
"<code class=\"language-rust\">fn main() {}</code>",
"<pre class=\"playground\"><code class=\"language-rust edition2015\">fn main() {}</code></pre>",
),
(
"<code class=\"language-rust edition2015\">fn main() {}</code>",
"<pre class=\"playground\"><code class=\"language-rust edition2015\">fn main() {}</code></pre>",
),
(
"<code class=\"language-rust edition2018\">fn main() {}</code>",
"<pre class=\"playground\"><code class=\"language-rust edition2018\">fn main() {}</code></pre>",
),
];
for (src, should_be) in &inputs {
let mut p = Playground::default();
p.editable = true;
let got = add_playground_pre(src, &p, Some(RustEdition::E2015));
assert_eq!(&*got, *should_be);
}
}
/// With edition 2018 as the default, Rust blocks without an edition class get
/// `edition2018`, while blocks that already name an edition keep their own.
#[test]
fn add_playground_edition2018() {
    // The playground settings are the same for every case.
    let mut playground = Playground::default();
    playground.editable = true;

    // (input HTML, expected HTML)
    let cases = [
        (
            "<code class=\"language-rust\">x()</code>",
            "<pre class=\"playground\"><code class=\"language-rust edition2018\"># #![allow(unused)]\n# fn main() {\nx()\n# }</code></pre>",
        ),
        (
            "<code class=\"language-rust\">fn main() {}</code>",
            "<pre class=\"playground\"><code class=\"language-rust edition2018\">fn main() {}</code></pre>",
        ),
        (
            "<code class=\"language-rust edition2015\">fn main() {}</code>",
            "<pre class=\"playground\"><code class=\"language-rust edition2015\">fn main() {}</code></pre>",
        ),
        (
            "<code class=\"language-rust edition2018\">fn main() {}</code>",
            "<pre class=\"playground\"><code class=\"language-rust edition2018\">fn main() {}</code></pre>",
        ),
    ];
    for (source, expected) in cases {
        let rendered = add_playground_pre(source, &playground, Some(RustEdition::E2018));
        assert_eq!(&*rendered, expected);
    }
}
/// With edition 2021 as the default, Rust blocks without an edition class get
/// `edition2021`, while blocks that already name an edition keep their own.
#[test]
fn add_playground_edition2021() {
    // The playground settings are the same for every case.
    let mut playground = Playground::default();
    playground.editable = true;

    // (input HTML, expected HTML)
    let cases = [
        (
            "<code class=\"language-rust\">x()</code>",
            "<pre class=\"playground\"><code class=\"language-rust edition2021\"># #![allow(unused)]\n# fn main() {\nx()\n# }</code></pre>",
        ),
        (
            "<code class=\"language-rust\">fn main() {}</code>",
            "<pre class=\"playground\"><code class=\"language-rust edition2021\">fn main() {}</code></pre>",
        ),
        (
            "<code class=\"language-rust edition2015\">fn main() {}</code>",
            "<pre class=\"playground\"><code class=\"language-rust edition2015\">fn main() {}</code></pre>",
        ),
        (
            "<code class=\"language-rust edition2018\">fn main() {}</code>",
            "<pre class=\"playground\"><code class=\"language-rust edition2018\">fn main() {}</code></pre>",
        ),
    ];
    for (source, expected) in cases {
        let rendered = add_playground_pre(source, &playground, Some(RustEdition::E2021));
        assert_eq!(&*rendered, expected);
    }
}
/// Rust code blocks: lines introduced by `# ` are wrapped in a `boring` span,
/// and `##` is an escape that yields a literal `#`.
#[test]
fn hide_lines_language_rust() {
    // No per-language overrides are needed for Rust.
    let code_config = Code::default();

    // (input HTML, expected HTML)
    let cases = [
        (
            "<pre class=\"playground\"><code class=\"language-rust\">\n# #![allow(unused)]\n# fn main() {\nx()\n# }</code></pre>",
            "<pre class=\"playground\"><code class=\"language-rust\">\n<span class=\"boring\">#![allow(unused)]\n</span><span class=\"boring\">fn main() {\n</span>x()\n<span class=\"boring\">}</span></code></pre>",
        ),
        // A `#` that is not followed by a space does not hide the line.
        (
            "<pre class=\"playground\"><code class=\"language-rust\">\n#fn main() {\nx()\n#}</code></pre>",
            "<pre class=\"playground\"><code class=\"language-rust\">\n#fn main() {\nx()\n#}</code></pre>",
        ),
        (
            "<pre class=\"playground\"><code class=\"language-rust\">fn main() {}</code></pre>",
            "<pre class=\"playground\"><code class=\"language-rust\">fn main() {}</code></pre>",
        ),
        (
            "<pre class=\"playground\"><code class=\"language-rust editable\">let s = \"foo\n # bar\n\";</code></pre>",
            "<pre class=\"playground\"><code class=\"language-rust editable\">let s = \"foo\n<span class=\"boring\"> bar\n</span>\";</code></pre>",
        ),
        (
            "<pre class=\"playground\"><code class=\"language-rust editable\">let s = \"foo\n ## bar\n\";</code></pre>",
            "<pre class=\"playground\"><code class=\"language-rust editable\">let s = \"foo\n # bar\n\";</code></pre>",
        ),
        (
            "<pre class=\"playground\"><code class=\"language-rust editable\">let s = \"foo\n # bar\n#\n\";</code></pre>",
            "<pre class=\"playground\"><code class=\"language-rust editable\">let s = \"foo\n<span class=\"boring\"> bar\n</span><span class=\"boring\">\n</span>\";</code></pre>",
        ),
        (
            "<code class=\"language-rust ignore\">let s = \"foo\n # bar\n\";</code>",
            "<code class=\"language-rust ignore\">let s = \"foo\n<span class=\"boring\"> bar\n</span>\";</code>",
        ),
        (
            "<pre class=\"playground\"><code class=\"language-rust editable\">#![no_std]\nlet s = \"foo\";\n #[some_attr]</code></pre>",
            "<pre class=\"playground\"><code class=\"language-rust editable\">#![no_std]\nlet s = \"foo\";\n #[some_attr]</code></pre>",
        ),
    ];
    for (source, expected) in cases {
        let rendered = hide_lines(source, &code_config);
        assert_eq!(&*rendered, expected);
    }
}
/// Non-Rust code blocks: lines are hidden by the prefix configured for the
/// language, or by a `hidelines=` class on the block itself.
#[test]
fn hide_lines_language_other() {
    // Configure `~` as the hide-line prefix for Python.
    let mut code_config = Code::default();
    code_config
        .hidelines
        .insert("python".to_string(), "~".to_string());

    // (input HTML, expected HTML)
    let cases = [
        (
            "<code class=\"language-python\">~hidden()\nnothidden():\n~ hidden()\n ~hidden()\n nothidden()</code>",
            "<code class=\"language-python\"><span class=\"boring\">hidden()\n</span>nothidden():\n<span class=\"boring\"> hidden()\n</span><span class=\"boring\"> hidden()\n</span> nothidden()\n</code>",
        ),
        (
            "<code class=\"language-python hidelines=!!!\">!!!hidden()\nnothidden():\n!!! hidden()\n !!!hidden()\n nothidden()</code>",
            "<code class=\"language-python hidelines=!!!\"><span class=\"boring\">hidden()\n</span>nothidden():\n<span class=\"boring\"> hidden()\n</span><span class=\"boring\"> hidden()\n</span> nothidden()\n</code>",
        ),
    ];
    for (source, expected) in cases {
        let rendered = hide_lines(source, &code_config);
        assert_eq!(&*rendered, expected);
    }
}
/// `TextDirection` serializes to the short strings `"rtl"` and `"ltr"`.
#[test]
fn test_json_direction() {
    let cases = [
        (TextDirection::RightToLeft, "rtl"),
        (TextDirection::LeftToRight, "ltr"),
    ];
    for (direction, expected) in cases {
        assert_eq!(json!(direction), json!(expected));
    }
}
/// `partition_rust_source` splits a snippet into its leading `#![...]`
/// attributes (with surrounding blank lines) and the remaining code.
#[test]
fn it_partitions_rust_source() {
    // (source, (leading attributes, remaining code))
    let cases = [
        ("", ("", "")),
        ("let x = 1;", ("", "let x = 1;")),
        (
            "fn main()\n{ let x = 1; }\n",
            ("", "fn main()\n{ let x = 1; }\n"),
        ),
        ("#![allow(foo)]", ("#![allow(foo)]", "")),
        ("#![allow(foo)]\n", ("#![allow(foo)]\n", "")),
        (
            "#![allow(foo)]\nlet x = 1;",
            ("#![allow(foo)]\n", "let x = 1;"),
        ),
        (
            "\n\
            #![allow(foo)]\n\
            \n\
            #![allow(bar)]\n\
            \n\
            let x = 1;",
            ("\n#![allow(foo)]\n\n#![allow(bar)]\n\n", "let x = 1;"),
        ),
    ];
    for (source, expected) in cases {
        assert_eq!(partition_rust_source(source), expected);
    }
}
}

View File

@@ -1,11 +1,10 @@
use crate::utils::ToUrlPath;
use std::path::Path;
use std::{cmp::Ordering, collections::BTreeMap};
use handlebars::{
Context, Handlebars, Helper, HelperDef, Output, RenderContext, RenderError, RenderErrorReason,
};
use mdbook_markdown::special_escape;
use mdbook_core::utils::escape_html_attribute;
use std::path::Path;
use std::{cmp::Ordering, collections::BTreeMap};
// Handlebars helper to construct TOC
#[derive(Clone, Copy)]
@@ -101,7 +100,7 @@ impl HelperDef for RenderToc {
// Part title
if let Some(title) = item.get("part") {
out.write("<li class=\"part-title\">")?;
out.write(&special_escape(title))?;
out.write(&escape_html_attribute(title))?;
out.write("</li>")?;
continue;
}
@@ -137,7 +136,7 @@ impl HelperDef for RenderToc {
}
if let Some(name) = item.get("name") {
out.write(&special_escape(name))?
out.write(&escape_html_attribute(name))?;
}
if path_exists {

View File

@@ -1,19 +1,17 @@
use super::static_files::StaticFiles;
use crate::html::{ChapterTree, Node};
use crate::theme::searcher;
use crate::utils::ToUrlPath;
use anyhow::{Result, bail};
use ego_tree::iter::Edge;
use elasticlunr::{Index, IndexBuilder};
use mdbook_core::book::{Book, Chapter};
use mdbook_core::book::Chapter;
use mdbook_core::config::{Search, SearchChapterSettings};
use mdbook_core::utils;
use mdbook_markdown::HtmlRenderOptions;
use mdbook_markdown::new_cmark_parser;
use pulldown_cmark::*;
use mdbook_core::static_regex;
use serde::Serialize;
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use tracing::{debug, warn};
const MAX_WORD_LENGTH_TO_INDEX: usize = 80;
@@ -31,7 +29,7 @@ fn tokenize(text: &str) -> Vec<String> {
pub(super) fn create_files(
search_config: &Search,
static_files: &mut StaticFiles,
book: &Book,
chapter_trees: &[ChapterTree<'_>],
) -> Result<()> {
let mut index = IndexBuilder::new()
.add_field_with_tokenizer("title", Box::new(&tokenize))
@@ -39,19 +37,19 @@ pub(super) fn create_files(
.add_field_with_tokenizer("breadcrumbs", Box::new(&tokenize))
.build();
let mut doc_urls = Vec::with_capacity(book.items.len());
// These are links to all of the headings in all of the chapters.
let mut doc_urls = Vec::new();
let chapter_configs = sort_search_config(&search_config.chapter);
validate_chapter_config(&chapter_configs, book)?;
validate_chapter_config(&chapter_configs, chapter_trees)?;
for chapter in book.chapters() {
if let Some(path) = settings_path(chapter) {
let chapter_settings = get_chapter_settings(&chapter_configs, path);
if !chapter_settings.enable.unwrap_or(true) {
continue;
}
for ct in chapter_trees {
let path = settings_path(ct.chapter);
let chapter_settings = get_chapter_settings(&chapter_configs, path);
if !chapter_settings.enable.unwrap_or(true) {
continue;
}
render_item(&mut index, search_config, &mut doc_urls, chapter)?;
index_chapter(&mut index, search_config, &mut doc_urls, ct)?;
}
let index = write_to_json(index, search_config, doc_urls)?;
@@ -85,151 +83,110 @@ fn add_doc(
index: &mut Index,
doc_urls: &mut Vec<String>,
anchor_base: &str,
heading: &str,
id_counter: &mut HashMap<String, usize>,
section_id: &Option<CowStr<'_>>,
heading_id: &str,
items: &[&str],
) {
// Either use the explicit section id the user specified, or generate one
// from the heading content.
let section_id = section_id.as_ref().map(|id| id.to_string()).or_else(|| {
if heading.is_empty() {
// In the case where a chapter has no heading, don't set a section id.
None
} else {
Some(utils::unique_id_from_content(heading, id_counter))
}
});
let mut url = anchor_base.to_string();
if !heading_id.is_empty() {
url.push('#');
url.push_str(heading_id);
}
let url = if let Some(id) = section_id {
Cow::Owned(format!("{anchor_base}#{id}"))
} else {
Cow::Borrowed(anchor_base)
};
let url = utils::collapse_whitespace(url.trim());
let doc_ref = doc_urls.len().to_string();
doc_urls.push(url.into());
doc_urls.push(url);
let items = items.iter().map(|&x| utils::collapse_whitespace(x.trim()));
let items = items.iter().map(|&x| collapse_whitespace(x.trim()));
index.add_doc(&doc_ref, items);
}
/// Renders markdown into flat unformatted text and adds it to the search index.
fn render_item(
/// Adds the chapter to the search index.
fn index_chapter(
index: &mut Index,
search_config: &Search,
doc_urls: &mut Vec<String>,
chapter: &Chapter,
chapter_tree: &ChapterTree<'_>,
) -> Result<()> {
let chapter_path = chapter
.path
.as_ref()
.expect("Checked that path exists above");
let anchor_base = Path::new(&chapter_path)
.with_extension("html")
.to_url_path();
let options = HtmlRenderOptions::new(&chapter_path);
let mut p = new_cmark_parser(&chapter.content, &options.markdown_options).peekable();
let anchor_base = chapter_tree.html_path.to_url_path();
let mut in_heading = false;
let max_section_depth = u32::from(search_config.heading_split_level);
let max_section_depth = search_config.heading_split_level;
let mut section_id = None;
let mut heading = String::new();
let mut body = String::new();
let mut breadcrumbs = chapter.parent_names.clone();
let mut footnote_numbers = HashMap::new();
let mut breadcrumbs = chapter_tree.chapter.parent_names.clone();
breadcrumbs.push(chapter.name.clone());
breadcrumbs.push(chapter_tree.chapter.name.clone());
let mut id_counter = HashMap::new();
while let Some(event) = p.next() {
match event {
Event::Start(Tag::Heading { level, id, .. }) if level as u32 <= max_section_depth => {
if !heading.is_empty() {
// Section finished, the next heading is following now
// Write the data to the index, and clear it for the next section
add_doc(
index,
doc_urls,
&anchor_base,
&heading,
&mut id_counter,
&section_id,
&[&heading, &body, &breadcrumbs.join(" » ")],
);
heading.clear();
body.clear();
breadcrumbs.pop();
}
let mut traverse = chapter_tree.tree.root().traverse();
section_id = id;
in_heading = true;
}
Event::End(TagEnd::Heading(level)) if level as u32 <= max_section_depth => {
in_heading = false;
breadcrumbs.push(heading.clone());
}
Event::Start(Tag::FootnoteDefinition(name)) => {
let number = footnote_numbers.len() + 1;
footnote_numbers.entry(name).or_insert(number);
}
Event::Html(html) => {
let mut html_block = html.into_string();
// As of pulldown_cmark 0.6, html events are no longer contained
// in an HtmlBlock tag. We must collect consecutive Html events
// into a block ourselves.
while let Some(Event::Html(html)) = p.peek() {
html_block.push_str(html);
p.next();
while let Some(edge) = traverse.next() {
match edge {
Edge::Open(node) => match node.value() {
Node::Element(el) => {
if let Some(level) = el.heading_level()
&& level <= max_section_depth
&& let Some(heading_id) = el.attr("id")
{
if !heading.is_empty() {
// Section finished, the next heading is following now
// Write the data to the index, and clear it for the next section
add_doc(
index,
doc_urls,
&anchor_base,
section_id.unwrap(),
&[&heading, &body, &breadcrumbs.join(" » ")],
);
heading.clear();
body.clear();
breadcrumbs.pop();
}
section_id = Some(heading_id);
in_heading = true;
} else if matches!(el.name(), "script" | "style") {
// Skip this node.
while let Some(edge) = traverse.next() {
if let Edge::Close(close) = edge
&& close == node
{
break;
}
}
// Insert spaces where HTML output would usually separate text
// to ensure words don't get merged together
} else if in_heading {
heading.push(' ');
} else {
body.push(' ');
}
}
body.push_str(&clean_html(&html_block));
}
Event::InlineHtml(html) => {
// This is not capable of cleaning inline tags like
// `foo <script>…</script>`. The `<script>` tags show up as
// individual InlineHtml events, and the content inside is
// just a regular Text event. There isn't a very good way to
// know how to collect all the content in-between. I'm not
// sure if this is easily fixable. It should be extremely
// rare, since script and style tags should almost always be
// blocks, and worse case you have some noise in the index.
body.push_str(&clean_html(&html));
}
Event::InlineMath(text) | Event::DisplayMath(text) => {
if in_heading {
heading.push_str(&text);
} else {
body.push_str(&text);
Node::Text(text) => {
if in_heading {
heading.push_str(text);
} else {
body.push_str(text);
}
}
}
Event::Start(_) | Event::End(_) | Event::Rule | Event::SoftBreak | Event::HardBreak => {
// Insert spaces where HTML output would usually separate text
// to ensure words don't get merged together
if in_heading {
heading.push(' ');
} else {
body.push(' ');
Node::Comment(_) => {}
Node::Fragment => {}
Node::RawData(_) => {}
},
Edge::Close(node) => match node.value() {
Node::Element(el) => {
if let Some(level) = el.heading_level()
&& level <= max_section_depth
{
in_heading = false;
breadcrumbs.push(heading.clone());
}
}
}
Event::Text(text) | Event::Code(text) => {
if in_heading {
heading.push_str(&text);
} else {
body.push_str(&text);
}
}
Event::FootnoteReference(name) => {
let len = footnote_numbers.len() + 1;
let number = footnote_numbers.entry(name).or_insert(len);
body.push_str(&format!(" [{number}] "));
}
Event::TaskListMarker(_checked) => {}
_ => {}
},
}
}
if !body.is_empty() || !heading.is_empty() {
// Make sure the last section is added to the index
let title = if heading.is_empty() {
if let Some(chapter) = breadcrumbs.first() {
chapter
@@ -239,14 +196,11 @@ fn render_item(
} else {
&heading
};
// Make sure the last section is added to the index
add_doc(
index,
doc_urls,
&anchor_base,
&heading,
&mut id_counter,
&section_id,
section_id.unwrap_or_default(),
&[title, &body, &breadcrumbs.join(" » ")],
);
}
@@ -316,37 +270,20 @@ fn write_to_json(index: Index, search_config: &Search, doc_urls: Vec<String>) ->
Ok(json_contents)
}
/// Strips all markup from `html`, returning only its text.
///
/// The sanitizer is configured with no allowed tags, attributes or classes,
/// and the contents of `script` and `style` elements are removed entirely.
fn clean_html(html: &str) -> String {
    // Built once on first use and shared by every later call.
    static AMMONIA: LazyLock<ammonia::Builder<'static>> = LazyLock::new(|| {
        let mut sanitizer = ammonia::Builder::new();
        sanitizer.tags(HashSet::new());
        sanitizer.tag_attributes(HashMap::new());
        sanitizer.generic_attributes(HashSet::new());
        sanitizer.link_rel(None);
        sanitizer.allowed_classes(HashMap::new());
        // For these tags the inner content is dropped along with the tag.
        sanitizer.clean_content_tags(HashSet::from(["script", "style"]));
        sanitizer
    });

    let cleaned = AMMONIA.clean(html);
    cleaned.to_string()
}
fn settings_path(ch: &Chapter) -> Option<&Path> {
ch.source_path.as_deref().or_else(|| ch.path.as_deref())
fn settings_path(ch: &Chapter) -> &Path {
ch.source_path
.as_deref()
.unwrap_or_else(|| ch.path.as_deref().unwrap())
}
fn validate_chapter_config(
chapter_configs: &[(PathBuf, SearchChapterSettings)],
book: &Book,
chapter_trees: &[ChapterTree<'_>],
) -> Result<()> {
for (path, _) in chapter_configs {
let found = book
.chapters()
.filter_map(|ch| settings_path(ch))
.any(|source_path| source_path.starts_with(path));
let found = chapter_trees
.iter()
.any(|ct| settings_path(ct.chapter).starts_with(path));
if !found {
bail!(
"[output.html.search.chapter] key `{}` does not match any chapter paths",
@@ -383,6 +320,12 @@ fn get_chapter_settings(
result
}
/// Replaces multiple consecutive whitespace characters with a single space character.
///
/// Only runs of two or more whitespace characters match the pattern
/// (`\s\s+`), so a lone whitespace character, such as a single newline or
/// tab, is left exactly as it is.
fn collapse_whitespace(text: &str) -> Cow<'_, str> {
// `static_regex!` provides the `WS` regex used on the next line.
static_regex!(WS, r"\s\s+");
WS.replace_all(text, " ")
}
#[test]
fn chapter_settings_priority() {
let cfg = r#"

View File

@@ -1,20 +1,8 @@
//! mdBook HTML renderer.
mod html;
mod html_handlebars;
pub mod theme;
pub(crate) mod utils;
pub use html_handlebars::HtmlHandlebars;
use mdbook_core::config::HtmlConfig;
use mdbook_markdown::HtmlRenderOptions;
use std::path::Path;
/// Builds the [`HtmlRenderOptions`] for the chapter at `path`, copying the
/// `smart_punctuation` setting over from the HTML `config`.
pub fn html_render_options_from_config<'a>(
    path: &'a Path,
    config: &HtmlConfig,
) -> HtmlRenderOptions<'a> {
    let mut render_options = HtmlRenderOptions::new(path);
    let markdown = &mut render_options.markdown_options;
    markdown.smart_punctuation = config.smart_punctuation;
    render_options
}

View File

@@ -1,6 +1,43 @@
//! Utilities for processing HTML.
use std::path::Path;
use std::collections::HashMap;
use std::fmt::Write;
use std::path::{Component, Path, PathBuf};
/// Resolves `.` and `..` components of `path` purely lexically.
///
/// The filesystem is never consulted. A `..` that cannot be resolved against
/// an earlier component is kept for relative paths and dropped when the path
/// has a root.
pub(crate) fn normalize_path(path: &Path) -> PathBuf {
    let mut parts = path.components().peekable();

    // A prefix (e.g. a Windows drive) can only come first; seed the result
    // with it so the loop below never sees one.
    let mut normalized = match parts.peek().copied() {
        Some(prefix @ Component::Prefix(..)) => {
            parts.next();
            PathBuf::from(prefix.as_os_str())
        }
        _ => PathBuf::new(),
    };

    for part in parts {
        match part {
            Component::Prefix(..) => unreachable!(),
            Component::CurDir => {}
            Component::RootDir => normalized.push(Component::RootDir),
            Component::Normal(name) => normalized.push(name),
            Component::ParentDir => {
                // Keep the `..` when it follows another kept `..`, or when
                // there is nothing left to pop and the path is not rooted.
                // `pop` is only attempted in the second case.
                let keep_parent = normalized.ends_with(Component::ParentDir)
                    || (!normalized.pop() && !normalized.has_root());
                if keep_parent {
                    normalized.push(Component::ParentDir);
                }
            }
        }
    }
    normalized
}
/// Helper trait for converting a [`Path`] to a string suitable for an HTML path.
pub(crate) trait ToUrlPath {
@@ -14,3 +51,71 @@ impl ToUrlPath for Path {
self.to_str().unwrap().replace('\\', "/")
}
}
/// Make sure an HTML id is unique.
///
/// The `id_counter` map is used to ensure the ID is globally unique. The
/// first time an id is seen it is returned unchanged. When the same id
/// appears again, a numeric suffix (`-1`, `-2`, ...) is appended.
///
/// The suffix is advanced until the resulting id is not already present in
/// `id_counter`, and the id that is returned is recorded in the map as well.
/// This keeps a generated `foo-1` from colliding with a `foo-1` that was
/// requested explicitly, whether that request came earlier or comes later.
pub(crate) fn unique_id(id: &str, id_counter: &mut HashMap<String, u32>) -> String {
    let seen = id_counter.get(id).copied().unwrap_or(0);
    if seen == 0 {
        // First occurrence: use the id as-is and remember it.
        id_counter.insert(id.to_string(), 1);
        return id.to_string();
    }

    // Start at the suffix this id would normally receive and probe upwards
    // until the candidate is not in use.
    let mut suffix = seen;
    let unique = loop {
        let mut candidate = id.to_string();
        write!(candidate, "-{suffix}").unwrap();
        if !id_counter.contains_key(&candidate) {
            break candidate;
        }
        suffix += 1;
    };

    // The next duplicate of `id` resumes after the suffix just used.
    id_counter.insert(id.to_string(), suffix + 1);
    // Reserve the generated id so a later explicit use of it is renamed.
    id_counter.insert(unique.clone(), 1);
    unique
}
/// Derives an HTML id from arbitrary text.
///
/// Leading and trailing whitespace is trimmed. Of what remains, alphanumeric
/// characters, `_` and `-` are kept (ASCII letters lowercased), each
/// whitespace character becomes `-`, and every other character is dropped.
pub(crate) fn id_from_content(content: &str) -> String {
    let trimmed = content.trim();
    let mut id = String::with_capacity(trimmed.len());
    for ch in trimmed.chars() {
        if ch.is_whitespace() {
            id.push('-');
        } else if ch.is_alphanumeric() || matches!(ch, '_' | '-') {
            // Only ASCII letters change case; other letters pass through.
            id.push(ch.to_ascii_lowercase());
        }
    }
    id
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The first use of an id is returned untouched; repeats get `-1`, `-2`, ...
    #[test]
    fn it_generates_unique_ids() {
        let mut id_counter = Default::default();
        // (requested id, id handed back), in call order.
        let calls = [
            ("", ""),
            ("Über", "Über"),
            ("Über", "Über-1"),
            ("Über", "Über-2"),
        ];
        for (requested, expected) in calls {
            assert_eq!(unique_id(requested, &mut id_counter), expected);
        }
    }

    /// Heading text is reduced to the characters allowed in a generated id.
    #[test]
    fn it_normalizes_ids() {
        // (heading text, generated id)
        let cases = [
            (
                "`--passes`: add more rustdoc passes",
                "--passes-add-more-rustdoc-passes",
            ),
            (
                "Method-call 🐙 expressions \u{1f47c}",
                "method-call--expressions-",
            ),
            ("_-_12345", "_-_12345"),
            ("12345", "12345"),
            ("中文", "中文"),
            ("にほんご", "にほんご"),
            ("한국어", "한국어"),
            ("", ""),
            ("中文標題 CJK title", "中文標題-cjk-title"),
            ("Über", "Über"),
        ];
        for (content, expected) in cases {
            assert_eq!(id_from_content(content), expected);
        }
    }
}

View File

@@ -9,20 +9,11 @@
//! [`pulldown_cmark`] event stream. For example, it adjusts some links,
//! modifies the behavior of footnotes, and adds various HTML wrappers.
use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag, TagEnd, html};
use regex::Regex;
use std::collections::HashMap;
use std::fmt::Write;
use std::path::Path;
use std::sync::LazyLock;
use tracing::warn;
use pulldown_cmark::{Options, Parser};
#[doc(inline)]
pub use pulldown_cmark;
#[cfg(test)]
mod tests;
/// Options for parsing markdown.
#[non_exhaustive]
pub struct MarkdownOptions {
@@ -43,28 +34,6 @@ impl Default for MarkdownOptions {
}
}
/// Settings that control how a chapter's markdown is converted to HTML.
#[non_exhaustive]
pub struct HtmlRenderOptions<'a> {
    /// Settings handed to the markdown parser.
    pub markdown_options: MarkdownOptions,
    /// Where the chapter lives, relative to the `SUMMARY.md` file.
    pub path: &'a Path,
    /// Set to `true` when rendering for the print page.
    pub for_print: bool,
}

impl<'a> HtmlRenderOptions<'a> {
    /// Creates the options for the chapter at `path`, with default markdown
    /// options and print rendering switched off.
    pub fn new(path: &'a Path) -> Self {
        Self {
            for_print: false,
            path,
            markdown_options: MarkdownOptions::default(),
        }
    }
}
/// Creates a new pulldown-cmark parser of the given text.
pub fn new_cmark_parser<'text>(text: &'text str, options: &MarkdownOptions) -> Parser<'text> {
let mut opts = Options::empty();
@@ -78,346 +47,3 @@ pub fn new_cmark_parser<'text>(text: &'text str, options: &MarkdownOptions) -> P
}
Parser::new_ext(text, opts)
}
/// Renders markdown to HTML.
///
/// `text` is parsed with [`new_cmark_parser`] and the resulting events are
/// passed through a chain of adapters before being written out:
///
/// 1. `clean_codeblock_headers` normalizes fenced code block info strings.
/// 2. `adjust_links` rewrites link and image destinations.
/// 3. `wrap_tables` surrounds tables with a wrapper `<div>`.
/// 4. Footnote definitions are pulled out of the stream and footnote
///    references are replaced with numbered `<sup>` links.
///
/// The collected footnote definitions, if any, are appended after the main
/// body by `add_footnote_defs`. `options.path` is only used here for warning
/// messages.
pub fn render_markdown(text: &str, options: &HtmlRenderOptions<'_>) -> String {
// Reserve 1.5x the input length up front for the generated HTML.
let mut body = String::with_capacity(text.len() * 3 / 2);
// Based on
// https://github.com/pulldown-cmark/pulldown-cmark/blob/master/pulldown-cmark/examples/footnote-rewrite.rs
// This handling of footnotes is a two-pass process. This is done to
// support linkbacks, little arrows that allow you to jump back to the
// footnote reference. The first pass collects the footnote definitions.
// The second pass modifies those definitions to include the linkbacks,
// and inserts the definitions back into the `events` list.
// This is a map of name -> (number, count)
// `name` is the name of the footnote.
// `number` is the footnote number displayed in the output.
// `count` is the number of references to this footnote (used for multiple
// linkbacks, and checking for unused footnotes).
let mut footnote_numbers = HashMap::new();
// This is a map of name -> Vec<Event>
// `name` is the name of the footnote.
// The events list is the list of events needed to build the footnote definition.
let mut footnote_defs = HashMap::new();
// The following are used when currently processing a footnote definition.
//
// This is the name of the footnote (escaped).
// A non-empty name doubles as the "currently inside a definition" flag.
let mut in_footnote_name = String::new();
// This is the list of events to build the footnote definition.
let mut in_footnote = Vec::new();
// This is used to add space between consecutive footnotes. I was unable
// to figure out a way to do this just with pure CSS.
let mut prev_was_footnote = false;
// The chain below is lazy: the closures (and the footnote bookkeeping they
// mutate) only run while `push_html` consumes `events`.
let events = new_cmark_parser(text, &options.markdown_options)
.map(clean_codeblock_headers)
.map(|event| adjust_links(event, options))
.flat_map(|event| {
let (a, b) = wrap_tables(event);
a.into_iter().chain(b)
})
// Footnote rewriting must go last to ensure inner definition contents
// are processed (since they get pulled out of the initial stream).
.filter_map(|event| {
match event {
Event::Start(Tag::FootnoteDefinition(name)) => {
prev_was_footnote = false;
if !in_footnote.is_empty() {
warn!(
"internal bug: nested footnote not expected in {:?}",
options.path
);
}
in_footnote_name = special_escape(&name);
None
}
Event::End(TagEnd::FootnoteDefinition) => {
// Taking both values resets the "inside a definition" state.
let def_events = std::mem::take(&mut in_footnote);
let name = std::mem::take(&mut in_footnote_name);
if footnote_defs.contains_key(&name) {
warn!(
"footnote `{name}` in {} defined multiple times - \
not updating to new definition",
options.path.display()
);
} else {
footnote_defs.insert(name, def_events);
}
None
}
Event::FootnoteReference(name) => {
let name = special_escape(&name);
// A footnote's number is assigned the first time it is referenced.
let len = footnote_numbers.len() + 1;
let (n, count) = footnote_numbers.entry(name.clone()).or_insert((len, 0));
*count += 1;
let mut html = String::new();
if prev_was_footnote {
write!(html, " ").unwrap();
}
prev_was_footnote = true;
// `count` makes the reference id unique so each use can be linked back to.
write!(
html,
"<sup class=\"footnote-reference\" id=\"fr-{name}-{count}\">\
<a href=\"#footnote-{name}\">{n}</a>\
</sup>"
)
.unwrap();
let html = Event::Html(html.into());
if in_footnote_name.is_empty() {
Some(html)
} else {
// While inside a footnote, we need to accumulate.
in_footnote.push(html);
None
}
}
// While inside a footnote, accumulate all events into a local.
_ if !in_footnote_name.is_empty() => {
in_footnote.push(event);
prev_was_footnote = false;
None
}
_ => {
prev_was_footnote = false;
Some(event)
}
}
});
html::push_html(&mut body, events);
// Second pass: write the collected definitions after the main body.
if !footnote_defs.is_empty() {
add_footnote_defs(
&mut body,
options,
footnote_defs.into_iter().collect(),
&footnote_numbers,
);
}
body
}
/// Adds all footnote definitions into `body`.
///
/// `defs` holds `(name, events)` pairs, one per footnote definition, and
/// `numbers` maps a footnote name to `(number, count)`: the number it was
/// assigned and how many times it was referenced.
///
/// Definitions that were never referenced are dropped with a warning. The
/// rest are written as an `<hr>` followed by an ordered list, sorted by
/// footnote number, with one linkback per reference added to each entry.
/// `options.path` is only used in the warning message.
fn add_footnote_defs(
body: &mut String,
options: &HtmlRenderOptions<'_>,
mut defs: Vec<(String, Vec<Event<'_>>)>,
numbers: &HashMap<String, (usize, u32)>,
) {
// Remove unused.
defs.retain(|(name, _)| {
if !numbers.contains_key(name) {
warn!(
"footnote `{name}` in `{}` is defined but not referenced",
options.path.display()
);
false
} else {
true
}
});
// Every remaining name is present in `numbers`, so indexing cannot panic.
defs.sort_by_cached_key(|(name, _)| numbers[name].0);
body.push_str(
"<hr>\n\
<ol class=\"footnote-definition\">",
);
// Insert the backrefs to the definition, and put the definitions in the output.
for (name, mut fn_events) in defs {
let count = numbers[&name].1;
// The list item's id is the target of the `#footnote-{name}` reference links.
fn_events.insert(
0,
Event::Html(format!("<li id=\"footnote-{name}\">").into()),
);
// Generate the linkbacks.
for usage in 1..=count {
// The first linkback is a bare arrow; later ones are numbered.
let nth = if usage == 1 {
String::new()
} else {
usage.to_string()
};
let backlink =
Event::Html(format!(" <a href=\"#fr-{name}-{usage}\">↩{nth}</a>").into());
if matches!(fn_events.last(), Some(Event::End(TagEnd::Paragraph))) {
// Put the linkback at the end of the last paragraph instead
// of on a line by itself.
fn_events.insert(fn_events.len() - 1, backlink);
} else {
// Not a clear place to put it in this circumstance, so put it
// at the end.
fn_events.push(backlink);
}
}
fn_events.push(Event::Html("</li>\n".into()));
html::push_html(body, fn_events.into_iter());
}
body.push_str("</ol>");
}
/// Surrounds tables with a `<div class="table-wrapper">` so that overflow-x
/// rules can be applied to them.
///
/// Returns up to two events to emit in order: the opening `<div>` goes before
/// a table start, the closing `</div>` goes after a table end, and any other
/// event is passed through alone.
fn wrap_tables(event: Event<'_>) -> (Option<Event<'_>>, Option<Event<'_>>) {
    let is_table_start = matches!(event, Event::Start(Tag::Table(_)));
    let is_table_end = matches!(event, Event::End(TagEnd::Table));

    if is_table_start {
        let open = Event::Html(r#"<div class="table-wrapper">"#.into());
        (Some(open), Some(event))
    } else if is_table_end {
        let close = Event::Html(r#"</div>"#.into());
        (Some(event), Some(close))
    } else {
        (Some(event), None)
    }
}
/// Normalizes the info string of a fenced code block.
///
/// Spaces and tabs are turned into commas and any other whitespace character
/// is removed. Every other event is returned unchanged.
fn clean_codeblock_headers(event: Event<'_>) -> Event<'_> {
    if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) = &event {
        let cleaned: String = info
            .chars()
            .filter_map(|ch| match ch {
                ' ' | '\t' => Some(','),
                other if other.is_whitespace() => None,
                other => Some(other),
            })
            .collect();
        return Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(
            cleaned,
        ))));
    }
    event
}
/// Fix links to the correct location.
///
/// This adjusts links, such as turning `.md` extensions to `.html`.
///
/// Destinations of markdown links and images are rewritten, as are the
/// `src`/`href` attributes of `<a>` and `<img>` tags found in raw HTML
/// events. Links that start with a URL scheme (such as `https:`) are left
/// untouched.
///
/// `options.path` is the path of the page being rendered. It is only
/// consulted when `options.for_print` is set, so that links on the
/// `print.html` page go to the original location: fragment-only links are
/// prefixed with the page's `.html` path, and relative links are prefixed
/// with the page's parent directory. Ideally, print page links would link to
/// anchors on the print page, but that is very difficult.
fn adjust_links<'a>(event: Event<'a>, options: &HtmlRenderOptions<'_>) -> Event<'a> {
// Matches a leading URL scheme such as `https:` or `mailto:`.
static SCHEME_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap());
// Splits a link at `.md`, with an optional `#anchor` directly after it.
// NOTE(review): the pattern is not anchored, and only the `link` and
// `anchor` captures are copied into the result below, so any other text
// following `.md` is dropped and a `.md` inside the fragment of a
// non-markdown link is rewritten too. Confirm this is acceptable.
static MD_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap());
// Rewrites a single link destination.
fn fix<'a>(dest: CowStr<'a>, options: &HtmlRenderOptions<'_>) -> CowStr<'a> {
if dest.starts_with('#') {
// Fragment-only link.
if options.for_print {
let mut base = options.path.display().to_string();
if base.ends_with(".md") {
base.replace_range(base.len() - 3.., ".html");
}
return format!("{base}{dest}").into();
} else {
return dest;
}
}
// Don't modify links with schemes like `https`.
if !SCHEME_LINK.is_match(&dest) {
// This is a relative link, adjust it as necessary.
let mut fixed_link = String::new();
if options.for_print {
// On the print page, relative links are resolved against the
// directory of the chapter they came from.
let base = options
.path
.parent()
.expect("path can't be empty")
.to_str()
.expect("utf-8 paths only");
if !base.is_empty() {
write!(fixed_link, "{base}/").unwrap();
}
}
if let Some(caps) = MD_LINK.captures(&dest) {
fixed_link.push_str(&caps["link"]);
fixed_link.push_str(".html");
if let Some(anchor) = caps.name("anchor") {
fixed_link.push_str(anchor.as_str());
}
} else {
fixed_link.push_str(&dest);
};
return CowStr::from(fixed_link);
}
dest
}
// Applies `fix` to the links found inside a raw HTML fragment.
fn fix_html<'a>(html: CowStr<'a>, options: &HtmlRenderOptions<'_>) -> CowStr<'a> {
// This is a terrible hack, but should be reasonably reliable. Nobody
// should ever parse a tag with a regex. However, there isn't anything
// in Rust that I know of that is suitable for handling partial html
// fragments like those generated by pulldown_cmark.
//
// There are dozens of HTML tags/attributes that contain paths, so
// feel free to add more tags if desired; these are the only ones I
// care about right now.
static HTML_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap());
HTML_LINK
.replace_all(&html, |caps: &regex::Captures<'_>| {
// Group 1 is everything up to and including the opening quote;
// group 2 is the attribute value to rewrite.
let fixed = fix(caps[2].into(), options);
format!("{}{}\"", &caps[1], fixed)
})
.into_owned()
.into()
}
match event {
Event::Start(Tag::Link {
link_type,
dest_url,
title,
id,
}) => Event::Start(Tag::Link {
link_type,
dest_url: fix(dest_url, options),
title,
id,
}),
Event::Start(Tag::Image {
link_type,
dest_url,
title,
id,
}) => Event::Start(Tag::Image {
link_type,
dest_url: fix(dest_url, options),
title,
id,
}),
Event::Html(html) => Event::Html(fix_html(html, options)),
Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, options)),
_ => event,
}
}
/// Escapes the characters that are unsafe inside an HTML string.
///
/// `<`, `>`, `'`, `"`, `\` and `&` are replaced with their HTML entities;
/// every other character is copied through unchanged.
pub fn special_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '\'' => escaped.push_str("&#39;"),
            '"' => escaped.push_str("&quot;"),
            '\\' => escaped.push_str("&#92;"),
            '&' => escaped.push_str("&amp;"),
            other => escaped.push(other),
        }
    }
    escaped
}

View File

@@ -1,153 +0,0 @@
use super::render_markdown;
use super::*;
#[test]
fn escaped_special() {
assert_eq!(special_escape(""), "");
assert_eq!(special_escape("<"), "&lt;");
assert_eq!(special_escape(">"), "&gt;");
assert_eq!(special_escape("<>"), "&lt;&gt;");
assert_eq!(special_escape("<test>"), "&lt;test&gt;");
assert_eq!(special_escape("a<test>b"), "a&lt;test&gt;b");
assert_eq!(special_escape("'"), "&#39;");
assert_eq!(special_escape("\\"), "&#92;");
assert_eq!(special_escape("&"), "&amp;");
}
/// Links that carry a URL scheme are rendered without modification.
#[test]
fn preserves_external_links() {
    let render_options = HtmlRenderOptions::new(Path::new(""));
    let rendered = render_markdown("[example](https://www.rust-lang.org/)", &render_options);
    assert_eq!(
        rendered,
        "<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
    );
}
/// Links to `.md` files are pointed at the matching `.html` file, keeping any
/// anchor, while other links are left alone.
#[test]
fn it_can_adjust_markdown_links() {
    let render_options = HtmlRenderOptions::new(Path::new(""));
    // (markdown, rendered HTML)
    let cases = [
        (
            "[example](example.md)",
            "<p><a href=\"example.html\">example</a></p>\n",
        ),
        (
            "[example_anchor](example.md#anchor)",
            "<p><a href=\"example.html#anchor\">example_anchor</a></p>\n",
        ),
        // The anchor here contains `md`, which must not trigger a rewrite.
        (
            "[phantom data](foo.html#phantomdata)",
            "<p><a href=\"foo.html#phantomdata\">phantom data</a></p>\n",
        ),
    ];
    for (markdown, expected) in cases {
        assert_eq!(render_markdown(markdown, &render_options), expected);
    }
}
/// A markdown table is rendered inside a `table-wrapper` div.
#[test]
fn it_can_wrap_tables() {
    let markdown = r#"
| Original | Punycode | Punycode + Encoding |
|-----------------|-----------------|---------------------|
| føø | f-5gaa | f_5gaa |
"#;
    let expected = r#"
<div class="table-wrapper"><table><thead><tr><th>Original</th><th>Punycode</th><th>Punycode + Encoding</th></tr></thead><tbody>
<tr><td>føø</td><td>f-5gaa</td><td>f_5gaa</td></tr>
</tbody></table>
</div>
"#.trim();
    let render_options = HtmlRenderOptions::new(Path::new(""));
    let rendered = render_markdown(markdown, &render_options);
    assert_eq!(rendered, expected);
}
#[test]
fn it_can_keep_quotes_straight() {
    let mut options = HtmlRenderOptions::new(&Path::new(""));
    // With smart punctuation switched off, the ASCII quotes are expected to
    // appear in the output exactly as written.
    options.markdown_options.smart_punctuation = false;
    let rendered = render_markdown("'one'", &options);
    assert_eq!(rendered, "<p>'one'</p>\n");
}
#[test]
fn it_can_make_quotes_curly_except_when_they_are_in_code() {
    // Quotes in running text are expected to come out curly, while quotes
    // inside a fenced code block or an inline code span stay straight.
    // NOTE(review): relies on smart punctuation being enabled in the default
    // options returned by `HtmlRenderOptions::new` — confirm.
    let input = r#"
'one'
```
'two'
```
`'three'` 'four'"#;
    let expected = r#"<p>‘one’</p>
<pre><code>'two'
</code></pre>
<p><code>'three'</code> ‘four’</p>
"#;
    let options = HtmlRenderOptions::new(&Path::new(""));
    assert_eq!(render_markdown(input, &options), expected);
}
#[test]
fn whitespace_outside_of_codeblock_header_is_preserved() {
    let options = HtmlRenderOptions::new(&Path::new(""));
    // Two paragraphs surrounding a fenced `rust` block.
    let markdown = r#"
some text with spaces
```rust
fn main() {
// code inside is unchanged
}
```
more text with spaces
"#;
    let expected = r#"<p>some text with spaces</p>
<pre><code class="language-rust">fn main() {
// code inside is unchanged
}
</code></pre>
<p>more text with spaces</p>
"#;
    let rendered = render_markdown(markdown, &options);
    assert_eq!(rendered, expected);
}
#[test]
fn rust_code_block_properties_are_passed_as_space_delimited_class() {
    let options = HtmlRenderOptions::new(&Path::new(""));
    // An empty fenced block whose info string carries several properties.
    let markdown = r#"
```rust,no_run,should_panic,property_3
```
"#;
    // The info string is expected verbatim after the `language-` prefix.
    let expected = r#"<pre><code class="language-rust,no_run,should_panic,property_3"></code></pre>
"#;
    let rendered = render_markdown(markdown, &options);
    assert_eq!(rendered, expected);
}
#[test]
fn rust_code_block_properties_with_whitespace_are_passed_as_space_delimited_class() {
    let options = HtmlRenderOptions::new(&Path::new(""));
    // Same as the plain-properties case, but with spaces and empty entries
    // scattered through the info string.
    let markdown = r#"
```rust, no_run,,,should_panic , ,property_3
```
"#;
    let expected = r#"<pre><code class="language-rust,,,,,no_run,,,should_panic,,,,property_3"></code></pre>
"#;
    let rendered = render_markdown(markdown, &options);
    assert_eq!(rendered, expected);
}
#[test]
fn rust_code_block_without_properties_has_proper_html_class() {
    // Both inputs are expected to render to the same empty `language-rust` block.
    let expected = r#"<pre><code class="language-rust"></code></pre>
"#;
    let first = r#"
```rust
```
"#;
    let second = r#"
```rust
```
"#;
    for markdown in [first, second] {
        // A fresh set of options is built for every input.
        let options = HtmlRenderOptions::new(&Path::new(""));
        assert_eq!(render_markdown(markdown, &options), expected);
    }
}

View File

@@ -20,7 +20,7 @@ To make it easier to run, put the path to the binary into your `PATH`.
To build the `mdbook` executable from source, you will first need to install Rust and Cargo.
Follow the instructions on the [Rust installation page].
mdBook currently requires at least Rust version 1.85.
mdBook currently requires at least Rust version 1.88.
Once you have installed Rust, the following command can be used to build and install mdBook:

View File

@@ -31,10 +31,10 @@ fn anchored_include() {
"book/anchors.html",
str![[r##"
<h1 id="include-anchors"><a class="header" href="#include-anchors">Include Anchors</a></h1>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
<pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let x = 1;
<span class="boring">}</span></code></pre></pre>
<span class="boring">}</span></code></pre>
"##]],
);
}
@@ -77,12 +77,12 @@ fn playground_include() {
.check_main_file("book/playground.html",
str![[r##"
<h1 id="playground-includes"><a class="header" href="#playground-includes">Playground Includes</a></h1>
<pre><pre class="playground"><code class="language-rust">fn main() {
<pre class="playground"><code class="language-rust">fn main() {
println!("Hello World!");
<span class="boring">
</span><span class="boring"> // You can even hide lines! :D
</span><span class="boring"> println!("I am hidden! Expand the code snippet to see me");
</span>}</code></pre></pre>
</span>}</code></pre>
"##]]);
}
@@ -94,20 +94,20 @@ fn rustdoc_include() {
str![[r##"
<h1 id="rustdoc-includes"><a class="header" href="#rustdoc-includes">Rustdoc Includes</a></h1>
<h2 id="rustdoc-include-adds-the-rest-of-the-file-as-hidden"><a class="header" href="#rustdoc-include-adds-the-rest-of-the-file-as-hidden">Rustdoc include adds the rest of the file as hidden</a></h2>
<pre><pre class="playground"><code class="language-rust"><span class="boring">fn some_function() {
<pre class="playground"><code class="language-rust"><span class="boring">fn some_function() {
</span><span class="boring"> println!("some function");
</span><span class="boring">}
</span><span class="boring">
</span>fn main() {
some_function();
}</code></pre></pre>
}</code></pre>
<h2 id="rustdoc-include-works-with-anchors-too"><a class="header" href="#rustdoc-include-works-with-anchors-too">Rustdoc include works with anchors too</a></h2>
<pre><pre class="playground"><code class="language-rust"><span class="boring">fn some_other_function() {
<pre class="playground"><code class="language-rust"><span class="boring">fn some_other_function() {
</span><span class="boring"> println!("unused anchor");
</span><span class="boring">}
</span><span class="boring">
</span>fn main() {
some_other_function();
}</code></pre></pre>
}</code></pre>
"##]]);
}

View File

@@ -9,9 +9,9 @@ fn custom_header_attributes() {
BookTest::from_dir("markdown/custom_header_attributes")
.check_main_file("book/custom_header_attributes.html", str![[r##"
<h1 id="attrs"><a class="header" href="#attrs">Heading Attributes</a></h1>
<h2 id="heading-with-classes" class="class1 class2"><a class="header" href="#heading-with-classes">Heading with classes</a></h2>
<h2 class="class1 class2" id="heading-with-classes"><a class="header" href="#heading-with-classes">Heading with classes</a></h2>
<h2 id="both" class="class1 class2"><a class="header" href="#both">Heading with id and classes</a></h2>
<h2 id="myh3" class="myclass1 myclass2" myattr="" otherattr="value">Heading with attribute</h2>
<h2 myattr="" otherattr="value" id="myh3" class="myclass1 myclass2"><a class="header" href="#myh3">Heading with attribute</a></h2>
"##]]);
}
@@ -25,8 +25,6 @@ fn footnotes() {
INFO Running the html backend
WARN footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition
WARN footnote `unused` in `footnotes.md` is defined but not referenced
WARN footnote `multiple-definitions` in footnotes.md defined multiple times - not updating to new definition
WARN footnote `unused` in `footnotes.md` is defined but not referenced
INFO HTML book written to `[ROOT]/book`
"#]]);
@@ -44,16 +42,29 @@ fn tables() {
"book/tables.html",
str![[r##"
<h1 id="tables"><a class="header" href="#tables">Tables</a></h1>
<div class="table-wrapper"><table><thead><tr><th>foo</th><th>bar</th></tr></thead><tbody>
<div class="table-wrapper">
<table>
<thead>
<tr><th>foo</th><th>bar</th></tr>
</thead>
<tbody>
<tr><td>baz</td><td>bim</td></tr>
<tr><td>Backslash in code</td><td><code>/</code></td></tr>
<tr><td>Double back in code</td><td><code>//</code></td></tr>
<tr><td>Pipe in code</td><td><code>|</code></td></tr>
<tr><td>Pipe in code2</td><td><code>test | inside</code></td></tr>
</tbody></table>
</div><div class="table-wrapper"><table><thead><tr><th>Neither</th><th style="text-align: left">Left</th><th style="text-align: center">Center</th><th style="text-align: right">Right</th></tr></thead><tbody>
</tbody>
</table>
</div>
<div class="table-wrapper">
<table>
<thead>
<tr><th>Neither</th><th style="text-align: left">Left</th><th style="text-align: center">Center</th><th style="text-align: right">Right</th></tr>
</thead>
<tbody>
<tr><td>one</td><td style="text-align: left">two</td><td style="text-align: center">three</td><td style="text-align: right">four</td></tr>
</tbody></table>
</tbody>
</table>
</div>
"##]],
);
@@ -79,12 +90,9 @@ fn tasklists() {
str![[r##"
<h2 id="tasklisks"><a class="header" href="#tasklisks">Tasklisks</a></h2>
<ul>
<li><input disabled="" type="checkbox" checked=""/>
Apples</li>
<li><input disabled="" type="checkbox" checked=""/>
Broccoli</li>
<li><input disabled="" type="checkbox"/>
Carrots</li>
<li><input disabled="" type="checkbox" checked=""> Apples</li>
<li><input disabled="" type="checkbox" checked=""> Broccoli</li>
<li><input disabled="" type="checkbox"> Carrots</li>
</ul>
"##]],
);

View File

@@ -1,7 +1,6 @@
<h1 id="blockquotes"><a class="header" href="#blockquotes">Blockquotes</a></h1>
<p>Empty:</p>
<blockquote>
</blockquote>
<blockquote></blockquote>
<p>Normal:</p>
<blockquote>
<p>foo
@@ -9,10 +8,10 @@ bar</p>
</blockquote>
<p>Contains code block:</p>
<blockquote>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
<pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let x = 1;
<span class="boring">}</span></code></pre></pre>
<span class="boring">}</span></code></pre>
</blockquote>
<p>Random stuff:</p>
<blockquote>

View File

@@ -1,22 +1,22 @@
<h1 id="code-blocks"><a class="header" href="#code-blocks">Code blocks</a></h1>
<pre><code>This is a codeblock
</code></pre>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
<pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>// This links to a playpen
<span class="boring">}</span></code></pre></pre>
<span class="boring">}</span></code></pre>
<pre><code class="language-bash editable"># This is an editable codeblock
</code></pre>
<pre><code class="language-text cls1 cls2 cls3">Text with different classes.
<pre><code class="language-text cls1 cls2 cls3">Text with different classes.
</code></pre>
<pre><code>Indented
code
block.
</code></pre>
<pre><pre class="playground"><code class="language-rust edition2021"><span class="boring">#![allow(unused)]
<pre class="playground"><code class="language-rust edition2021"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let x = 1;
<span class="boring">}</span></code></pre></pre>
<pre><pre class="playground"><code class="language-rust">fn main() {
<span class="boring">}</span></code></pre>
<pre class="playground"><code class="language-rust">fn main() {
println!("hello");
}</code></pre></pre>
}</code></pre>

View File

@@ -20,18 +20,14 @@ comment
<col span="2" class="a">
</colgroup>
</table>
<p><embed
type="image/jpeg"
src="/image.jpg"
width="100"
height="200"></p>
<p><embed type="image/jpeg" src="/image.jpg" width="100" height="200"></p>
<p>Rule:</p>
<hr>
<img src="example.jpg">
<input type="text">
<link href="example.css" rel="stylesheet">
<p><meta name="example"
content="Example content"></p>
<p><meta name="example" content="Example content"></p>
<video>
<source src="video.webm" type="video/webm">
<track kind="captions" src="captions.vtt" srclang="en">
@@ -40,12 +36,14 @@ comment
<div>
A block HTML element trying to do *markup*.
</div>
<div>
<p>A block HTML with spaces that <strong>cause</strong> it to be interleaved with markdown.</p>
</div>
<h2 id="scripts"><a class="header" href="#scripts">Scripts</a></h2>
<script></script>
<script async src="foo.js"></script>
<script async="" src="foo.js"></script>
<script>
const x = 'some *text* inside';
@@ -62,6 +60,6 @@ const x = 'some *text* inside';
*/
</style>
<style media="(width < 500px)">
<style media="(width &lt; 500px)">
.bar { background-color: green }
</style>

View File

@@ -1,3 +1,3 @@
<h1 id="images"><a class="header" href="#images">Images</a></h1>
<p><img src="https://rust-lang.org/logos/rust-logo-256x256.png" alt="Image “alt” &amp; &quot; &quot;text&quot; &amp; &lt;stuff&gt; url &lt;em&gt;html&lt;/em&gt; — hard break " /></p>
<p><img src="https://rust-lang.org/logos/rust-logo-256x256.png" alt="Image with title" title="Some title" /></p>
<p><img src="https://rust-lang.org/logos/rust-logo-256x256.png" alt="Image “alt” &amp; &quot; &quot;text&quot; &amp; &lt;stuff&gt; url &lt;em&gt;html&lt;/em&gt; — hard break "></p>
<p><img src="https://rust-lang.org/logos/rust-logo-256x256.png" title="Some title" alt="Image with title"></p>

View File

@@ -1,8 +1,6 @@
<h1 id="inlines"><a class="header" href="#inlines">Inlines</a></h1>
<p><em>emphasis</em> <strong>bold</strong> <strong><em>bold emphasis</em></strong></p>
<p>Some <code>inline code</code>.</p>
<p>Hard<br />
break</p>
<p>Invisible hard<br />
break</p>
<p>Hard<br>break</p>
<p>Invisible hard<br>break</p>
<p>[escaped] &lt;html&gt; *here*</p>

View File

@@ -5,7 +5,7 @@
<li>Ordered</li>
<li>List</li>
</ol>
<hr />
<hr>
<ol>
<li>A
<ol>
@@ -17,19 +17,19 @@
<li>Still</li>
<li>Normal</li>
</ol>
<hr />
<hr>
<ol start="7">
<li>Start list</li>
<li>with a different number.</li>
</ol>
<hr />
<hr>
<ul>
<li>An</li>
<li>Unordered</li>
<li>Normal</li>
<li>List</li>
</ul>
<hr />
<hr>
<ul>
<li>Nested
<ul>
@@ -38,7 +38,7 @@
</li>
<li>List</li>
</ul>
<hr />
<hr>
<ul>
<li>This
<ol>

View File

@@ -9,7 +9,8 @@
<p>And another<sup class="footnote-reference" id="fr-in-between-1"><a href="#footnote-in-between">8</a></sup> that references the duplicate again.<sup class="footnote-reference" id="fr-multiple-definitions-2"><a href="#footnote-multiple-definitions">7</a></sup></p>
<p>Multiple footnotes in a row.<sup class="footnote-reference" id="fr-a-1"><a href="#footnote-a">9</a></sup> <sup class="footnote-reference" id="fr-b-1"><a href="#footnote-b">10</a></sup> <sup class="footnote-reference" id="fr-c-1"><a href="#footnote-c">11</a></sup></p>
<hr>
<ol class="footnote-definition"><li id="footnote-1">
<ol class="footnote-definition">
<li id="footnote-1">
<p>This is a footnote. <a href="#fr-1-1"></a> <a href="#fr-1-2">↩2</a></p>
</li>
<li id="footnote-word">

View File

@@ -9,10 +9,10 @@ fn playground_on_rust_code() {
"book/index.html",
str![[r##"
<h1 id="rust-sample"><a class="header" href="#rust-sample">Rust Sample</a></h1>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
<pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let x = 1;
<span class="boring">}</span></code></pre></pre>
<span class="boring">}</span></code></pre>
"##]],
);
}

View File

@@ -1,5 +1,9 @@
<h1 id="chapter-1"><a class="header" href="#chapter-1">Chapter 1</a></h1>
<p>See <a href="chapter_2.html">chapter 2</a>.</p>
<div style="break-before: page; page-break-before: always;"></div><p>See <a href="./chapter_2.html">this</a>.</p>
<div style="break-before: page; page-break-before: always;"></div><h2 id="h2-instead"><a class="header" href="#h2-instead">H2 instead</a></h2>
<p>See <a href="#chapter-2">chapter 2</a>.</p>
<div style="break-before: page; page-break-before: always;"></div>
<h1 id="chapter-2"><a href="#chapter-2" class="header">Chapter 2</a></h1>
<p>See <a href="#chapter-2">this</a>.</p>
<div style="break-before: page; page-break-before: always;"></div>
<h1 id="h2-instead-1"><a href="#h2-instead-1" class="header">H2 instead</a></h1>
<h2 id="h2-instead"><a class="header" href="#h2-instead">H2 instead</a></h2>
<p>This has H2 instead of H1.</p>

View File

@@ -1,11 +1,12 @@
<h1 id="chapter-1"><a class="header" href="#chapter-1">Chapter 1</a></h1>
<h2 id="some-title"><a class="header" href="#some-title">Some title</a></h2>
<p>See <a href="chapter_2.html#some-title">other</a></p>
<p>See <a href="chapter_1.html#some-title">this</a></p>
<p>See <a href="chapter_1.html#some-title">this anchor only</a></p>
<div style="break-before: page; page-break-before: always;"></div><h1 id="chapter-2"><a class="header" href="#chapter-2">Chapter 2</a></h1>
<p>See <a href="#some-title-1">other</a></p>
<p>See <a href="#some-title">this</a></p>
<p>See <a href="#some-title">this anchor only</a></p>
<div style="break-before: page; page-break-before: always;"></div>
<h1 id="chapter-2"><a class="header" href="#chapter-2">Chapter 2</a></h1>
<h2 id="some-title-1"><a class="header" href="#some-title-1">Some title</a></h2>
<p>See <a href="chapter_1.html#some-title">other</a></p>
<p>See <a href="chapter_2.html#some-title">this</a></p>
<p>See <a href="chapter_2.html#some-title">this anchor only</a></p>
<p><a href="chapter_1.html#some-title">Works with HTML extension too</a></p>
<p>See <a href="#some-title">other</a></p>
<p>See <a href="#some-title-1">this</a></p>
<p>See <a href="#some-title-1">this anchor only</a></p>
<p><a href="#some-title">Works with HTML extension too</a></p>

View File

@@ -1,15 +1,17 @@
<h1 id="first-chapter"><a class="header" href="#first-chapter">First Chapter</a></h1>
<div style="break-before: page; page-break-before: always;"></div><h1 id="first-nested"><a class="header" href="#first-nested">First Nested</a></h1>
<div style="break-before: page; page-break-before: always;"></div><h1 id="testing-relative-links-for-the-print-page"><a class="header" href="#testing-relative-links-for-the-print-page">Testing relative links for the print page</a></h1>
<p>When we link to <a href="second/../first/nested.html">the first section</a>, it should work on
<div style="break-before: page; page-break-before: always;"></div>
<h1 id="first-nested"><a class="header" href="#first-nested">First Nested</a></h1>
<div style="break-before: page; page-break-before: always;"></div>
<h1 id="testing-relative-links-for-the-print-page"><a class="header" href="#testing-relative-links-for-the-print-page">Testing relative links for the print page</a></h1>
<p>When we link to <a href="#first-nested">the first section</a>, it should work on
both the print page and the non-print page.</p>
<p>The same link should work <a href="second/../first/nested.html">with an html extension</a>.</p>
<p>A <a href="second/nested.html#some-section">fragment link</a> should work.</p>
<p>Link <a href="second/../../std/foo/bar.html">outside</a>.</p>
<p>Link <a href="second/../../std/foo/bar.html#panic">outside with anchor</a>.</p>
<p><img src="second/../images/picture.png" alt="Some image" /></p>
<p><a href="second/../first/nested.html">HTML Link</a></p>
<img src="second/../images/picture.png" alt="raw html">
<p>The same link should work <a href="#first-nested">with an html extension</a>.</p>
<p>A <a href="#some-section">fragment link</a> should work.</p>
<p>Link <a href="../std/foo/bar.html">outside</a>.</p>
<p>Link <a href="../std/foo/bar.html#panic">outside with anchor</a>.</p>
<p><img src="images/picture.png" alt="Some image"></p>
<p><a href="#first-nested">HTML Link</a></p>
<img src="images/picture.png" alt="raw html">
<h2 id="some-section"><a class="header" href="#some-section">Some section</a></h2>
<p><a href="https://example.com/foo.html#bar">Links with scheme shouldn’t be touched.</a></p>
<p><a href="second/../images/not-html?arg1&arg2#with-anchor">Non-html link</a></p>
<p><a href="images/not-html?arg1&amp;arg2#with-anchor">Non-html link</a></p>

View File

@@ -82,28 +82,26 @@ fn language_rust_playground() {
"x()",
"",
str![[r#"
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
<pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>x()
<span class="boring">}</span></code></pre></pre>
<span class="boring">}</span></code></pre>
"#]],
);
// `fn main` should not be wrapped, not boring.
expect(
"fn main() {}",
"",
str![[
r#"<pre><pre class="playground"><code class="language-rust">fn main() {}</code></pre></pre>"#
]],
str![[r#"<pre class="playground"><code class="language-rust">fn main() {}</code></pre>"#]],
);
// Lines starting with `#` are boring.
expect(
"let s = \"foo\n # bar\n\";",
"editable",
str![[r#"
<pre><pre class="playground"><code class="language-rust editable">let s = "foo
<pre class="playground"><code class="language-rust editable">let s = "foo
<span class="boring"> bar
</span>";</code></pre></pre>
</span>";</code></pre>
"#]],
);
// `##` is not boring and is used as an escape.
@@ -111,9 +109,9 @@ fn language_rust_playground() {
"let s = \"foo\n ## bar\n\";",
"editable",
str![[r#"
<pre><pre class="playground"><code class="language-rust editable">let s = "foo
<pre class="playground"><code class="language-rust editable">let s = "foo
# bar
";</code></pre></pre>
";</code></pre>
"#]],
);
// `#` on a line by itself is boring.
@@ -121,10 +119,10 @@ fn language_rust_playground() {
"let s = \"foo\n # bar\n#\n\";",
"editable",
str![[r#"
<pre><pre class="playground"><code class="language-rust editable">let s = "foo
<pre class="playground"><code class="language-rust editable">let s = "foo
<span class="boring"> bar
</span><span class="boring">
</span>";</code></pre></pre>
</span>";</code></pre>
"#]],
);
// `#` must be followed by a space to be boring.
@@ -132,10 +130,10 @@ fn language_rust_playground() {
"#x;",
"",
str![[r#"
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
<pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>#x;
<span class="boring">}</span></code></pre></pre>
<span class="boring">}</span></code></pre>
"#]],
);
@@ -155,9 +153,9 @@ fn language_rust_playground() {
"#![no_std]\nlet s = \"foo\";\n #[some_attr]",
"editable",
str![[r#"
<pre><pre class="playground"><code class="language-rust editable">#![no_std]
<pre class="playground"><code class="language-rust editable">#![no_std]
let s = "foo";
#[some_attr]</code></pre></pre>
#[some_attr]</code></pre>
"#]],
);
}
@@ -183,12 +181,12 @@ fn code_block_in_list() {
<ul>
<li>
<p>inside list</p>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
<pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>fn foo() {
let x = 1;
}
<span class="boring">}</span></code></pre></pre>
<span class="boring">}</span></code></pre>
</li>
</ul>
"#]],
@@ -210,14 +208,14 @@ fn busted_end_tag() {
cmd.expect_stderr(str![[r#"
INFO Book building has started
INFO Running the html backend
WARN html parse error in `chapter_1.md`: Self-closing end tag
Html text was:
<div>x<span>foo</span/>y</div>
INFO HTML book written to `[ROOT]/book`
"#]]);
})
.check_main_file(
"book/chapter_1.html",
str!["<div>x<span>foo</span/>y</div>"],
);
.check_main_file("book/chapter_1.html", str!["<div>x<span>foo</span>y</div>"]);
}
// Various html blocks.

View File

@@ -1,13 +1,13 @@
<h1 id="chapter-1"><a class="header" href="#chapter-1">Chapter 1</a></h1>
<pre><pre class="playground"><code class="language-rust edition2021"><span class="boring">#![allow(unused)]
<pre class="playground"><code class="language-rust edition2021"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let x = 2021;
<span class="boring">}</span></code></pre></pre>
<pre><pre class="playground"><code class="language-rust edition2021"><span class="boring">#![allow(unused)]
<span class="boring">}</span></code></pre>
<pre class="playground"><code class="language-rust edition2021"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let x = 2021;
<span class="boring">}</span></code></pre></pre>
<pre><pre class="playground"><code class="language-rust edition2024 edition2021"><span class="boring">#![allow(unused)]
<span class="boring">}</span></code></pre>
<pre class="playground"><code class="language-rust edition2024"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>let x = 2024;
<span class="boring">}</span></code></pre></pre>
<span class="boring">}</span></code></pre>

View File

@@ -1,8 +1,8 @@
<h1 id="chapter-1"><a class="header" href="#chapter-1">Chapter 1</a></h1>
<pre><pre class="playground"><code class="language-rust editable">fn f() {
<pre class="playground"><code class="language-rust editable">fn f() {
println!("hello");
}</code></pre></pre>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
}</code></pre>
<pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span><span class="boring">fn main() {
</span>// Not editable.
<span class="boring">}</span></code></pre></pre>
<span class="boring">}</span></code></pre>

View File

@@ -1,5 +1,5 @@
<h1 id="chapter-1"><a class="header" href="#chapter-1">Chapter 1</a></h1>
<p><span id="example1" class="fa-svg extra-class"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.2.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2022 Fonticons, Inc. --><path d="M47.6 300.4L228.3 469.1c7.5 7 17.4 10.9 27.7 10.9s20.2-3.9 27.7-10.9L464.4 300.4c30.4-28.3 47.6-68 47.6-109.5v-5.8c0-69.9-50.5-129.5-119.4-141C347 36.5 300.6 51.4 268 84L256 96 244 84c-32.6-32.6-79-47.5-124.6-39.9C50.5 55.6 0 115.2 0 185.1v5.8c0 41.5 17.2 81.2 47.6 109.5z"/></svg></span></p>
<p><span class="fa-svg extra-class" id="example1"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.2.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2022 Fonticons, Inc. --><path d="M47.6 300.4L228.3 469.1c7.5 7 17.4 10.9 27.7 10.9s20.2-3.9 27.7-10.9L464.4 300.4c30.4-28.3 47.6-68 47.6-109.5v-5.8c0-69.9-50.5-129.5-119.4-141C347 36.5 300.6 51.4 268 84L256 96 244 84c-32.6-32.6-79-47.5-124.6-39.9C50.5 55.6 0 115.2 0 185.1v5.8c0 41.5 17.2 81.2 47.6 109.5z"/></svg></span></p>
<p><span class="fa-svg"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.2.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2022 Fonticons, Inc. --><path d="M272 304h-96C78.8 304 0 382.8 0 480c0 17.67 14.33 32 32 32h384c17.67 0 32-14.33 32-32C448 382.8 369.2 304 272 304zM48.99 464C56.89 400.9 110.8 352 176 352h96c65.16 0 119.1 48.95 127 112H48.99zM224 256c70.69 0 128-57.31 128-128c0-70.69-57.31-128-128-128S96 57.31 96 128C96 198.7 153.3 256 224 256zM224 48c44.11 0 80 35.89 80 80c0 44.11-35.89 80-80 80S144 172.1 144 128C144 83.89 179.9 48 224 48z"/></svg></span></p>
<p><span class="fa-svg"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.2.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2022 Fonticons, Inc. --><path d="M448 48V384C385 407 366 416 329 416C266 416 242 384 179 384C159 384 143 388 128 392V328C143 324 159 320 179 320C242 320 266 352 329 352C349 352 364 349 384 343V135C364 141 349 144 329 144C266 144 242 112 179 112C128 112 104 133 64 141V448C64 466 50 480 32 480S0 466 0 448V64C0 46 14 32 32 32S64 46 64 64V77C104 69 128 48 179 48C242 48 266 80 329 80C366 80 385 71 448 48Z"/></svg></span></p>
<p><i class="fas fa-heart">Some text here.</i></p>

View File

@@ -11,7 +11,7 @@
</span><span class="boring"> hidden()
</span> nothidden()
</code></pre>
<pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
<pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)]
</span>#![allow(something)]
<span class="boring">fn main() {
</span><span class="boring">
@@ -20,4 +20,4 @@
</span># not_hidden();
#[not_hidden]
not_hidden();
<span class="boring">}</span></code></pre></pre>
<span class="boring">}</span></code></pre>

View File

@@ -63,7 +63,7 @@ fn reasonable_search_index() {
// html.
assert_eq!(
docs[&sneaky]["body"],
"I put &lt;HTML&gt; in here! Sneaky inline event alert(“inline”);. But regular inline is indexed."
"I put <HTML> in here! Sneaky inline event . But regular inline is indexed."
);
assert_eq!(
docs[&no_headers]["breadcrumbs"],

File diff suppressed because one or more lines are too long