Merge pull request #4195 from rust-lang/mdbook-strip-heading-markup

Implement and integrate an mdBook plugin to strip markup from headings
This commit is contained in:
Chris Krycho
2025-01-08 12:12:09 -07:00
committed by GitHub
10 changed files with 1181 additions and 694 deletions

View File

@@ -19,5 +19,8 @@ output-mode = "simple"
[preprocessor.trpl-figure]
output-mode = "simple"
[preprocessor.trpl-heading]
output-mode = "simple"
[rust]
edition = "2021"

File diff suppressed because it is too large Load Diff

View File

@@ -11,6 +11,10 @@ path = "src/bin/note.rs"
name = "mdbook-trpl-listing"
path = "src/bin/listing.rs"
[[bin]]
name = "mdbook-trpl-heading"
path = "src/bin/heading.rs"
[[bin]]
name = "mdbook-trpl-figure"
path = "src/bin/figure.rs"

View File

@@ -0,0 +1,38 @@
use std::io;
use clap::{self, Parser, Subcommand};
use mdbook::preprocess::{CmdPreprocessor, Preprocessor};
use mdbook_trpl::Heading;
/// Entry point for the `mdbook-trpl-heading` preprocessor binary.
///
/// When invoked with the `supports <renderer>` subcommand, returns `Ok(())` if
/// the heading preprocessor supports that renderer and an error message
/// otherwise. With no subcommand, reads the preprocessor input from stdin,
/// runs the heading preprocessor over the book, and writes the processed book
/// to stdout as JSON.
fn main() -> Result<(), String> {
    match Cli::parse().command {
        // `supports` mode: only answer the question, never touch stdin.
        Some(Command::Supports { renderer }) => {
            if Heading.supports_renderer(&renderer) {
                Ok(())
            } else {
                Err(format!("Renderer '{renderer}' is unsupported"))
            }
        }
        // Normal preprocessing mode: stdin -> rewrite -> stdout.
        None => {
            let (ctx, book) = CmdPreprocessor::parse_input(io::stdin())
                .map_err(|e| e.to_string())?;
            let processed =
                Heading.run(&ctx, book).map_err(|e| e.to_string())?;
            serde_json::to_writer(io::stdout(), &processed)
                .map_err(|e| e.to_string())
        }
    }
}
/// A simple preprocessor which strips inline markup from headings in _The
/// Rust Programming Language_.
// NOTE: clap uses the doc comment above as the command's help description, so
// it must describe *this* binary rather than the listing preprocessor.
#[derive(Parser, Debug)]
struct Cli {
    /// Optional subcommand; when absent, the binary runs as a preprocessor
    /// over the book supplied on stdin.
    #[command(subcommand)]
    command: Option<Command>,
}
#[derive(Subcommand, Debug)]
enum Command {
    /// Is the renderer supported?
    ///
    /// The supported renderers are `html`, `markdown`, and `test`.
    Supports { renderer: String },
}

View File

@@ -2,7 +2,7 @@
use mdbook::preprocess::PreprocessorContext;
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Mode {
Default,
Simple,

View File

@@ -5,7 +5,7 @@ use mdbook::{book::Book, preprocess::Preprocessor, BookItem};
use pulldown_cmark::Event;
use pulldown_cmark_to_cmark::cmark;
use crate::config::Mode;
use crate::{config::Mode, CompositeError};
/// A simple preprocessor to rewrite `<figure>`s with `<img>`s.
///
@@ -74,19 +74,6 @@ impl Preprocessor for TrplFigure {
}
}
/// A collection of errors accumulated while rewriting input, reported
/// together as a single error.
#[derive(Debug, thiserror::Error)]
struct CompositeError(Vec<anyhow::Error>);

impl std::fmt::Display for CompositeError {
    /// Writes a fixed prefix followed by the `Debug` rendering of every
    /// collected error, one per line so that adjacent errors do not run
    /// together.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let details = self
            .0
            .iter()
            .map(|e| format!("{e:?}"))
            .collect::<Vec<_>>()
            .join("\n");
        write!(f, "Error(s) rewriting input: {details}")
    }
}
const OPEN_FIGURE: &'static str = "<figure>";
const CLOSE_FIGURE: &'static str = "</figure>";

View File

@@ -0,0 +1,114 @@
use anyhow::anyhow;
use mdbook::{
book::Book,
preprocess::{Preprocessor, PreprocessorContext},
BookItem,
};
use pulldown_cmark::{Event, Tag, TagEnd};
use pulldown_cmark_to_cmark::cmark;
use crate::{CompositeError, Mode};
/// An mdBook preprocessor which, in `Mode::Simple`, strips inline markup
/// (emphasis, strong emphasis, strikethrough, inline HTML, and inline code
/// formatting) from headings. In `Mode::Default` chapter content is left
/// unchanged.
pub struct TrplHeading;
impl Preprocessor for TrplHeading {
    /// The preprocessor's name; also used to look up its mode from the
    /// preprocessor context in `run`.
    fn name(&self) -> &str {
        "trpl-heading"
    }

    /// Rewrites the headings of every chapter in `book` according to the
    /// configured mode.
    ///
    /// Every chapter is attempted even if an earlier one fails; if any
    /// rewrite fails, all of the failures are returned together as a single
    /// `CompositeError`.
    fn run(
        &self,
        ctx: &PreprocessorContext,
        mut book: Book,
    ) -> anyhow::Result<Book> {
        let mode = Mode::from_context(ctx, self.name())?;

        let mut failures = Vec::new();
        book.for_each_mut(|item| match item {
            BookItem::Chapter(chapter) => {
                match rewrite_headings(&chapter.content, mode) {
                    Ok(rewritten) => chapter.content = rewritten,
                    Err(reason) => failures.push(reason),
                }
            }
            // Anything that is not a chapter has no content to rewrite.
            _ => {}
        });

        if !failures.is_empty() {
            return Err(CompositeError(failures).into());
        }
        Ok(book)
    }

    /// Only the `html`, `markdown`, and `test` renderers are supported.
    fn supports_renderer(&self, renderer: &str) -> bool {
        matches!(renderer, "html" | "markdown" | "test")
    }
}
/// Strips inline markup from every heading in `src`.
///
/// In `Mode::Default` the input is returned untouched. Otherwise the source
/// is parsed into Markdown events, the events inside headings are filtered,
/// and the resulting event stream is serialized back to Markdown.
///
/// # Errors
///
/// Returns an error if the event stream ends while still inside a heading, or
/// if serializing the events back to Markdown fails.
fn rewrite_headings(src: &str, mode: Mode) -> anyhow::Result<String> {
    // Nothing to do for the default mode: hand the source back as-is.
    if mode == Mode::Default {
        return Ok(src.into());
    }

    let mut inside_heading = false;
    let mut kept: Vec<Event<'_>> = Vec::new();

    for event in crate::parser(src) {
        match event {
            // Inside a heading, the start and end markers of emphasis, strong
            // emphasis, and strikethrough, as well as any inline HTML tags,
            // are simply dropped. Their text content arrives as separate
            // events and is kept.
            Event::Start(Tag::Emphasis | Tag::Strong | Tag::Strikethrough)
            | Event::End(
                TagEnd::Emphasis | TagEnd::Strong | TagEnd::Strikethrough,
            )
            | Event::InlineHtml(_)
                if inside_heading => {}

            // Inline code inside a heading is re-emitted as plain text; the
            // event carries only the code body, without the backticks.
            Event::Code(body) if inside_heading => {
                kept.push(Event::Text(body));
            }

            // Headings are assumed to be well-formed and cannot nest, so a
            // single flag (rather than a depth or level) is enough to track
            // whether we are inside one.
            Event::Start(Tag::Heading { .. }) => {
                inside_heading = true;
                kept.push(event);
            }
            Event::End(TagEnd::Heading(_)) => {
                inside_heading = false;
                kept.push(event);
            }

            // Everything else passes through unchanged.
            other => kept.push(other),
        }
    }

    if inside_heading {
        return Err(anyhow!("Unclosed heading"));
    }

    let mut rewritten = String::new();
    cmark(kept.into_iter(), &mut rewritten)?;
    Ok(rewritten)
}
#[cfg(test)]
mod tests;

View File

@@ -0,0 +1,205 @@
use super::*;
/// In default mode the source must come back exactly as it went in, markup
/// and all.
#[test]
fn default_mode_is_unchanged() {
    let source = "# This is *emphasized* and **strong** and `code`
## Here is *another* and **strong** and `code`
### Third *level* **heading** with `code`
#### Fourth *heading* **level** and `code`
##### Fifth *level* **heading** and `code`
###### Last *heading* **level** with `code`
";

    let rewritten = rewrite_headings(source, Mode::Default).unwrap();

    assert_eq!(rewritten, source);
}
// Note: most of these tests check that the result of rewriting a heading
// *with* and *without* the markup is the same, so that other “normalization”
// that happens along the way (inserting or removing newlines, e.g.) is
// ignored. The nested-emphasis and nested-strong tests instead compare
// against a literal expected string.
mod simple_mode {
use super::*;
#[test]
fn strips_em() {
let result = rewrite_headings(
"# This is *emphasized* and _this is too_
## Here is *another* and _emphasis style_
### Third *level* _heading_ here
#### Fourth *heading* _level_ text
##### Fifth *level* _heading_ now
###### Last *heading* _level_ test
",
Mode::Simple,
);
let expected = rewrite_headings(
"# This is emphasized and this is too
## Here is another and emphasis style
### Third level heading here
#### Fourth heading level text
##### Fifth level heading now
###### Last heading level test
",
Mode::Simple,
);
assert_eq!(result.unwrap(), expected.unwrap());
}
#[test]
fn strips_nested_em() {
let result = rewrite_headings(
"# *This _is *extra* emphatic_ emphasis*.",
Mode::Simple,
);
let expected = "# This is extra emphatic emphasis.";
assert_eq!(result.unwrap(), expected);
}
#[test]
fn strips_strong() {
let result = rewrite_headings(
"# This is **strong** and __this is too__
## Here is **another** and __strong style__
### Third **level** __heading__ here
#### Fourth **heading** __level__ text
##### Fifth **level** __heading__ now
###### Last **heading** __level__ test
",
Mode::Simple,
);
let expected = rewrite_headings(
"# This is strong and this is too
## Here is another and strong style
### Third level heading here
#### Fourth heading level text
##### Fifth level heading now
###### Last heading level test
",
Mode::Simple,
);
assert_eq!(result.unwrap(), expected.unwrap());
}
#[test]
fn strips_nested_strong() {
let result = rewrite_headings(
"# **This __is **extra** emphatic__ emphasis**.",
Mode::Simple,
);
let expected = "# This is extra emphatic emphasis.";
assert_eq!(result.unwrap(), expected);
}
#[test]
fn strips_code() {
let result = rewrite_headings(
"# This is `code`
## Here is `another`
### Third `level`
#### Fourth `heading`
##### Fifth `level`
###### Last `heading`
",
Mode::Simple,
);
let expected = rewrite_headings(
"# This is code
## Here is another
### Third level
#### Fourth heading
##### Fifth level
###### Last heading
",
Mode::Simple,
);
assert_eq!(result.unwrap(), expected.unwrap());
}
#[test]
fn strips_html() {
let result = rewrite_headings(
"# This is <span>html</span>
## Here is <span>another</span>
### Third <span>level</span>
#### Fourth <span>heading</span>
##### Fifth <span>level</span>
###### Last <span>heading</span>
",
Mode::Simple,
);
let expected = rewrite_headings(
"# This is html
## Here is another
### Third level
#### Fourth heading
##### Fifth level
###### Last heading
",
Mode::Simple,
);
assert_eq!(result.unwrap(), expected.unwrap());
}
#[test]
fn strips_strikethrough() {
let result = rewrite_headings(
"# This is ~~strikethrough~~
## Here is ~~another~~
### Third ~~level~~
#### Fourth ~~heading~~
##### Fifth ~~level~~
###### Last ~~heading~~
",
Mode::Simple,
);
let expected = rewrite_headings(
"# This is strikethrough
## Here is another
### Third level
#### Fourth heading
##### Fifth level
###### Last heading
",
Mode::Simple,
);
assert_eq!(result.unwrap(), expected.unwrap());
}
#[test]
fn strips_nested_combinations() {
let result = rewrite_headings(
"# **Nested ~~strikethrough _emphasis_ fun~~ times**",
Mode::Simple,
);
let expected = rewrite_headings(
"# Nested strikethrough emphasis fun times",
Mode::Simple,
);
assert_eq!(result.unwrap(), expected.unwrap())
}
}

View File

@@ -1,10 +1,12 @@
mod config;
mod figure;
mod heading;
mod listing;
mod note;
pub use config::Mode;
pub use figure::TrplFigure as Figure;
pub use heading::TrplHeading as Heading;
pub use listing::TrplListing as Listing;
pub use note::TrplNote as Note;
use pulldown_cmark::{Options, Parser};
@@ -33,3 +35,16 @@ pub fn parser(text: &str) -> Parser<'_> {
opts.insert(Options::ENABLE_SMART_PUNCTUATION);
Parser::new_ext(text, opts)
}
/// A collection of errors accumulated while rewriting a book's chapters,
/// reported together as a single error.
#[derive(Debug, thiserror::Error)]
struct CompositeError(Vec<anyhow::Error>);

impl std::fmt::Display for CompositeError {
    /// Writes a fixed prefix followed by the `Debug` rendering of every
    /// collected error, one per line so that adjacent errors do not run
    /// together.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let details = self
            .0
            .iter()
            .map(|e| format!("{e:?}"))
            .collect::<Vec<_>>()
            .join("\n");
        write!(f, "Error(s) rewriting input: {details}")
    }
}

View File

@@ -1,3 +1,4 @@
use anyhow::anyhow;
use html_parser::Dom;
use mdbook::{
book::Book,
@@ -8,7 +9,7 @@ use mdbook::{
use pulldown_cmark::{html, Event};
use pulldown_cmark_to_cmark::cmark;
use crate::config::Mode;
use crate::{config::Mode, CompositeError};
/// A preprocessor for rendering listings more elegantly.
///
@@ -67,7 +68,7 @@ impl Preprocessor for TrplListing {
if let BookItem::Chapter(ref mut chapter) = item {
match rewrite_listing(&chapter.content, mode) {
Ok(rewritten) => chapter.content = rewritten,
Err(reason) => errors.push(reason),
Err(reason) => errors.push(anyhow!(reason)),
}
}
});
@@ -75,7 +76,7 @@ impl Preprocessor for TrplListing {
if errors.is_empty() {
Ok(book)
} else {
Err(CompositeError(errors.join("\n")).into())
Err(CompositeError(errors).into())
}
}
@@ -84,10 +85,6 @@ impl Preprocessor for TrplListing {
}
}
#[derive(Debug, thiserror::Error)]
#[error("Error(s) rewriting input: {0}")]
struct CompositeError(String);
fn rewrite_listing(src: &str, mode: Mode) -> Result<String, String> {
match mode {
Mode::Default => {
@@ -96,7 +93,7 @@ fn rewrite_listing(src: &str, mode: Mode) -> Result<String, String> {
current: None,
events: vec![],
},
|mut state, ev| {
|mut state, ev| -> Result<RewriteState, String> {
match ev {
Event::Html(tag) => {
if tag.starts_with("<Listing") {
@@ -109,7 +106,7 @@ fn rewrite_listing(src: &str, mode: Mode) -> Result<String, String> {
}
ev => state.events.push(Ok(ev)),
};
Ok::<RewriteState<'_>, String>(state)
Ok(state)
},
)?;