Move preprocessor types to mdbook-preprocessor

This sets up mdbook-preprocessor with the intent that it becomes the core
library that preprocessors use to implement the necessary interactions.
This commit is contained in:
Eric Huss
2025-07-21 15:19:18 -07:00
parent e123879c8c
commit 12285f505d
13 changed files with 111 additions and 99 deletions

7
Cargo.lock generated
View File

@@ -1271,6 +1271,7 @@ dependencies = [
"ignore",
"log",
"mdbook-core",
"mdbook-preprocessor",
"mdbook-summary",
"memchr",
"notify",
@@ -1313,15 +1314,17 @@ dependencies = [
name = "mdbook-preprocessor"
version = "0.5.0-alpha.1"
dependencies = [
"anyhow",
"mdbook-core",
"serde",
"serde_json",
]
[[package]]
name = "mdbook-remove-emphasis"
version = "0.1.0"
dependencies = [
"anyhow",
"mdbook",
"mdbook-preprocessor",
"pulldown-cmark 0.12.2",
"pulldown-cmark-to-cmark",
"serde_json",

View File

@@ -24,6 +24,7 @@ rust-version = "1.85.0" # Keep in sync with installation.md and .github/workflow
anyhow = "1.0.98"
log = "0.4.27"
mdbook-core = { path = "crates/mdbook-core" }
mdbook-preprocessor = { path = "crates/mdbook-preprocessor" }
mdbook-summary = { path = "crates/mdbook-summary" }
memchr = "2.7.5"
pulldown-cmark = { version = "0.10.3", default-features = false, features = ["html"] } # Do not update, part of the public api.
@@ -61,6 +62,7 @@ handlebars = "6.0"
hex = "0.4.3"
log.workspace = true
mdbook-core.workspace = true
mdbook-preprocessor.workspace = true
mdbook-summary.workspace = true
memchr.workspace = true
opener = "0.8.1"

View File

@@ -8,7 +8,10 @@ repository.workspace = true
rust-version.workspace = true
[dependencies]
anyhow.workspace = true
mdbook-core.workspace = true
serde.workspace = true
serde_json.workspace = true
[lints]
workspace = true

View File

@@ -1,3 +1,76 @@
//! Library to assist implementing an mdbook preprocessor.
use anyhow::Context;
use mdbook_core::book::Book;
use mdbook_core::config::Config;
use mdbook_core::errors::Result;
use serde::{Deserialize, Serialize};
use std::cell::RefCell;
use std::collections::HashMap;
use std::io::Read;
use std::path::PathBuf;
pub use mdbook_core::MDBOOK_VERSION;
pub use mdbook_core::book;
pub use mdbook_core::config;
pub use mdbook_core::errors;
/// An operation which is run immediately after loading a book into memory and
/// before it gets rendered.
///
/// Implementors must provide [`name`](Preprocessor::name) and
/// [`run`](Preprocessor::run); `supports_renderer` has a default
/// implementation that accepts every renderer.
pub trait Preprocessor {
/// Get the `Preprocessor`'s name.
///
/// The name is returned as a string slice borrowed from `self`.
fn name(&self) -> &str;
/// Run this `Preprocessor`, allowing it to update the book before it is
/// given to a renderer.
///
/// The book is taken by value and the (possibly modified) book is handed
/// back on success. `ctx` is only borrowed.
///
/// # Errors
///
/// Implementations return `Err` to report that preprocessing failed.
fn run(&self, ctx: &PreprocessorContext, book: Book) -> Result<Book>;
/// A hint to `MDBook` whether this preprocessor is compatible with a
/// particular renderer.
///
/// By default, always returns `true`.
///
/// The default implementation does not inspect `_renderer`; override this
/// method to restrict the preprocessor to specific renderers.
fn supports_renderer(&self, _renderer: &str) -> bool {
true
}
}
/// Extra information for a `Preprocessor` to give them more context when
/// processing a book.
///
/// The type derives `Serialize` and `Deserialize`. The fields marked
/// `#[serde(skip)]` are not part of the serialized form and are filled with
/// their default values when a context is deserialized.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PreprocessorContext {
/// The location of the book directory on disk.
pub root: PathBuf,
/// The book configuration (`book.toml`).
pub config: Config,
/// The `Renderer` this preprocessor is being used with.
pub renderer: String,
/// The calling `mdbook` version.
pub mdbook_version: String,
/// Internal mapping of chapter titles.
///
/// This is used internally by mdbook to compute custom chapter titles.
/// This should not be used outside of mdbook's internals.
///
/// Wrapped in a `RefCell` so the map can be mutated through a shared
/// reference. Skipped by serde, so a deserialized context starts with an
/// empty map.
#[serde(skip)]
pub chapter_titles: RefCell<HashMap<PathBuf, String>>,
// Private unit field: because it is not `pub`, code outside this crate
// cannot build the struct with a literal and has to go through
// `PreprocessorContext::new` or deserialization instead.
#[serde(skip)]
__non_exhaustive: (),
}
impl PreprocessorContext {
    /// Builds a `PreprocessorContext` from the book root, the book
    /// configuration, and the name of the renderer in use.
    ///
    /// `mdbook_version` is taken from `crate::MDBOOK_VERSION`, and
    /// `chapter_titles` starts out as an empty map.
    pub fn new(root: PathBuf, config: Config, renderer: String) -> Self {
        let mdbook_version = crate::MDBOOK_VERSION.to_string();
        let chapter_titles = RefCell::new(HashMap::new());

        Self {
            root,
            config,
            renderer,
            mdbook_version,
            chapter_titles,
            __non_exhaustive: (),
        }
    }
}
/// Parses the input given to a preprocessor.
///
/// Reads JSON from `reader` and deserializes it into the
/// `(PreprocessorContext, Book)` pair.
///
/// # Errors
///
/// If `serde_json` cannot read or deserialize the data, the underlying error
/// is returned wrapped with the context message "Unable to parse the input".
pub fn parse_input<R>(reader: R) -> Result<(PreprocessorContext, Book)>
where
    R: Read,
{
    serde_json::from_reader(reader).context("Unable to parse the input")
}

View File

@@ -1,10 +1,10 @@
//! A basic example of a preprocessor that does nothing.
use crate::nop_lib::Nop;
use anyhow::Error;
use clap::{Arg, ArgMatches, Command};
use mdbook::book::Book;
use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext};
use mdbook_preprocessor::book::Book;
use mdbook_preprocessor::errors::Result;
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use semver::{Version, VersionReq};
use std::io;
use std::process;
@@ -33,8 +33,8 @@ fn main() {
}
}
fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<(), Error> {
let (ctx, book) = CmdPreprocessor::parse_input(io::stdin())?;
fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<()> {
let (ctx, book) = mdbook_preprocessor::parse_input(io::stdin())?;
let book_version = Version::parse(&ctx.mdbook_version)?;
let version_req = VersionReq::parse(mdbook::MDBOOK_VERSION)?;
@@ -88,7 +88,7 @@ mod nop_lib {
"nop-preprocessor"
}
fn run(&self, ctx: &PreprocessorContext, book: Book) -> Result<Book, Error> {
fn run(&self, ctx: &PreprocessorContext, book: Book) -> Result<Book> {
// In testing we want to tell the preprocessor to blow up by setting a
// particular config value
if let Some(nop_cfg) = ctx.config.get_preprocessor(self.name()) {
@@ -149,7 +149,7 @@ mod nop_lib {
]"##;
let input_json = input_json.as_bytes();
let (ctx, book) = mdbook::preprocess::CmdPreprocessor::parse_input(input_json).unwrap();
let (ctx, book) = mdbook_preprocessor::parse_input(input_json).unwrap();
let expected_book = book.clone();
let result = Nop::new().run(&ctx, book);
assert!(result.is_ok());

View File

@@ -4,8 +4,7 @@ version = "0.1.0"
edition.workspace = true
[dependencies]
anyhow.workspace = true
mdbook = { path = "../../.." }
mdbook-preprocessor.workspace = true
pulldown-cmark = { version = "0.12.2", default-features = false }
pulldown-cmark-to-cmark = "18.0.0"
serde_json = "1.0.132"

View File

@@ -1,10 +1,9 @@
//! This is a demonstration of an mdBook preprocessor which parses markdown
//! and removes any instances of emphasis.
use anyhow::Error;
use mdbook::BookItem;
use mdbook::book::{Book, Chapter};
use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext};
use mdbook_preprocessor::book::{Book, BookItem, Chapter};
use mdbook_preprocessor::errors::Result;
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use pulldown_cmark::{Event, Parser, Tag, TagEnd};
use std::io;
@@ -35,7 +34,7 @@ impl Preprocessor for RemoveEmphasis {
"remove-emphasis"
}
fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> {
fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result<Book> {
let mut total = 0;
book.for_each_mut(|item| {
let BookItem::Chapter(ch) = item else {
@@ -55,7 +54,7 @@ impl Preprocessor for RemoveEmphasis {
}
// ANCHOR: remove_emphasis
fn remove_emphasis(num_removed_items: &mut usize, chapter: &mut Chapter) -> Result<String, Error> {
fn remove_emphasis(num_removed_items: &mut usize, chapter: &mut Chapter) -> Result<String> {
let mut buf = String::with_capacity(chapter.content.len());
let events = Parser::new(&chapter.content).filter(|e| match e {
@@ -71,9 +70,9 @@ fn remove_emphasis(num_removed_items: &mut usize, chapter: &mut Chapter) -> Resu
}
// ANCHOR_END: remove_emphasis
pub fn handle_preprocessing() -> Result<(), Error> {
pub fn handle_preprocessing() -> Result<()> {
let pre = RemoveEmphasis;
let (ctx, book) = CmdPreprocessor::parse_input(io::stdin())?;
let (ctx, book) = mdbook_preprocessor::parse_input(io::stdin())?;
let processed_book = pre.run(&ctx, book)?;
serde_json::to_writer(io::stdout(), &processed_book)?;

View File

@@ -15,6 +15,7 @@ use log::{debug, error, info, log_enabled, trace, warn};
pub use mdbook_core::book::{Book, BookItem, BookItems, Chapter, SectionNumber};
use mdbook_core::config::{Config, RustEdition};
use mdbook_core::utils;
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
pub use mdbook_summary::{Link, Summary, SummaryItem, parse_summary};
use std::ffi::OsString;
use std::io::{IsTerminal, Write};
@@ -24,9 +25,7 @@ use tempfile::Builder as TempFileBuilder;
use toml::Value;
use topological_sort::TopologicalSort;
use crate::preprocess::{
CmdPreprocessor, IndexPreprocessor, LinkPreprocessor, Preprocessor, PreprocessorContext,
};
use crate::preprocess::{CmdPreprocessor, IndexPreprocessor, LinkPreprocessor};
use crate::renderer::{CmdRenderer, HtmlHandlebars, MarkdownRenderer, RenderContext, Renderer};
/// The object used to manage and build a book.

View File

@@ -1,9 +1,9 @@
use super::{Preprocessor, PreprocessorContext};
use crate::book::Book;
use anyhow::{Context, Result, bail, ensure};
use log::{debug, trace, warn};
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use shlex::Shlex;
use std::io::{self, Read, Write};
use std::io::{self, Write};
use std::process::{Child, Command, Stdio};
/// A custom preprocessor which will shell out to a 3rd-party program.
@@ -41,12 +41,6 @@ impl CmdPreprocessor {
CmdPreprocessor { name, cmd }
}
/// A convenience function custom preprocessors can use to parse the input
/// written to `stdin` by a `CmdRenderer`.
pub fn parse_input<R: Read>(reader: R) -> Result<(PreprocessorContext, Book)> {
serde_json::from_reader(reader).with_context(|| "Unable to parse the input")
}
fn write_input_to_child(&self, child: &mut Child, book: &Book, ctx: &PreprocessorContext) {
let stdin = child.stdin.take().expect("Child has stdin");
@@ -200,7 +194,7 @@ mod tests {
let mut buffer = Vec::new();
cmd.write_input(&mut buffer, &md.book, &ctx).unwrap();
let (got_ctx, got_book) = CmdPreprocessor::parse_input(buffer.as_slice()).unwrap();
let (got_ctx, got_book) = mdbook_preprocessor::parse_input(buffer.as_slice()).unwrap();
assert_eq!(got_book, md.book);
assert_eq!(got_ctx, ctx);

View File

@@ -1,10 +1,9 @@
use regex::Regex;
use std::{path::Path, sync::LazyLock};
use super::{Preprocessor, PreprocessorContext};
use crate::book::{Book, BookItem};
use anyhow::Result;
use log::warn;
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use regex::Regex;
use std::{path::Path, sync::LazyLock};
/// A preprocessor for converting file name `README.md` to `index.md` since
/// `README.md` is the de facto index file in markdown-based documentation.

View File

@@ -1,18 +1,17 @@
use crate::book::{Book, BookItem};
use anyhow::{Context, Result};
use log::{error, warn};
use mdbook_core::utils::{
take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
take_rustdoc_include_lines,
};
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use regex::{CaptureMatches, Captures, Regex};
use std::fs;
use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeTo};
use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use super::{Preprocessor, PreprocessorContext};
use crate::book::{Book, BookItem};
use log::{error, warn};
const ESCAPE_CHAR: char = '\\';
const MAX_LINK_NESTED_DEPTH: usize = 10;

View File

@@ -1,13 +1,5 @@
//! Book preprocessing.
use crate::book::Book;
use anyhow::Result;
use mdbook_core::config::Config;
use serde::{Deserialize, Serialize};
use std::cell::RefCell;
use std::collections::HashMap;
use std::path::PathBuf;
pub use self::cmd::CmdPreprocessor;
pub use self::index::IndexPreprocessor;
pub use self::links::LinkPreprocessor;
@@ -15,54 +7,3 @@ pub use self::links::LinkPreprocessor;
mod cmd;
mod index;
mod links;
/// Extra information for a `Preprocessor` to give them more context when
/// processing a book.
///
/// The type derives `Serialize` and `Deserialize`. The fields marked
/// `#[serde(skip)]` are not part of the serialized form and are filled with
/// their default values when a context is deserialized.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PreprocessorContext {
/// The location of the book directory on disk.
pub root: PathBuf,
/// The book configuration (`book.toml`).
pub config: Config,
/// The `Renderer` this preprocessor is being used with.
pub renderer: String,
/// The calling `mdbook` version.
pub mdbook_version: String,
// Crate-internal map from a path to a title string, mutable through a
// shared reference thanks to the `RefCell`. Skipped by serde.
// NOTE(review): presumably chapter paths mapped to custom chapter titles;
// confirm against the code that fills it.
#[serde(skip)]
pub(crate) chapter_titles: RefCell<HashMap<PathBuf, String>>,
// Private unit field: because it is not `pub`, the struct cannot be built
// with a literal from outside this module.
#[serde(skip)]
__non_exhaustive: (),
}
impl PreprocessorContext {
    /// Crate-internal constructor for a `PreprocessorContext`.
    ///
    /// Stores the given root, configuration, and renderer name, records
    /// `crate::MDBOOK_VERSION` as the calling version, and starts with an
    /// empty `chapter_titles` map.
    pub(crate) fn new(root: PathBuf, config: Config, renderer: String) -> Self {
        let titles: HashMap<PathBuf, String> = HashMap::new();
        let version = crate::MDBOOK_VERSION.to_string();

        Self {
            root,
            config,
            renderer,
            mdbook_version: version,
            chapter_titles: RefCell::new(titles),
            __non_exhaustive: (),
        }
    }
}
/// An operation which is run immediately after loading a book into memory and
/// before it gets rendered.
///
/// Implementors must provide `name` and `run`; `supports_renderer` has a
/// default implementation that accepts every renderer.
pub trait Preprocessor {
/// Get the `Preprocessor`'s name.
///
/// The name is returned as a string slice borrowed from `self`.
fn name(&self) -> &str;
/// Run this `Preprocessor`, allowing it to update the book before it is
/// given to a renderer.
///
/// The book is taken by value and the (possibly modified) book is handed
/// back on success. `ctx` is only borrowed.
///
/// # Errors
///
/// Implementations return `Err` to report that preprocessing failed.
fn run(&self, ctx: &PreprocessorContext, book: Book) -> Result<Book>;
/// A hint to `MDBook` whether this preprocessor is compatible with a
/// particular renderer.
///
/// By default, always returns `true`.
///
/// The default implementation does not inspect `_renderer`; override this
/// method to restrict the preprocessor to specific renderers.
fn supports_renderer(&self, _renderer: &str) -> bool {
true
}
}

View File

@@ -3,7 +3,8 @@
use crate::prelude::*;
use anyhow::Result;
use mdbook::book::Book;
use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext};
use mdbook::preprocess::CmdPreprocessor;
use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
use std::sync::{Arc, Mutex};
struct Spy(Arc<Mutex<Inner>>);