From 12285f505d8fbf0d2ac6ff5e89e23d60bfd1c59d Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Mon, 21 Jul 2025 15:19:18 -0700 Subject: [PATCH] Move preprocessor types to mdbook-preprocessor This sets up mdbook-preprocessor with the intent of being the core library that preprocessors use to implement the necessary interactions. --- Cargo.lock | 7 +- Cargo.toml | 2 + crates/mdbook-preprocessor/Cargo.toml | 3 + crates/mdbook-preprocessor/src/lib.rs | 73 +++++++++++++++++++ examples/nop-preprocessor.rs | 14 ++-- .../mdbook-remove-emphasis/Cargo.toml | 3 +- .../mdbook-remove-emphasis/src/main.rs | 15 ++-- src/book/mod.rs | 5 +- src/preprocess/cmd.rs | 12 +-- src/preprocess/index.rs | 7 +- src/preprocess/links.rs | 7 +- src/preprocess/mod.rs | 59 --------------- tests/testsuite/preprocessor.rs | 3 +- 13 files changed, 111 insertions(+), 99 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 337aa70f..90db9f34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1271,6 +1271,7 @@ dependencies = [ "ignore", "log", "mdbook-core", + "mdbook-preprocessor", "mdbook-summary", "memchr", "notify", @@ -1313,15 +1314,17 @@ dependencies = [ name = "mdbook-preprocessor" version = "0.5.0-alpha.1" dependencies = [ + "anyhow", "mdbook-core", + "serde", + "serde_json", ] [[package]] name = "mdbook-remove-emphasis" version = "0.1.0" dependencies = [ - "anyhow", - "mdbook", + "mdbook-preprocessor", "pulldown-cmark 0.12.2", "pulldown-cmark-to-cmark", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 16858e37..afdd72b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ rust-version = "1.85.0" # Keep in sync with installation.md and .github/workflow anyhow = "1.0.98" log = "0.4.27" mdbook-core = { path = "crates/mdbook-core" } +mdbook-preprocessor = { path = "crates/mdbook-preprocessor" } mdbook-summary = { path = "crates/mdbook-summary" } memchr = "2.7.5" pulldown-cmark = { version = "0.10.3", default-features = false, features = ["html"] } # Do not update, part of the public api. @@ -61,6 +62,7 @@ handlebars = "6.0" hex = "0.4.3" log.workspace = true mdbook-core.workspace = true +mdbook-preprocessor.workspace = true mdbook-summary.workspace = true memchr.workspace = true opener = "0.8.1" diff --git a/crates/mdbook-preprocessor/Cargo.toml b/crates/mdbook-preprocessor/Cargo.toml index c39ff408..c4ad0fc1 100644 --- a/crates/mdbook-preprocessor/Cargo.toml +++ b/crates/mdbook-preprocessor/Cargo.toml @@ -8,7 +8,10 @@ repository.workspace = true rust-version.workspace = true [dependencies] +anyhow.workspace = true mdbook-core.workspace = true +serde.workspace = true +serde_json.workspace = true [lints] workspace = true diff --git a/crates/mdbook-preprocessor/src/lib.rs b/crates/mdbook-preprocessor/src/lib.rs index e4d97bb4..b1d77420 100644 --- a/crates/mdbook-preprocessor/src/lib.rs +++ b/crates/mdbook-preprocessor/src/lib.rs @@ -1,3 +1,76 @@ //! Library to assist implementing an mdbook preprocessor. +use anyhow::Context; +use mdbook_core::book::Book; +use mdbook_core::config::Config; +use mdbook_core::errors::Result; +use serde::{Deserialize, Serialize}; +use std::cell::RefCell; +use std::collections::HashMap; +use std::io::Read; +use std::path::PathBuf; + pub use mdbook_core::MDBOOK_VERSION; +pub use mdbook_core::book; +pub use mdbook_core::config; +pub use mdbook_core::errors; + +/// An operation which is run immediately after loading a book into memory and +/// before it gets rendered. +pub trait Preprocessor { + /// Get the `Preprocessor`'s name. + fn name(&self) -> &str; + + /// Run this `Preprocessor`, allowing it to update the book before it is + /// given to a renderer. + fn run(&self, ctx: &PreprocessorContext, book: Book) -> Result; + + /// A hint to `MDBook` whether this preprocessor is compatible with a + /// particular renderer. + /// + /// By default, always returns `true`. + fn supports_renderer(&self, _renderer: &str) -> bool { + true + } +} + +/// Extra information for a `Preprocessor` to give them more context when +/// processing a book. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PreprocessorContext { + /// The location of the book directory on disk. + pub root: PathBuf, + /// The book configuration (`book.toml`). + pub config: Config, + /// The `Renderer` this preprocessor is being used with. + pub renderer: String, + /// The calling `mdbook` version. + pub mdbook_version: String, + /// Internal mapping of chapter titles. + /// + /// This is used internally by mdbook to compute custom chapter titles. + /// This should not be used outside of mdbook's internals. + #[serde(skip)] + pub chapter_titles: RefCell>, + #[serde(skip)] + __non_exhaustive: (), +} + +impl PreprocessorContext { + /// Create a new `PreprocessorContext`. + pub fn new(root: PathBuf, config: Config, renderer: String) -> Self { + PreprocessorContext { + root, + config, + renderer, + mdbook_version: crate::MDBOOK_VERSION.to_string(), + chapter_titles: RefCell::new(HashMap::new()), + __non_exhaustive: (), + } + } +} + +/// Parses the input given to a preprocessor. +pub fn parse_input(reader: R) -> Result<(PreprocessorContext, Book)> { + serde_json::from_reader(reader).with_context(|| "Unable to parse the input") +} diff --git a/examples/nop-preprocessor.rs b/examples/nop-preprocessor.rs index f85fd820..b2e0fb40 100644 --- a/examples/nop-preprocessor.rs +++ b/examples/nop-preprocessor.rs @@ -1,10 +1,10 @@ //! A basic example of a preprocessor that does nothing. use crate::nop_lib::Nop; -use anyhow::Error; use clap::{Arg, ArgMatches, Command}; -use mdbook::book::Book; -use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext}; +use mdbook_preprocessor::book::Book; +use mdbook_preprocessor::errors::Result; +use mdbook_preprocessor::{Preprocessor, PreprocessorContext}; use semver::{Version, VersionReq}; use std::io; use std::process; @@ -33,8 +33,8 @@ fn main() { } } -fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<(), Error> { - let (ctx, book) = CmdPreprocessor::parse_input(io::stdin())?; +fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<()> { + let (ctx, book) = mdbook_preprocessor::parse_input(io::stdin())?; let book_version = Version::parse(&ctx.mdbook_version)?; let version_req = VersionReq::parse(mdbook::MDBOOK_VERSION)?; @@ -88,7 +88,7 @@ mod nop_lib { "nop-preprocessor" } - fn run(&self, ctx: &PreprocessorContext, book: Book) -> Result { + fn run(&self, ctx: &PreprocessorContext, book: Book) -> Result { // In testing we want to tell the preprocessor to blow up by setting a // particular config value if let Some(nop_cfg) = ctx.config.get_preprocessor(self.name()) { @@ -149,7 +149,7 @@ mod nop_lib { ]"##; let input_json = input_json.as_bytes(); - let (ctx, book) = mdbook::preprocess::CmdPreprocessor::parse_input(input_json).unwrap(); + let (ctx, book) = mdbook_preprocessor::parse_input(input_json).unwrap(); let expected_book = book.clone(); let result = Nop::new().run(&ctx, book); assert!(result.is_ok()); diff --git a/examples/remove-emphasis/mdbook-remove-emphasis/Cargo.toml b/examples/remove-emphasis/mdbook-remove-emphasis/Cargo.toml index cd3e8c83..1b55e291 100644 --- a/examples/remove-emphasis/mdbook-remove-emphasis/Cargo.toml +++ b/examples/remove-emphasis/mdbook-remove-emphasis/Cargo.toml @@ -4,8 +4,7 @@ version = "0.1.0" edition.workspace = true [dependencies] -anyhow.workspace = true -mdbook = { path = "../../.." } +mdbook-preprocessor.workspace = true pulldown-cmark = { version = "0.12.2", default-features = false } pulldown-cmark-to-cmark = "18.0.0" serde_json = "1.0.132" diff --git a/examples/remove-emphasis/mdbook-remove-emphasis/src/main.rs b/examples/remove-emphasis/mdbook-remove-emphasis/src/main.rs index 88f58506..95974d8f 100644 --- a/examples/remove-emphasis/mdbook-remove-emphasis/src/main.rs +++ b/examples/remove-emphasis/mdbook-remove-emphasis/src/main.rs @@ -1,10 +1,9 @@ //! This is a demonstration of an mdBook preprocessor which parses markdown //! and removes any instances of emphasis. -use anyhow::Error; -use mdbook::BookItem; -use mdbook::book::{Book, Chapter}; -use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext}; +use mdbook_preprocessor::book::{Book, BookItem, Chapter}; +use mdbook_preprocessor::errors::Result; +use mdbook_preprocessor::{Preprocessor, PreprocessorContext}; use pulldown_cmark::{Event, Parser, Tag, TagEnd}; use std::io; @@ -35,7 +34,7 @@ impl Preprocessor for RemoveEmphasis { "remove-emphasis" } - fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result { + fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result { let mut total = 0; book.for_each_mut(|item| { let BookItem::Chapter(ch) = item else { @@ -55,7 +54,7 @@ impl Preprocessor for RemoveEmphasis { } // ANCHOR: remove_emphasis -fn remove_emphasis(num_removed_items: &mut usize, chapter: &mut Chapter) -> Result { +fn remove_emphasis(num_removed_items: &mut usize, chapter: &mut Chapter) -> Result { let mut buf = String::with_capacity(chapter.content.len()); let events = Parser::new(&chapter.content).filter(|e| match e { @@ -71,9 +70,9 @@ fn remove_emphasis(num_removed_items: &mut usize, chapter: &mut Chapter) -> Resu } // ANCHOR_END: remove_emphasis -pub fn handle_preprocessing() -> Result<(), Error> { +pub fn handle_preprocessing() -> Result<()> { let pre = RemoveEmphasis; - let (ctx, book) = CmdPreprocessor::parse_input(io::stdin())?; + let (ctx, book) = mdbook_preprocessor::parse_input(io::stdin())?; let processed_book = pre.run(&ctx, book)?; serde_json::to_writer(io::stdout(), &processed_book)?; diff --git a/src/book/mod.rs b/src/book/mod.rs index efcfd391..53109153 100644 --- a/src/book/mod.rs +++ b/src/book/mod.rs @@ -15,6 +15,7 @@ use log::{debug, error, info, log_enabled, trace, warn}; pub use mdbook_core::book::{Book, BookItem, BookItems, Chapter, SectionNumber}; use mdbook_core::config::{Config, RustEdition}; use mdbook_core::utils; +use mdbook_preprocessor::{Preprocessor, PreprocessorContext}; pub use mdbook_summary::{Link, Summary, SummaryItem, parse_summary}; use std::ffi::OsString; use std::io::{IsTerminal, Write}; @@ -24,9 +25,7 @@ use tempfile::Builder as TempFileBuilder; use toml::Value; use topological_sort::TopologicalSort; -use crate::preprocess::{ - CmdPreprocessor, IndexPreprocessor, LinkPreprocessor, Preprocessor, PreprocessorContext, -}; +use crate::preprocess::{CmdPreprocessor, IndexPreprocessor, LinkPreprocessor}; use crate::renderer::{CmdRenderer, HtmlHandlebars, MarkdownRenderer, RenderContext, Renderer}; /// The object used to manage and build a book. diff --git a/src/preprocess/cmd.rs b/src/preprocess/cmd.rs index dfd0681e..754ddddc 100644 --- a/src/preprocess/cmd.rs +++ b/src/preprocess/cmd.rs @@ -1,9 +1,9 @@ -use super::{Preprocessor, PreprocessorContext}; use crate::book::Book; use anyhow::{Context, Result, bail, ensure}; use log::{debug, trace, warn}; +use mdbook_preprocessor::{Preprocessor, PreprocessorContext}; use shlex::Shlex; -use std::io::{self, Read, Write}; +use std::io::{self, Write}; use std::process::{Child, Command, Stdio}; /// A custom preprocessor which will shell out to a 3rd-party program. @@ -41,12 +41,6 @@ impl CmdPreprocessor { CmdPreprocessor { name, cmd } } - /// A convenience function custom preprocessors can use to parse the input - /// written to `stdin` by a `CmdRenderer`. - pub fn parse_input(reader: R) -> Result<(PreprocessorContext, Book)> { - serde_json::from_reader(reader).with_context(|| "Unable to parse the input") - } - fn write_input_to_child(&self, child: &mut Child, book: &Book, ctx: &PreprocessorContext) { let stdin = child.stdin.take().expect("Child has stdin"); @@ -200,7 +194,7 @@ mod tests { let mut buffer = Vec::new(); cmd.write_input(&mut buffer, &md.book, &ctx).unwrap(); - let (got_ctx, got_book) = CmdPreprocessor::parse_input(buffer.as_slice()).unwrap(); + let (got_ctx, got_book) = mdbook_preprocessor::parse_input(buffer.as_slice()).unwrap(); assert_eq!(got_book, md.book); assert_eq!(got_ctx, ctx); diff --git a/src/preprocess/index.rs b/src/preprocess/index.rs index 3b2666d7..3b8dfd2a 100644 --- a/src/preprocess/index.rs +++ b/src/preprocess/index.rs @@ -1,10 +1,9 @@ -use regex::Regex; -use std::{path::Path, sync::LazyLock}; - -use super::{Preprocessor, PreprocessorContext}; use crate::book::{Book, BookItem}; use anyhow::Result; use log::warn; +use mdbook_preprocessor::{Preprocessor, PreprocessorContext}; +use regex::Regex; +use std::{path::Path, sync::LazyLock}; /// A preprocessor for converting file name `README.md` to `index.md` since /// `README.md` is the de facto index file in markdown-based documentation. diff --git a/src/preprocess/links.rs b/src/preprocess/links.rs index 02d7c4d7..cfefbc8a 100644 --- a/src/preprocess/links.rs +++ b/src/preprocess/links.rs @@ -1,18 +1,17 @@ +use crate::book::{Book, BookItem}; use anyhow::{Context, Result}; +use log::{error, warn}; use mdbook_core::utils::{ take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines, take_rustdoc_include_lines, }; +use mdbook_preprocessor::{Preprocessor, PreprocessorContext}; use regex::{CaptureMatches, Captures, Regex}; use std::fs; use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeTo}; use std::path::{Path, PathBuf}; use std::sync::LazyLock; -use super::{Preprocessor, PreprocessorContext}; -use crate::book::{Book, BookItem}; -use log::{error, warn}; - const ESCAPE_CHAR: char = '\\'; const MAX_LINK_NESTED_DEPTH: usize = 10; diff --git a/src/preprocess/mod.rs b/src/preprocess/mod.rs index 1134ad4d..d4906651 100644 --- a/src/preprocess/mod.rs +++ b/src/preprocess/mod.rs @@ -1,13 +1,5 @@ //! Book preprocessing. -use crate::book::Book; -use anyhow::Result; -use mdbook_core::config::Config; -use serde::{Deserialize, Serialize}; -use std::cell::RefCell; -use std::collections::HashMap; -use std::path::PathBuf; - pub use self::cmd::CmdPreprocessor; pub use self::index::IndexPreprocessor; pub use self::links::LinkPreprocessor; @@ -15,54 +7,3 @@ pub use self::links::LinkPreprocessor; mod cmd; mod index; mod links; - -/// Extra information for a `Preprocessor` to give them more context when -/// processing a book. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PreprocessorContext { - /// The location of the book directory on disk. - pub root: PathBuf, - /// The book configuration (`book.toml`). - pub config: Config, - /// The `Renderer` this preprocessor is being used with. - pub renderer: String, - /// The calling `mdbook` version. - pub mdbook_version: String, - #[serde(skip)] - pub(crate) chapter_titles: RefCell>, - #[serde(skip)] - __non_exhaustive: (), -} - -impl PreprocessorContext { - /// Create a new `PreprocessorContext`. - pub(crate) fn new(root: PathBuf, config: Config, renderer: String) -> Self { - PreprocessorContext { - root, - config, - renderer, - mdbook_version: crate::MDBOOK_VERSION.to_string(), - chapter_titles: RefCell::new(HashMap::new()), - __non_exhaustive: (), - } - } -} - -/// An operation which is run immediately after loading a book into memory and -/// before it gets rendered. -pub trait Preprocessor { - /// Get the `Preprocessor`'s name. - fn name(&self) -> &str; - - /// Run this `Preprocessor`, allowing it to update the book before it is - /// given to a renderer. - fn run(&self, ctx: &PreprocessorContext, book: Book) -> Result; - - /// A hint to `MDBook` whether this preprocessor is compatible with a - /// particular renderer. - /// - /// By default, always returns `true`. - fn supports_renderer(&self, _renderer: &str) -> bool { - true - } -} diff --git a/tests/testsuite/preprocessor.rs b/tests/testsuite/preprocessor.rs index 51f4a2a2..b04f4b43 100644 --- a/tests/testsuite/preprocessor.rs +++ b/tests/testsuite/preprocessor.rs @@ -3,7 +3,8 @@ use crate::prelude::*; use anyhow::Result; use mdbook::book::Book; -use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext}; +use mdbook::preprocess::CmdPreprocessor; +use mdbook_preprocessor::{Preprocessor, PreprocessorContext}; use std::sync::{Arc, Mutex}; struct Spy(Arc>);