Merge pull request #2922 from ehuss/header-id-lowercase

Lowercase heading IDs
This commit is contained in:
Eric Huss
2025-11-05 00:14:42 +00:00
committed by GitHub
2 changed files with 17 additions and 4 deletions

View File

@@ -58,8 +58,11 @@ The following is a summary of the changes that may require your attention when u
[#2847](https://github.com/rust-lang/mdBook/pull/2847)
- Added support for admonitions. These are enabled by default, with the option `output.html.admonitions` to disable them.
[#2851](https://github.com/rust-lang/mdBook/pull/2851)
- Headers that start or end with HTML characters like `<`, `&`, or `>` now replace those characters in the link ID with `-` instead of being stripped. This brings the header ID generation closer to other tools and sites.
[#2844](https://github.com/rust-lang/mdBook/pull/2844)
- Header ID generation has some minor changes to bring the ID generation closer to other tools and sites:
    - IDs now use Unicode lowercase instead of ASCII lowercase.
      [#2922](https://github.com/rust-lang/mdBook/pull/2922)
    - Headers that start or end with HTML characters like `<`, `&`, or `>` now replace those characters in the link ID with `-` instead of being stripped.
      [#2844](https://github.com/rust-lang/mdBook/pull/2844)
### CLI changes

View File

@@ -74,12 +74,22 @@ pub(crate) fn unique_id(id: &str, used: &mut HashSet<String>) -> String {
/// Generates an HTML id from the given text.
pub(crate) fn id_from_content(content: &str) -> String {
// This is intended to be close to how header ID generation is done in
// other sites and tools, but is not 100% the same. Not all sites and
// tools use the same algorithm. See these for more information:
//
// - https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#section-links
// - https://docs.gitlab.com/user/markdown/#heading-ids-and-links
// - https://pandoc.org/MANUAL.html#extension-auto_identifiers
// - https://kramdown.gettalong.org/converter/html#auto-ids
// - https://docs.rs/comrak/latest/comrak/options/struct.Extension.html#structfield.header_ids
content
.trim()
.to_lowercase()
.chars()
.filter_map(|ch| {
if ch.is_alphanumeric() || ch == '_' || ch == '-' {
Some(ch.to_ascii_lowercase())
Some(ch)
} else if ch.is_whitespace() {
Some('-')
} else {
@@ -120,6 +130,6 @@ mod tests {
assert_eq!(id_from_content("한국어"), "한국어");
assert_eq!(id_from_content(""), "");
assert_eq!(id_from_content("中文標題 CJK title"), "中文標題-cjk-title");
assert_eq!(id_from_content("Über"), "Über");
assert_eq!(id_from_content("Über"), "über");
}
}