Write nytehash file

This commit is contained in:
2025-10-22 18:49:33 -04:00
parent b2bc77589e
commit 7fd33c2f8a
3 changed files with 126 additions and 49 deletions

45
Cargo.lock generated
View File

@@ -117,6 +117,7 @@ dependencies = [
"thiserror",
"tokio",
"tracing",
"tracing-subscriber",
]
[[package]]
@@ -268,6 +269,12 @@ version = "0.2.177"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
[[package]]
name = "log"
version = "0.4.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
[[package]]
name = "memchr"
version = "2.7.6"
@@ -283,6 +290,15 @@ dependencies = [
"adler2",
]
[[package]]
name = "nu-ansi-term"
version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [
"windows-sys",
]
[[package]]
name = "object"
version = "0.37.3"
@@ -366,6 +382,12 @@ version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "syn"
version = "2.0.107"
@@ -470,15 +492,29 @@ dependencies = [
"tracing-subscriber",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5"
dependencies = [
"nu-ansi-term",
"sharded-slab",
"smallvec",
"thread_local",
"tracing-core",
"tracing-log",
]
[[package]]
@@ -510,3 +546,12 @@ name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-sys"
version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-link",
]

View File

@@ -12,3 +12,4 @@ sha2 = "0.10.9"
thiserror = "2.0.17"
tokio = { version = "1.48.0", features = ["rt-multi-thread", "macros", "fs", "io-std", "io-util"] }
tracing = "0.1.41"
tracing-subscriber = "0.3.20"

View File

@@ -1,20 +1,21 @@
use futures::{StreamExt, stream};
use std::{
collections::VecDeque,
collections::{HashMap, VecDeque},
fs::File,
io::{BufReader, Read},
io::{BufReader, BufWriter, Read, Write},
path::{Path, PathBuf},
};
use tokio::{fs, task};
use tokio::task::{JoinSet, spawn_blocking};
use anyhow::Result;
use sha2::{Digest, Sha256};
/// Capacity in bytes (256 KiB) of the `BufReader` that `hash_file` reads through.
const BUF_SIZE: usize = 256 * 1024;
/// Name of the hash listing that `write_hash_file` creates inside a directory;
/// `hash_directory` skips entries with this name so the listing is not hashed itself.
const HASH_FILE_NAME: &str = ".nytehash";
fn hash_file(path: &Path) -> Result<String> {
tracing::debug!("Hashing File: {:?}", path);
async fn hash_file(path: PathBuf) -> Result<String> {
task::spawn_blocking(move || -> Result<String> {
let file = File::open(&path)?;
let mut reader = BufReader::with_capacity(BUF_SIZE, file);
let mut hasher = Sha256::new();
@@ -29,54 +30,84 @@ async fn hash_file(path: PathBuf) -> Result<String> {
}
Ok(hex::encode(hasher.finalize()))
})
.await?
}
async fn collect_file_paths(root: PathBuf) -> Result<Vec<PathBuf>> {
let mut paths = Vec::new();
/// Writes the hash listing for one directory.
///
/// Creates (or truncates) the file named [`HASH_FILE_NAME`] inside `path` and
/// writes one `<file name> = <hash>` line per entry of `hashes`. Only the final
/// path component of each key is written, converted lossily to UTF-8.
///
/// Lines are sorted by file name so the output does not depend on `HashMap`
/// iteration order and is identical across runs for identical input.
///
/// # Errors
///
/// Returns an error if a key has no final component (for example a path ending
/// in `..`), or if creating, writing, or flushing the file fails.
fn write_hash_file(path: &Path, hashes: HashMap<PathBuf, String>) -> Result<()> {
    // Validate and collect every line before touching the file system, so a
    // malformed key cannot leave a truncated listing behind.
    let mut entries = Vec::with_capacity(hashes.len());
    for (filepath, hash) in hashes {
        let name = filepath.file_name().ok_or_else(|| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                format!("path has no file name: {}", filepath.display()),
            )
        })?;
        entries.push((name.to_string_lossy().into_owned(), hash));
    }
    entries.sort_unstable();

    let output = File::create(path.join(HASH_FILE_NAME))?;
    let mut writer = BufWriter::new(output);
    for (name, hash) in &entries {
        // `writeln!` writes the whole line or fails; a bare `Write::write`
        // is allowed to write only part of the buffer.
        writeln!(writer, "{name} = {hash}")?;
    }
    // Flush explicitly: dropping a `BufWriter` discards flush errors.
    writer.flush()?;
    Ok(())
}
/// Hashes every regular file directly inside `path` and writes the results to
/// that directory's hash listing.
///
/// Each file is hashed on the blocking thread pool via `hash_file`; an entry
/// named [`HASH_FILE_NAME`] is skipped. Once every hash has been gathered,
/// `write_hash_file` is run on a blocking thread as well.
///
/// Returns the paths of the immediate subdirectories of `path`. They are not
/// descended into here.
///
/// # Errors
///
/// Fails if the directory cannot be read, if any hashing task fails or
/// panics, or if the listing cannot be written.
async fn hash_directory(path: PathBuf) -> Result<Vec<PathBuf>> {
    tracing::debug!("Hashing Directory: {:?}", path);

    let mut reader = tokio::fs::read_dir(&path).await?;
    let mut subdirectories = Vec::new();
    let mut workers = JoinSet::new();
    let mut digests: HashMap<PathBuf, String> = HashMap::new();

    while let Some(entry) = reader.next_entry().await? {
        let entry_path = entry.path();
        let kind = entry.file_type().await?;

        if kind.is_dir() {
            subdirectories.push(entry_path);
            continue;
        }
        if !kind.is_file() {
            // Neither a directory nor a regular file: ignored.
            continue;
        }

        let is_listing = entry_path
            .file_name()
            .is_some_and(|name| name.to_string_lossy() == HASH_FILE_NAME);
        if is_listing {
            continue;
        }

        // The task hands the path back alongside the result so the hash can
        // be keyed by the file it belongs to.
        workers.spawn_blocking(move || (hash_file(&entry_path), entry_path));
    }

    while let Some(joined) = workers.join_next().await {
        let (digest, file) = joined?;
        digests.insert(file, digest?);
    }

    // Outer `?` surfaces a failed/panicked task, inner `?` the write error.
    spawn_blocking(move || write_hash_file(&path, digests)).await??;
    Ok(subdirectories)
}
async fn hash_tree(root: PathBuf) -> Result<()> {
tracing::debug!("Hashing tree: {:?}", root);
let mut dirs = VecDeque::with_capacity(16);
dirs.push_back(root);
while let Some(directory) = dirs.pop_front() {
let mut dir_entry = tokio::fs::read_dir(directory).await?;
let mut dir_entry = tokio::fs::read_dir(&directory).await?;
while let Some(entry) = dir_entry.next_entry().await? {
let path = entry.path();
if path.is_dir() {
dirs.push_back(path);
} else {
paths.push(path);
let ftype = entry.file_type().await?;
if ftype.is_dir() {
let subdirs = hash_directory(path).await?;
dirs.extend(subdirs);
}
}
}
Ok(paths)
Ok(())
}
/// Program entry point: sets up logging and error reporting, then hashes the
/// directory tree rooted at the current working directory.
///
/// The former stream pipeline (collect all paths, hash them, print each
/// result) is gone: it depended on `collect_file_paths`, unwrapped every hash
/// result, and is superseded by `hash_tree`.
///
/// # Errors
///
/// Returns whatever error `hash_tree` reports.
#[tokio::main]
async fn main() -> Result<()> {
    tracing_subscriber::fmt::init();
    // The result of installing the color_eyre hooks is deliberately ignored.
    let _ = color_eyre::install();
    let root = ".";
    hash_tree(root.into()).await?;
    Ok(())
}