diff --git a/Cargo.lock b/Cargo.lock index 77ac938..b90f2c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -117,6 +117,7 @@ dependencies = [ "thiserror", "tokio", "tracing", + "tracing-subscriber", ] [[package]] @@ -268,6 +269,12 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + [[package]] name = "memchr" version = "2.7.6" @@ -283,6 +290,15 @@ dependencies = [ "adler2", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys", +] + [[package]] name = "object" version = "0.37.3" @@ -366,6 +382,12 @@ version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + [[package]] name = "syn" version = "2.0.107" @@ -470,15 +492,29 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" dependencies = [ + "nu-ansi-term", "sharded-slab", + "smallvec", "thread_local", "tracing-core", + "tracing-log", ] [[package]] @@ -510,3 +546,12 @@ name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] diff --git a/Cargo.toml b/Cargo.toml index 1dac2f1..45db498 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,3 +12,4 @@ sha2 = "0.10.9" thiserror = "2.0.17" tokio = { version = "1.48.0", features = ["rt-multi-thread", "macros", "fs", "io-std", "io-util"] } tracing = "0.1.41" +tracing-subscriber = "0.3.20" diff --git a/src/main.rs b/src/main.rs index 179e97c..c87f04e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,82 +1,113 @@ -use futures::{StreamExt, stream}; use std::{ - collections::VecDeque, + collections::{HashMap, VecDeque}, fs::File, - io::{BufReader, Read}, + io::{BufReader, BufWriter, Read, Write}, path::{Path, PathBuf}, }; -use tokio::{fs, task}; +use tokio::task::{JoinSet, spawn_blocking}; use anyhow::Result; use sha2::{Digest, Sha256}; const BUF_SIZE: usize = 256 * 1024; +const HASH_FILE_NAME: &str = ".nytehash"; -async fn hash_file(path: PathBuf) -> Result { - task::spawn_blocking(move || -> Result { - let file = File::open(&path)?; - let mut reader = BufReader::with_capacity(BUF_SIZE, file); - let mut hasher = Sha256::new(); - let mut buffer = vec![0u8; BUF_SIZE]; +fn hash_file(path: &Path) -> Result { + tracing::debug!("Hashing File: {:?}", path); - loop { - let n = reader.read(&mut buffer)?; - if n == 0 { - break; - } - hasher.update(&buffer[..n]); + let file = File::open(&path)?; + let mut reader = BufReader::with_capacity(BUF_SIZE, file); + let mut hasher = Sha256::new(); + let mut buffer = vec![0u8; BUF_SIZE]; + + loop { + let n = reader.read(&mut buffer)?; + if n == 0 { + break; } + hasher.update(&buffer[..n]); + } - Ok(hex::encode(hasher.finalize())) - }) - .await? + Ok(hex::encode(hasher.finalize())) } -async fn collect_file_paths(root: PathBuf) -> Result> { - let mut paths = Vec::new(); +fn write_hash_file(path: &Path, hashes: HashMap) -> Result<()> { + let output = File::create(path.join(HASH_FILE_NAME))?; + let mut writer = BufWriter::new(output); + + for (filepath, hash) in hashes { + let filename = filepath.file_name().unwrap(); + let lossy = filename.to_string_lossy(); + let _ = writer.write(lossy.as_bytes())?; + let _ = writer.write(b" = ")?; + let _ = writer.write(hash.as_bytes())?; + let _ = writer.write(b"\n"); + } + + writer.flush()?; + + Ok(()) +} + +async fn hash_directory(path: PathBuf) -> Result> { + tracing::debug!("Hashing Directory: {:?}", path); + + let mut dir_contents = tokio::fs::read_dir(&path).await?; + let mut dirs = vec![]; + let mut tasks = JoinSet::new(); + let mut hashes: HashMap = HashMap::new(); + + while let Some(entry) = dir_contents.next_entry().await? { + let path = entry.path(); + let ftype = entry.file_type().await?; + if ftype.is_dir() { + dirs.push(path); + } else if ftype.is_file() { + match path.file_name() { + Some(name) if name.to_string_lossy() == HASH_FILE_NAME => continue, + _ => tasks.spawn_blocking(move || (hash_file(&path), path)), + }; + } + } + + while let Some(res) = tasks.join_next().await { + let (hash, path) = res?; + hashes.insert(path, hash?); + } + + spawn_blocking(move || write_hash_file(&path, hashes)).await??; + + Ok(dirs) +} + +async fn hash_tree(root: PathBuf) -> Result<()> { + tracing::debug!("Hashing tree: {:?}", root); + let mut dirs = VecDeque::with_capacity(16); dirs.push_back(root); while let Some(directory) = dirs.pop_front() { - let mut dir_entry = tokio::fs::read_dir(directory).await?; + let mut dir_entry = tokio::fs::read_dir(&directory).await?; while let Some(entry) = dir_entry.next_entry().await? { let path = entry.path(); - - if path.is_dir() { - dirs.push_back(path); - } else { - paths.push(path); + let ftype = entry.file_type().await?; + if ftype.is_dir() { + let subdirs = hash_directory(path).await?; + dirs.extend(subdirs); } } } - Ok(paths) + Ok(()) } #[tokio::main] async fn main() -> Result<()> { + tracing_subscriber::fmt::init(); + let _ = color_eyre::install(); + let root = "."; - let files = collect_file_paths(root.into()).await?; - let cores = 64; - - stream::iter(files) - .map(|path| async move { - let name = path.file_name().map(|s| s.to_owned()); - let canonical = fs::canonicalize(&path).await.unwrap_or(path.clone()); - let hash = hash_file(path).await; - (hash, name, canonical) - }) - .buffer_unordered(cores) - .for_each(|(hash, name, path)| async move { - println!( - "Got Hash: {}, File: {:?}, Path: {:?}", - hash.unwrap(), - name, - path - ); - }) - .await; - + hash_tree(root.into()).await?; Ok(()) }