Write nytehash file
This commit is contained in:
45
Cargo.lock
generated
45
Cargo.lock
generated
@@ -117,6 +117,7 @@ dependencies = [
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -268,6 +269,12 @@ version = "0.2.177"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.6"
|
||||
@@ -283,6 +290,15 @@ dependencies = [
|
||||
"adler2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.50.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.37.3"
|
||||
@@ -366,6 +382,12 @@ version = "0.4.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.15.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.107"
|
||||
@@ -470,15 +492,29 @@ dependencies = [
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-log"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
|
||||
dependencies = [
|
||||
"log",
|
||||
"once_cell",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-subscriber"
|
||||
version = "0.3.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5"
|
||||
dependencies = [
|
||||
"nu-ansi-term",
|
||||
"sharded-slab",
|
||||
"smallvec",
|
||||
"thread_local",
|
||||
"tracing-core",
|
||||
"tracing-log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -510,3 +546,12 @@ name = "windows-link"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.61.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
|
||||
dependencies = [
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
@@ -12,3 +12,4 @@ sha2 = "0.10.9"
|
||||
thiserror = "2.0.17"
|
||||
tokio = { version = "1.48.0", features = ["rt-multi-thread", "macros", "fs", "io-std", "io-util"] }
|
||||
tracing = "0.1.41"
|
||||
tracing-subscriber = "0.3.20"
|
||||
|
||||
129
src/main.rs
129
src/main.rs
@@ -1,82 +1,113 @@
|
||||
use futures::{StreamExt, stream};
|
||||
use std::{
|
||||
collections::VecDeque,
|
||||
collections::{HashMap, VecDeque},
|
||||
fs::File,
|
||||
io::{BufReader, Read},
|
||||
io::{BufReader, BufWriter, Read, Write},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use tokio::{fs, task};
|
||||
use tokio::task::{JoinSet, spawn_blocking};
|
||||
|
||||
use anyhow::Result;
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
const BUF_SIZE: usize = 256 * 1024;
|
||||
const HASH_FILE_NAME: &str = ".nytehash";
|
||||
|
||||
async fn hash_file(path: PathBuf) -> Result<String> {
|
||||
task::spawn_blocking(move || -> Result<String> {
|
||||
let file = File::open(&path)?;
|
||||
let mut reader = BufReader::with_capacity(BUF_SIZE, file);
|
||||
let mut hasher = Sha256::new();
|
||||
let mut buffer = vec![0u8; BUF_SIZE];
|
||||
fn hash_file(path: &Path) -> Result<String> {
|
||||
tracing::debug!("Hashing File: {:?}", path);
|
||||
|
||||
loop {
|
||||
let n = reader.read(&mut buffer)?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buffer[..n]);
|
||||
let file = File::open(&path)?;
|
||||
let mut reader = BufReader::with_capacity(BUF_SIZE, file);
|
||||
let mut hasher = Sha256::new();
|
||||
let mut buffer = vec![0u8; BUF_SIZE];
|
||||
|
||||
loop {
|
||||
let n = reader.read(&mut buffer)?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buffer[..n]);
|
||||
}
|
||||
|
||||
Ok(hex::encode(hasher.finalize()))
|
||||
})
|
||||
.await?
|
||||
Ok(hex::encode(hasher.finalize()))
|
||||
}
|
||||
|
||||
async fn collect_file_paths(root: PathBuf) -> Result<Vec<PathBuf>> {
|
||||
let mut paths = Vec::new();
|
||||
fn write_hash_file(path: &Path, hashes: HashMap<PathBuf, String>) -> Result<()> {
|
||||
let output = File::create(path.join(HASH_FILE_NAME))?;
|
||||
let mut writer = BufWriter::new(output);
|
||||
|
||||
for (filepath, hash) in hashes {
|
||||
let filename = filepath.file_name().unwrap();
|
||||
let lossy = filename.to_string_lossy();
|
||||
let _ = writer.write(lossy.as_bytes())?;
|
||||
let _ = writer.write(b" = ")?;
|
||||
let _ = writer.write(hash.as_bytes())?;
|
||||
let _ = writer.write(b"\n");
|
||||
}
|
||||
|
||||
writer.flush()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn hash_directory(path: PathBuf) -> Result<Vec<PathBuf>> {
|
||||
tracing::debug!("Hashing Directory: {:?}", path);
|
||||
|
||||
let mut dir_contents = tokio::fs::read_dir(&path).await?;
|
||||
let mut dirs = vec![];
|
||||
let mut tasks = JoinSet::new();
|
||||
let mut hashes: HashMap<PathBuf, String> = HashMap::new();
|
||||
|
||||
while let Some(entry) = dir_contents.next_entry().await? {
|
||||
let path = entry.path();
|
||||
let ftype = entry.file_type().await?;
|
||||
if ftype.is_dir() {
|
||||
dirs.push(path);
|
||||
} else if ftype.is_file() {
|
||||
match path.file_name() {
|
||||
Some(name) if name.to_string_lossy() == HASH_FILE_NAME => continue,
|
||||
_ => tasks.spawn_blocking(move || (hash_file(&path), path)),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(res) = tasks.join_next().await {
|
||||
let (hash, path) = res?;
|
||||
hashes.insert(path, hash?);
|
||||
}
|
||||
|
||||
spawn_blocking(move || write_hash_file(&path, hashes)).await??;
|
||||
|
||||
Ok(dirs)
|
||||
}
|
||||
|
||||
async fn hash_tree(root: PathBuf) -> Result<()> {
|
||||
tracing::debug!("Hashing tree: {:?}", root);
|
||||
|
||||
let mut dirs = VecDeque::with_capacity(16);
|
||||
dirs.push_back(root);
|
||||
|
||||
while let Some(directory) = dirs.pop_front() {
|
||||
let mut dir_entry = tokio::fs::read_dir(directory).await?;
|
||||
let mut dir_entry = tokio::fs::read_dir(&directory).await?;
|
||||
while let Some(entry) = dir_entry.next_entry().await? {
|
||||
let path = entry.path();
|
||||
|
||||
if path.is_dir() {
|
||||
dirs.push_back(path);
|
||||
} else {
|
||||
paths.push(path);
|
||||
let ftype = entry.file_type().await?;
|
||||
if ftype.is_dir() {
|
||||
let subdirs = hash_directory(path).await?;
|
||||
dirs.extend(subdirs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(paths)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
tracing_subscriber::fmt::init();
|
||||
let _ = color_eyre::install();
|
||||
|
||||
let root = ".";
|
||||
let files = collect_file_paths(root.into()).await?;
|
||||
let cores = 64;
|
||||
|
||||
stream::iter(files)
|
||||
.map(|path| async move {
|
||||
let name = path.file_name().map(|s| s.to_owned());
|
||||
let canonical = fs::canonicalize(&path).await.unwrap_or(path.clone());
|
||||
let hash = hash_file(path).await;
|
||||
(hash, name, canonical)
|
||||
})
|
||||
.buffer_unordered(cores)
|
||||
.for_each(|(hash, name, path)| async move {
|
||||
println!(
|
||||
"Got Hash: {}, File: {:?}, Path: {:?}",
|
||||
hash.unwrap(),
|
||||
name,
|
||||
path
|
||||
);
|
||||
})
|
||||
.await;
|
||||
|
||||
hash_tree(root.into()).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user