first working version
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
/target
|
||||
408
Cargo.lock
generated
Normal file
408
Cargo.lock
generated
Normal file
@@ -0,0 +1,408 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "android_system_properties"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.101"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea"
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
||||
|
||||
[[package]]
|
||||
name = "bit-vec"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
|
||||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.19.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.2.56"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2"
|
||||
dependencies = [
|
||||
"find-msvc-tools",
|
||||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
version = "0.4.43"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
|
||||
dependencies = [
|
||||
"iana-time-zone",
|
||||
"js-sys",
|
||||
"num-traits",
|
||||
"wasm-bindgen",
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation-sys"
|
||||
version = "0.8.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||
|
||||
[[package]]
|
||||
name = "find-msvc-tools"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.65"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470"
|
||||
dependencies = [
|
||||
"android_system_properties",
|
||||
"core-foundation-sys",
|
||||
"iana-time-zone-haiku",
|
||||
"js-sys",
|
||||
"log",
|
||||
"wasm-bindgen",
|
||||
"windows-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone-haiku"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.85"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.182"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
|
||||
|
||||
[[package]]
|
||||
name = "murmur3"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b"
|
||||
|
||||
[[package]]
|
||||
name = "num"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
|
||||
dependencies = [
|
||||
"num-bigint",
|
||||
"num-complex",
|
||||
"num-integer",
|
||||
"num-iter",
|
||||
"num-rational",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
|
||||
dependencies = [
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-complex"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.46"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-iter"
|
||||
version = "0.1.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-rational"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
|
||||
dependencies = [
|
||||
"num-bigint",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.21.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "reimagine"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bit-vec",
|
||||
"chrono",
|
||||
"murmur3",
|
||||
"num",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.115"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e614ed320ac28113fa64972c4262d5dbc89deacdfd00c34a3e4cea073243c12"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
|
||||
dependencies = [
|
||||
"same-file",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.108"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
"rustversion",
|
||||
"wasm-bindgen-macro",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro"
|
||||
version = "0.2.108"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"wasm-bindgen-macro-support",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro-support"
|
||||
version = "0.2.108"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-shared"
|
||||
version = "0.2.108"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.62.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
|
||||
dependencies = [
|
||||
"windows-implement",
|
||||
"windows-interface",
|
||||
"windows-link",
|
||||
"windows-result",
|
||||
"windows-strings",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-implement"
|
||||
version = "0.60.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-interface"
|
||||
version = "0.59.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||
|
||||
[[package]]
|
||||
name = "windows-result"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
|
||||
dependencies = [
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-strings"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
|
||||
dependencies = [
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.61.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
|
||||
dependencies = [
|
||||
"windows-link",
|
||||
]
|
||||
12
Cargo.toml
Normal file
12
Cargo.toml
Normal file
@@ -0,0 +1,12 @@
|
||||
[package]
|
||||
name = "reimagine"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.101"
|
||||
bit-vec = "0.8.0"
|
||||
chrono = "0.4.43"
|
||||
murmur3 = "0.5.2"
|
||||
num = "0.4.3"
|
||||
walkdir = "2.5.0"
|
||||
5
README.md
Normal file
5
README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# Reimagine
|
||||
|
||||
## Finding file count
|
||||
|
||||
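Run this in the directory to be crawled to estimate the expected file count (the second command-line argument):

`find . -type f -iname "*.jpg" | wc -l`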
|
||||
66
src/bloom_filter.rs
Normal file
66
src/bloom_filter.rs
Normal file
@@ -0,0 +1,66 @@
|
||||
use std::{f64, io::Cursor};
|
||||
|
||||
use bit_vec::BitVec;
|
||||
use murmur3::murmur3_32;
|
||||
|
||||
const FALSE_POSITIVE_PROB: f64 = 0.05;
|
||||
|
||||
pub struct BloomFilter {
|
||||
bit_vec: BitVec,
|
||||
bits: u32,
|
||||
|
||||
hash_count: u32,
|
||||
}
|
||||
|
||||
impl BloomFilter {
|
||||
pub fn new(expected_elems: u32) -> BloomFilter {
|
||||
let bits = BloomFilter::get_optimal_size(
|
||||
expected_elems, FALSE_POSITIVE_PROB);
|
||||
let bit_vec = BitVec::from_elem(bits as usize, false);
|
||||
|
||||
let hash_count = BloomFilter::get_hash_count(
|
||||
bits, expected_elems);
|
||||
|
||||
BloomFilter {
|
||||
bit_vec,
|
||||
bits,
|
||||
|
||||
hash_count
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, value: &String) {
|
||||
for i in 0..self.hash_count {
|
||||
let mut cursor = Cursor::new(value);
|
||||
let digest = murmur3_32(&mut cursor, i).unwrap() % self.bits;
|
||||
|
||||
self.bit_vec.set(digest as usize, true);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn lookup(&self, value: &String) -> bool {
|
||||
for i in 0..self.hash_count {
|
||||
let mut cursor = Cursor::new(value);
|
||||
let hash = murmur3_32(&mut cursor, i).unwrap() % self.bits;
|
||||
|
||||
if self.bit_vec[hash as usize] == false { return false; }
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
// n is the expected amount of elements
|
||||
// p is the probability of a false positive
|
||||
fn get_optimal_size(n: u32, p: f64) -> u32 {
|
||||
-((n as f64 * p.ln()) / ((2_f64).ln()).powi(2)) as u32
|
||||
}
|
||||
|
||||
// m is the bit vector size
|
||||
// n is the number of items expected
|
||||
fn get_hash_count(m: u32, n: u32) -> u32 {
|
||||
let factor = m as f64 / n as f64;
|
||||
let log2 = (2_f64).ln();
|
||||
|
||||
(factor * log2) as u32
|
||||
}
|
||||
}
|
||||
41
src/crawler.rs
Normal file
41
src/crawler.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
use std::path::PathBuf;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::{bloom_filter::BloomFilter, index::Index};
|
||||
|
||||
|
||||
pub struct Crawler {
|
||||
bloom_filter: BloomFilter,
|
||||
|
||||
pub rejected: usize,
|
||||
}
|
||||
|
||||
impl Crawler {
|
||||
pub fn new(expected_elems: u32) -> Crawler {
|
||||
let bloom_filter = BloomFilter::new(expected_elems);
|
||||
|
||||
Crawler { bloom_filter, rejected: 0 }
|
||||
}
|
||||
|
||||
pub fn create_index(&mut self, starting_point: &PathBuf) -> Index {
|
||||
let mut index = Index::new();
|
||||
|
||||
for f in WalkDir::new(starting_point).into_iter().filter_map(|f| f.ok()) {
|
||||
if f.metadata().unwrap().is_file() {
|
||||
let filename = f.file_name().to_ascii_lowercase();
|
||||
let filename = filename.to_str().unwrap().to_string();
|
||||
|
||||
if !self.bloom_filter.lookup(&filename) {
|
||||
self.bloom_filter.insert(&filename);
|
||||
|
||||
index.add(filename, f.clone().into_path(), f.metadata().unwrap());
|
||||
} else {
|
||||
self.rejected += 1;
|
||||
println!("Rejected: {}", filename);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
index
|
||||
}
|
||||
}
|
||||
81
src/index.rs
Normal file
81
src/index.rs
Normal file
@@ -0,0 +1,81 @@
|
||||
use std::{collections::HashMap, fs::{self, DirEntry, Metadata}, hash::Hash, path::PathBuf};
|
||||
|
||||
use chrono::{DateTime, Datelike, Local, NaiveDate, NaiveDateTime, Utc};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct Folder {
|
||||
files: HashMap<String, Vec<ImageInfo>>
|
||||
}
|
||||
|
||||
/// One file recorded in the index.
#[derive(Debug)]
struct ImageInfo {
    /// Location of the source file.
    path: PathBuf,
    /// File name used when the file is copied to its destination folder.
    name: String,
    /// Filesystem metadata captured for the file.
    metadata: Metadata,
}

impl ImageInfo {
    /// Bundles a file's path, name and metadata into an `ImageInfo`.
    pub fn new(path: PathBuf, name: String, metadata: Metadata) -> ImageInfo {
        Self { path, name, metadata }
    }
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Index {
|
||||
folders: HashMap<u32, Folder>
|
||||
}
|
||||
|
||||
impl Index {
|
||||
pub fn new() -> Index {
|
||||
Index {
|
||||
folders: HashMap::new()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&mut self, file_name: String, path: PathBuf, meta: Metadata) {
|
||||
let new_file = ImageInfo::new(path, file_name, meta.clone());
|
||||
let created_on: DateTime<Local> = meta.created().unwrap().into();
|
||||
|
||||
let year = created_on.year() as u32;
|
||||
let date = created_on.date_naive().to_string();
|
||||
|
||||
let folder_exists = self.folders.contains_key(&year);
|
||||
if !folder_exists {
|
||||
self.folders.insert(year, Folder::default());
|
||||
}
|
||||
|
||||
let folder = self.folders.get_mut(&year).unwrap();
|
||||
|
||||
let subfolder_exists = folder.files.contains_key(&date);
|
||||
if !subfolder_exists {
|
||||
folder.files.insert(date, vec![new_file]);
|
||||
return;
|
||||
}
|
||||
|
||||
let existing_subfolder = folder.files.get_mut(&date).unwrap();
|
||||
existing_subfolder.push(new_file);
|
||||
}
|
||||
|
||||
pub fn propagate(&self, output: PathBuf) {
|
||||
for (year, folder) in self.folders.iter() {
|
||||
for (date, files) in folder.files.iter() {
|
||||
let mut path = output.clone();
|
||||
path.push(year.to_string());
|
||||
path.push(date);
|
||||
|
||||
fs::create_dir_all(&path).expect("Pfad konnte nicht erstellt werden");
|
||||
|
||||
for file in files.iter() {
|
||||
let mut new_path = path.clone();
|
||||
new_path.push(&file.name);
|
||||
|
||||
fs::copy(&file.path, new_path).expect("Datei konnte nicht kopiert werden");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
36
src/main.rs
Normal file
36
src/main.rs
Normal file
@@ -0,0 +1,36 @@
|
||||
use std::path::PathBuf;
|
||||
use anyhow::Context;
|
||||
|
||||
use crate::{crawler::Crawler};
|
||||
|
||||
mod bloom_filter;
|
||||
mod crawler;
|
||||
mod index;
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let args = std::env::args()
|
||||
.skip(1)
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
let master_path = args.get(0)
|
||||
.context("Kein Masterpfad angegeben")
|
||||
.unwrap();
|
||||
|
||||
let expected_elems = args.get(1)
|
||||
.context("Anzahl zu erwartender Dateien fehlt")
|
||||
.unwrap();
|
||||
let expected_elems = expected_elems.parse::<u32>()
|
||||
.context("Angegebene Anzahl ist keien gültige Zahl")
|
||||
.unwrap();
|
||||
|
||||
let mut crawler = Crawler::new(expected_elems);
|
||||
|
||||
let master_path = PathBuf::from(master_path);
|
||||
let index = crawler.create_index(&master_path);
|
||||
|
||||
index.propagate(PathBuf::from("output"));
|
||||
|
||||
println!("Total Rejected: {:?}", crawler.rejected);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user