Skip to content

Commit

Permalink
MCG
Browse files Browse the repository at this point in the history
  • Loading branch information
qarmin committed Dec 28, 2024
1 parent 96aa639 commit 31e80f2
Show file tree
Hide file tree
Showing 11 changed files with 411 additions and 131 deletions.
242 changes: 144 additions & 98 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ members = [
"krokiet"
]
exclude = [
"misc/test_read_perf",
"ci_tester",
]
resolver = "2"
Expand Down
24 changes: 24 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
## Version 9.0.0 - ?

### Breaking changes
- Video cache is now incompatible with previous versions, and needs to be regenerated

### Known regressions
- Crashes when using similar videos (when hashing invalid files)
- Crashes when reading exif data - reported here - https://github.com/mindeng/nom-exif/issues/created_by/qarmin

### CI

### Core
- Updated vid_dup_finder, now it is able to find similar videos shorter than 30 seconds - [#]()
- More supported jxl image formats(using built-in jxl -> image-rs converter) - [#]()
- Rotating all images by default, based on EXIF orientation - [#]()
- Using reusable thread-local buffers when reading files to calculate hashes - [#]()

### Krokiet

### GTK GUI

### CLI


## Version 8.0.0 - 11.10.2024r

### Breaking changes
Expand Down
13 changes: 13 additions & 0 deletions Diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Performance comparison of using Array and Vector with specific buffer sizes for reading files from disk and calculating their hashes.
This is a quite realistic scenario, which also uses rayon, which sometimes messes with the predictability of results.
My computer has a quite good CPU, but a cheap SATA SSD, so the results mostly show disk performance.

(Time to read files and calculate hashes in parallel, smaller is better)
| Name | 250000 files ~50 KB(SSD) | 170 files 5MB-150MB(SSD) | 1 file 0.9 GB(SSD) | 6200 files 50KB - 50MB(HDD) | 1 file 671 MB(HDD) |
| --- | --- | --- | --- | --- | --- |
| Array 16KB | Base | Base | Base | Base | Base |
| Vector 16KB | 0% | 0% | 0% | 0% | 0% |
| Vector 1MB | -7% | -4% | -16% | -45% | 0% |
| Thread local Vector 1MB | -12% | -4% | -16% | -45% | 0% |

I tried to
2 changes: 1 addition & 1 deletion czkawka_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ once_cell = "1.20"
rawloader = "0.37"
imagepipe = "0.5"
libraw-rs = { version = "0.0.4", optional = true }
jxl-oxide = { version = "0.10.0", features = ["image"] }
jxl-oxide = { version = "0.11.0", features = ["image"] }

# Checking for invalid extensions
mime_guess = "2.0"
Expand Down
1 change: 0 additions & 1 deletion czkawka_core/benches/hash_calculation_benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use const_format::concatcp;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use czkawka_core::duplicate::{hash_calculation, DuplicateEntry, HashType};
use std::env::temp_dir;
Expand Down
3 changes: 2 additions & 1 deletion czkawka_core/src/common_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::similar_images::{convert_algorithm_to_string, convert_filters_to_stri

const CACHE_VERSION: &str = "70";
const CACHE_IMAGE_VERSION: &str = "90";
const CACHE_VIDEO_VERSION: &str = "90";

pub fn get_broken_files_cache_file() -> String {
format!("cache_broken_files_{CACHE_VERSION}.bin")
Expand All @@ -30,7 +31,7 @@ pub fn get_similar_images_cache_file(hash_size: &u8, hash_alg: &HashAlg, image_f
}

pub fn get_similar_videos_cache_file() -> String {
format!("cache_similar_videos_{CACHE_VERSION}.bin")
format!("cache_similar_videos_{CACHE_VIDEO_VERSION}.bin")
}
pub fn get_similar_music_cache_file(checking_tags: bool) -> String {
if checking_tags {
Expand Down
67 changes: 40 additions & 27 deletions czkawka_core/src/duplicate.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
use crossbeam_channel::{Receiver, Sender};
use fun_time::fun_time;
use humansize::{format_size, BINARY};
use log::debug;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Debug;
use std::fs::File;
Expand All @@ -9,13 +16,6 @@ use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use std::sync::atomic::Ordering;
use std::{fs, mem};

use crossbeam_channel::{Receiver, Sender};
use fun_time::fun_time;
use humansize::{format_size, BINARY};
use log::debug;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use xxhash_rust::xxh3::Xxh3;

use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, WorkContinueStatus};
Expand All @@ -27,6 +27,10 @@ use crate::progress_data::{CurrentStage, ProgressData};

const TEMP_HARDLINK_FILE: &str = "rzeczek.rxrxrxl";

// Per-thread scratch buffer handed to `hash_calculation` when reading file
// contents. Keeping one buffer per thread lets every file hashed on that
// thread reuse the same allocation instead of creating a new buffer each time.
//
// The buffer is zero-initialized and sized to exactly 1 MiB (1024 * 1024
// bytes); the previous `1024 * 2024` was a typo that allocated ~1.98 MiB.
thread_local! {
    static THREAD_BUFFER: RefCell<Vec<u8>> = RefCell::new(vec![0u8; 1024 * 1024]);
}

#[derive(PartialEq, Eq, Clone, Debug, Copy, Default)]
pub enum HashType {
#[default]
Expand Down Expand Up @@ -574,22 +578,24 @@ impl DuplicateFinder {
.map(|(size, vec_file_entry)| {
let mut hashmap_with_hash: BTreeMap<String, Vec<DuplicateEntry>> = Default::default();
let mut errors: Vec<String> = Vec::new();
let mut buffer = [0u8; 1024 * 32];

atomic_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
if check_if_stop_received(stop_receiver) {
check_was_stopped.store(true, Ordering::Relaxed);
return None;
}
for mut file_entry in vec_file_entry {
match hash_calculation(&mut buffer, &file_entry, check_type, 0) {
Ok(hash_string) => {
file_entry.hash = hash_string.clone();
hashmap_with_hash.entry(hash_string).or_default().push(file_entry);
THREAD_BUFFER.with_borrow_mut(|buffer| {
for mut file_entry in vec_file_entry {
match hash_calculation(buffer, &file_entry, check_type, 1024 * 32) {
Ok(hash_string) => {
file_entry.hash = hash_string.clone();
hashmap_with_hash.entry(hash_string).or_default().push(file_entry);
}
Err(s) => errors.push(s),
}
Err(s) => errors.push(s),
}
}
});

Some((size, hashmap_with_hash, errors))
})
.while_some()
Expand Down Expand Up @@ -781,23 +787,30 @@ impl DuplicateFinder {
.map(|(size, vec_file_entry)| {
let mut hashmap_with_hash: BTreeMap<String, Vec<DuplicateEntry>> = Default::default();
let mut errors: Vec<String> = Vec::new();
let mut buffer = [0u8; 1024 * 16];

let mut exam_stopped = false;
atomic_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
for mut file_entry in vec_file_entry {
if check_if_stop_received(stop_receiver) {
check_was_stopped.store(true, Ordering::Relaxed);
return None;
}

match hash_calculation(&mut buffer, &file_entry, check_type, u64::MAX) {
Ok(hash_string) => {
file_entry.hash = hash_string.clone();
hashmap_with_hash.entry(hash_string.clone()).or_default().push(file_entry);
THREAD_BUFFER.with_borrow_mut(|buffer| {
for mut file_entry in vec_file_entry {
if check_if_stop_received(stop_receiver) {
check_was_stopped.store(true, Ordering::Relaxed);
exam_stopped = true;
break;
}

match hash_calculation(buffer, &file_entry, check_type, u64::MAX) {
Ok(hash_string) => {
file_entry.hash = hash_string.clone();
hashmap_with_hash.entry(hash_string.clone()).or_default().push(file_entry);
}
Err(s) => errors.push(s),
}
Err(s) => errors.push(s),
}
});
if exam_stopped {
return None;
}

Some((size, hashmap_with_hash, errors))
})
.while_some()
Expand Down
5 changes: 2 additions & 3 deletions krokiet/ui/popup_select_results.slint
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import { Preview } from "preview.slint";

export component PopupSelectResults inherits Rectangle {
callback show_popup();
property <[SelectModel]> model: GuiState.select_results_list;
property <[SelectModel]> model: GuiState.select_results_list;
property <length> item_height: 30px;
out property <length> item_width: 200px;
out property <length> all_items_height: item_height * model.length;
Expand All @@ -35,7 +35,6 @@ export component PopupSelectResults inherits Rectangle {
text: i.name;
height: item_height;
width: item_width;

clicked => {
Callabler.select_items(i.data);
popup_window.close();
Expand All @@ -48,4 +47,4 @@ export component PopupSelectResults inherits Rectangle {
show_popup() => {
popup_window.show();
}
}
}
11 changes: 11 additions & 0 deletions misc/test_read_perf/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "test_read_perf"
version = "0.1.0"
edition = "2021"

[dependencies]
czkawka_core = { path = "../../czkawka_core" }
walkdir = "2.5.0"
humansize = "2.1.3"
rayon = "1.10.0"
strum = { version = "0.26.3", features = ["strum_macros", "derive"] }
Loading

0 comments on commit 31e80f2

Please sign in to comment.