Skip to content

Commit

Permalink
Merge pull request #9 from nickspring/remove-float-rounding
Browse files Browse the repository at this point in the history
fix: Tests were fixed (disabled previously by mistake) + feat: round_…
  • Loading branch information
nickspring authored Sep 26, 2023
2 parents 74ea353 + e6fbe89 commit 4d9fd71
Show file tree
Hide file tree
Showing 10 changed files with 25 additions and 44 deletions.
3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ repository = "https://github.com/nickspring/charset-normalizer-rs"
keywords = ["encoding", "charset", "detector", "conversion", "normalizer"]
categories = ["encoding", "internationalization", "localization"]
exclude = [
"/tests/**",
"/src/tests/**",
"/src/tests/data/**",
"/CONTRIBUTING.md",
"/CODE_OF_CONDUCT.md",
"/.github/**",
Expand Down
4 changes: 2 additions & 2 deletions src/cd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ pub(crate) fn merge_coherence_ratios(results: &Vec<CoherenceMatches>) -> Coheren
.iter()
.map(|(&lang, scores)| CoherenceMatch {
language: lang,
score: round_float(scores.iter().sum::<f32>() / (scores.len() as f32), 4),
score: scores.iter().sum::<f32>() / (scores.len() as f32),
})
.collect();

Expand Down Expand Up @@ -265,7 +265,7 @@ pub(crate) fn coherence_ratio(

results.push(CoherenceMatch {
language,
score: round_float(ratio, 4),
score: ratio,
});

if sufficient_match_count >= 3 {
Expand Down
10 changes: 5 additions & 5 deletions src/entity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use crate::cd::{encoding_languages, mb_encoding_languages};
use crate::consts::{IANA_SUPPORTED_ALIASES, TOO_BIG_SEQUENCE};
use crate::utils::{decode, iana_name, is_multi_byte_encoding, range_scan, round_float};
use crate::utils::{decode, iana_name, is_multi_byte_encoding, range_scan};
use clap::Parser;
use encoding::DecoderTrap;
use ordered_float::OrderedFloat;
Expand Down Expand Up @@ -269,11 +269,11 @@ impl CharsetMatch {
}
// Return chaos as a percentage (not rounded; callers format it for display)
pub fn chaos_percents(&self) -> f32 {
round_float(self.chaos() * 100.0, 3)
self.chaos() * 100.0
}
// Return coherence as a percentage (not rounded; callers format it for display)
pub fn coherence_percents(&self) -> f32 {
round_float(self.coherence() * 100.0, 3)
self.coherence() * 100.0
}
// Most relevant language coherence
pub fn coherence(&self) -> f32 {
Expand Down Expand Up @@ -534,9 +534,9 @@ pub struct CLINormalizerResult {
/// Does it have a SIG or BOM mark?
pub has_sig_or_bom: bool,
/// Chaos (mess) level
pub chaos: f32,
pub chaos: String,
/// Coherence (language detection) level
pub coherence: f32,
pub coherence: String,
/// Path to decoded data
pub unicode_path: Option<PathBuf>,
pub is_preferred: bool,
Expand Down
5 changes: 3 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ use crate::entity::{CharsetMatch, CharsetMatches, CoherenceMatches, NormalizerSe
use crate::md::mess_ratio;
use crate::utils::{
any_specified_encoding, decode, iana_name, identify_sig_or_bom, is_cp_similar,
is_multi_byte_encoding, round_float, should_strip_sig_or_bom,
is_multi_byte_encoding, should_strip_sig_or_bom,
};
use encoding::DecoderTrap;
use log::{debug, trace};
Expand All @@ -149,6 +149,7 @@ mod cd;
pub mod consts;
pub mod entity;
mod md;
mod tests;
pub mod utils;

// Given a raw byte sequence, return the best possible charsets usable to render str objects.
Expand Down Expand Up @@ -491,7 +492,7 @@ pub fn from_bytes(bytes: &Vec<u8>, settings: Option<NormalizerSettings>) -> Char
} else {
md_ratios.iter().sum::<f32>() / (md_ratios.len() as f32)
};
let mean_mess_ratio_percent = round_float(mean_mess_ratio * 100.0, 3);
let mean_mess_ratio_percent = mean_mess_ratio * 100.0;

if mean_mess_ratio >= *settings.threshold || early_stop_count >= max_chunk_gave_up {
tested_but_soft_failure.push(encoding_iana);
Expand Down
4 changes: 2 additions & 2 deletions src/md.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::consts::COMMON_SAFE_ASCII_CHARACTERS;
use crate::utils::{
is_accentuated, is_ascii, is_case_variable, is_cjk, is_emoticon, is_hangul, is_hiragana,
is_katakana, is_latin, is_punctuation, is_separator, is_suspiciously_successive_range,
is_symbol, is_thai, is_unprintable, remove_accent, round_float, unicode_range,
is_symbol, is_thai, is_unprintable, remove_accent, unicode_range,
};
use cache_macro_stable_rust::cache;
use log::trace;
Expand Down Expand Up @@ -546,5 +546,5 @@ pub(crate) fn mess_ratio(
}
trace!("===");

round_float(mean_mess_ratio, 3)
mean_mess_ratio
}
8 changes: 4 additions & 4 deletions src/normalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ fn normalizer(args: &CLINormalizerArgs) -> Result<i32, String> {
language: "Unknown".to_string(),
alphabets: vec![],
has_sig_or_bom: false,
chaos: 1.0,
coherence: 0.0,
chaos: format!("{:.1}", 1.0),
coherence: format!("{:.1}", 0.0),
unicode_path: None,
is_preferred: true,
});
Expand Down Expand Up @@ -80,8 +80,8 @@ fn normalizer(args: &CLINormalizerArgs) -> Result<i32, String> {
language: format!("{}", m.most_probably_language()),
alphabets: m.unicode_ranges(),
has_sig_or_bom: m.bom(),
chaos: m.chaos_percents(),
coherence: m.coherence_percents(),
chaos: format!("{:.1}", m.chaos_percents()),
coherence: format!("{:.1}", m.coherence_percents()),
unicode_path: None,
is_preferred: true,
};
Expand Down
17 changes: 6 additions & 11 deletions src/performance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use chardetng::EncodingDetector;
use charset_normalizer_rs::consts::CHARDET_CORRESPONDENCE;
use charset_normalizer_rs::entity::{PerformanceArgs, PerformanceResult};
use charset_normalizer_rs::from_bytes;
use charset_normalizer_rs::utils::{get_large_test_datasets, round_float};
use charset_normalizer_rs::utils::get_large_test_datasets;
use clap::Parser;
use encoding::label::encoding_from_whatwg_label;
use log::trace;
Expand Down Expand Up @@ -34,10 +34,8 @@ fn calc_stat(results: &Vec<PerformanceResult>) -> (Duration, Duration, f32) {
let num_durations = durations.len() as u32;

// Accuracy
let accuracy = round_float(
100.0 * results.iter().filter(|r| r.correct).count() as f32 / num_durations as f32,
1,
);
let accuracy =
100.0 * results.iter().filter(|r| r.correct).count() as f32 / num_durations as f32;

(total_duration, total_duration / num_durations, accuracy)
}
Expand Down Expand Up @@ -160,14 +158,11 @@ fn performance_compare(args: &PerformanceArgs) -> i32 {
} else {
// compare speed in %
println!(
" --> Faster than charset-normalizer-rs by {:?} times",
round_float(
our_total_time.as_secs_f32() / total_duration.as_secs_f32(),
1
),
" --> Faster than charset-normalizer-rs by {:.1} times",
our_total_time.as_secs_f32() / total_duration.as_secs_f32(),
);
}
println!(" --> Accuracy: {:?}%", accuracy);
println!(" --> Accuracy: {:.1}%", accuracy);
println!(" --> Total time: {:?}", total_duration);
println!(" --> Avg time: {:?}", mean_duration);
for p in [50.0, 95.0, 99.0] {
Expand Down
4 changes: 2 additions & 2 deletions src/tests/detection_base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ fn test_empty_but_with_bom_or_sig() {
&input
);
assert_eq!(
best_guess.unwrap().byte_order_mark(),
best_guess.unwrap().bom(),
true,
"The BOM/SIG property should return True. Input: {:?}",
&input
Expand Down Expand Up @@ -112,7 +112,7 @@ fn test_content_with_bom_or_sig() {
&input
);
assert_eq!(
best_guess.unwrap().byte_order_mark(),
best_guess.unwrap().bom(),
true,
"The BOM/SIG property should return True. Input: {:?}",
&input
Expand Down
8 changes: 0 additions & 8 deletions src/tests/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,14 +303,6 @@ fn test_is_suspiciously_successive_range() {
}
}

#[test]
fn test_round_float() {
    // Each case is (input value, decimal places, expected rounded value).
    let cases = [(11.3434343, 2, 11.34), (11.5457343, 3, 11.546)];
    cases.iter().for_each(|&(value, precision, expected)| {
        assert_eq!(round_float(value, precision), expected);
    });
}

#[test]
fn test_decode_test() {
let tests = [
Expand Down
6 changes: 0 additions & 6 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,12 +405,6 @@ fn decode_to(
}
}

/// Rounds `val` to `precision` decimal places.
///
/// The value is scaled by `10^precision`, rounded to the nearest integer with
/// `f32::round`, and scaled back down.
///
/// If the scaling step is not finite (the multiplier or the scaled value
/// overflows `f32`, or `val` is already NaN/infinite), `val` is returned
/// unchanged instead of turning into NaN or infinity.
pub fn round_float(val: f32, precision: u8) -> f32 {
    // `u8 -> f32` is lossless, so `From` is used rather than an `as` cast.
    let mult = 10.0f32.powf(f32::from(precision));
    let scaled = val * mult;
    if !scaled.is_finite() {
        // Covers `inf * x`, `0 * inf` (NaN) and NaN inputs: nothing to round here.
        return val;
    }
    scaled.round() / mult
}

// Encode string to vec of bytes with specified encoding
pub fn encode(
input: &str,
Expand Down

0 comments on commit 4d9fd71

Please sign in to comment.