diff --git a/Changes.md b/Changes.md index 7f9ff32..df05304 100644 --- a/Changes.md +++ b/Changes.md @@ -1,3 +1,8 @@ +## 0.1.3 + +- When sorting a file with repeated lines using `--locale`, the sort order was not always + consistent, and the `--unique` flag could leave duplicates behind. + ## 0.1.2 - 2023-06-04 - Sorting is now done in parallel using [rayon](https://docs.rs/rayon/latest/rayon/). This diff --git a/src/comparer.rs b/src/comparer.rs index 56bd28a..2e697c2 100644 --- a/src/comparer.rs +++ b/src/comparer.rs @@ -444,7 +444,16 @@ fn compare_two_strings( str2: &str, ) -> Ordering { if let Some(c) = collator { - c.compare(str1, str2) + let ord = c.compare(str1, str2); + if ord != Ordering::Equal { + return ord; + } + // If the strings are equal according to the collator they may still + // be different, in which case we break the tie with a plain string + // comparison. Otherwise they end up sorted based on their original order + // in the file, which is arbitrary and means two files containing the + // same lines in different order could be sorted differently. + str1.cmp(str2) } else if case_insensitive { str1.to_lowercase().cmp(&str2.to_lowercase()) } else { diff --git a/src/test-cases/locale-not-unique.test b/src/test-cases/locale-not-unique.test new file mode 100644 index 0000000..e342b90 --- /dev/null +++ b/src/test-cases/locale-not-unique.test @@ -0,0 +1,14 @@ +--sort text --case-insensitive --locale en-US --unique +#### +NotSorted +#### +ª +A +a +ª +A +a +#### +A +a +ª