Use char-wise width instead of str-width (#12135)

## Summary This PR updates various references in the linter to compute the line width by summing the width of each `char` in a `str` instead of computing the width of the `str` itself. Refer to #12133 for more details. fixes: #12130 ## Test Plan Add a file with a null (`\0`) character, which is zero-width. Run this test case on `main` to make sure it panics, then switch over to this branch to make sure it no longer panics.
2024-07-01 18:56:27 +05:30
parent 37f260b5af
commit 5677614079
5 changed files with 11 additions and 6 deletions
--- a/crates/ruff_linter/resources/test/fixtures/pycodestyle/E501_4.py
+++ b/crates/ruff_linter/resources/test/fixtures/pycodestyle/E501_4.py
--- a/crates/ruff_linter/src/rules/isort/sorting.rs
+++ b/crates/ruff_linter/src/rules/isort/sorting.rs
@@ -3,7 +3,7 @@
 use std::{borrow::Cow, cmp::Ordering, cmp::Reverse};

 use natord;
-use unicode_width::UnicodeWidthStr;
+use unicode_width::UnicodeWidthChar;

 use ruff_python_stdlib::str;

@@ -106,7 +106,11 @@ impl<'a> ModuleKey<'a> {

        let maybe_length = (settings.length_sort
            || (settings.length_sort_straight && style == ImportStyle::Straight))
-            .then_some(name.map(str::width).unwrap_or_default() + level as usize);
+            .then_some(
+                name.map(|name| name.chars().map(|c| c.width().unwrap_or(0)).sum::<usize>())
+                    .unwrap_or_default()
+                    + level as usize,
+            );

        let distance = match level {
            0 => Distance::None,
@@ -157,7 +161,9 @@ impl<'a> MemberKey<'a> {
        let member_type = settings
            .order_by_type
            .then_some(member_type(name, settings));
-        let maybe_length = settings.length_sort.then_some(name.width());
+        let maybe_length = settings
+            .length_sort
+            .then(|| name.chars().map(|c| c.width().unwrap_or(0)).sum());
        let maybe_lowercase_name =
            (!settings.case_sensitive).then_some(NatOrdStr(maybe_lowercase(name)));
        let module_name = NatOrdStr::from(name);
--- a/crates/ruff_linter/src/rules/pycodestyle/mod.rs
+++ b/crates/ruff_linter/src/rules/pycodestyle/mod.rs
@@ -34,6 +34,7 @@ mod tests {
    #[test_case(Rule::InvalidEscapeSequence, Path::new("W605_1.py"))]
    #[test_case(Rule::LineTooLong, Path::new("E501.py"))]
    #[test_case(Rule::LineTooLong, Path::new("E501_3.py"))]
+    #[test_case(Rule::LineTooLong, Path::new("E501_4.py"))]
    #[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))]
    #[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E40.py"))]
    #[test_case(Rule::ModuleImportNotAtTopOfFile, Path::new("E402_0.py"))]
--- a/crates/ruff_linter/src/rules/pycodestyle/overlong.rs
+++ b/crates/ruff_linter/src/rules/pycodestyle/overlong.rs
@@ -1,7 +1,5 @@
 use std::ops::Deref;

-use unicode_width::UnicodeWidthStr;
-
 use ruff_python_trivia::{is_pragma_comment, CommentRanges};
 use ruff_source_file::Line;
 use ruff_text_size::{TextLen, TextRange};
@@ -61,7 +59,7 @@ impl Overlong {
        // begins before the limit.
        let last_chunk = chunks.last().unwrap_or(second_chunk);
        if last_chunk.contains("://") {
-            if width.get() - last_chunk.width() <= limit.value() as usize {
+            if width.get() - measure(last_chunk, tab_size).get() <= limit.value() as usize {
                return None;
            }
        }
--- a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linterrulespycodestyletestsE501_E501_4.py.snap
+++ b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linterrulespycodestyletestsE501_E501_4.py.snap