From 82410524d9612f11387c2675a03869d489bb97ef Mon Sep 17 00:00:00 2001 From: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> Date: Wed, 2 Aug 2023 23:32:43 +0200 Subject: [PATCH] [`pylint`] Implement Pylint `bad-format-character` (`E1300`) (#6171) ## Summary Relates to #970. Add new `bad-format-character` Pylint rule. I had to make a change in `crates/ruff_python_literal/src/format.rs` to get a more detailed error in case the format character is not correct. I chose to do this since most of the format spec parsing functions are private. It would have required me reimplementing most of the parsing logic just to know if the format char was correct. This PR also doesn't reflect current Pylint functionality in two ways. It supports new format strings correctly, Pylint as of now doesn't. See pylint-dev/pylint#6085. In case there are multiple adjacent string literals delimited by whitespace the index of the wrong format char will relative to the single string. Pylint will instead reported it relative to the concatenated string. Given this: ``` "%s" "%z" % ("hello", "world") ``` Ruff will report this: ```Unsupported format character 'z' (0x7a) at index 1``` Pylint instead: ```Unsupported format character 'z' (0x7a) at index 3``` I believe it's more sensible to report the index relative to the individual string. ## Test Plan Added new snapshot and a small test in `crates/ruff_python_literal/src/format.rs`. --------- Co-authored-by: Charlie Marsh --- .../pylint/bad_string_format_character.py | 29 +++++ .../src/checkers/ast/analyze/expression.rs | 11 ++ crates/ruff/src/codes.rs | 1 + crates/ruff/src/rules/pylint/mod.rs | 6 +- .../rules/bad_string_format_character.rs | 108 ++++++++++++++++++ crates/ruff/src/rules/pylint/rules/mod.rs | 2 + ...LE1300_bad_string_format_character.py.snap | 44 +++++++ crates/ruff_python_literal/src/format.rs | 24 +++- ruff.schema.json | 1 + 9 files changed, 221 insertions(+), 5 deletions(-) create mode 100644 crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py create mode 100644 crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs create mode 100644 crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap diff --git a/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py b/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py new file mode 100644 index 0000000000..4d668418d4 --- /dev/null +++ b/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py @@ -0,0 +1,29 @@ +# pylint: disable=missing-docstring,consider-using-f-string, pointless-statement + +## Old style formatting + +"%s %z" % ("hello", "world") # [bad-format-character] + +"%s" "%z" % ("hello", "world") # [bad-format-character] + +"""%s %z""" % ("hello", "world") # [bad-format-character] + +"""%s""" """%z""" % ("hello", "world") # [bad-format-character] + +## New style formatting + +"{:s} {:y}".format("hello", "world") # [bad-format-character] + +"{:*^30s}".format("centered") + +## f-strings + +H, W = "hello", "world" +f"{H} {W}" +f"{H:s} {W:z}" # [bad-format-character] + +f"{1:z}" # [bad-format-character] + +## False negatives + +print(("%" "z") % 1) diff --git a/crates/ruff/src/checkers/ast/analyze/expression.rs b/crates/ruff/src/checkers/ast/analyze/expression.rs index 38a2638d84..922ff248f2 100644 --- a/crates/ruff/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff/src/checkers/ast/analyze/expression.rs @@ -419,6 +419,14 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { } } } + + if checker.enabled(Rule::BadStringFormatCharacter) { + pylint::rules::bad_string_format_character::call( + checker, + val.as_str(), + location, + ); + } } } } @@ -1025,6 +1033,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { checker.locator, ); } + if checker.enabled(Rule::BadStringFormatCharacter) { + pylint::rules::bad_string_format_character::percent(checker, expr); + } if checker.enabled(Rule::BadStringFormatType) { pylint::rules::bad_string_format_type(checker, expr, right); } diff --git a/crates/ruff/src/codes.rs b/crates/ruff/src/codes.rs index 64e795cde8..4bbc0cbc9f 100644 --- a/crates/ruff/src/codes.rs +++ b/crates/ruff/src/codes.rs @@ -191,6 +191,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Pylint, "E1142") => (RuleGroup::Unspecified, rules::pylint::rules::AwaitOutsideAsync), (Pylint, "E1205") => (RuleGroup::Unspecified, rules::pylint::rules::LoggingTooManyArgs), (Pylint, "E1206") => (RuleGroup::Unspecified, rules::pylint::rules::LoggingTooFewArgs), + (Pylint, "E1300") => (RuleGroup::Unspecified, rules::pylint::rules::BadStringFormatCharacter), (Pylint, "E1307") => (RuleGroup::Unspecified, rules::pylint::rules::BadStringFormatType), (Pylint, "E1310") => (RuleGroup::Unspecified, rules::pylint::rules::BadStrStripCall), (Pylint, "E1507") => (RuleGroup::Unspecified, rules::pylint::rules::InvalidEnvvarValue), diff --git a/crates/ruff/src/rules/pylint/mod.rs b/crates/ruff/src/rules/pylint/mod.rs index 4e0a8f7e1b..fbe0c1abe3 100644 --- a/crates/ruff/src/rules/pylint/mod.rs +++ b/crates/ruff/src/rules/pylint/mod.rs @@ -18,8 +18,12 @@ mod tests { use crate::settings::Settings; use crate::test::test_path; - #[test_case(Rule::AwaitOutsideAsync, Path::new("await_outside_async.py"))] #[test_case(Rule::AssertOnStringLiteral, Path::new("assert_on_string_literal.py"))] + #[test_case(Rule::AwaitOutsideAsync, Path::new("await_outside_async.py"))] + #[test_case( + Rule::BadStringFormatCharacter, + Path::new("bad_string_format_character.py") + )] #[test_case(Rule::BadStrStripCall, Path::new("bad_str_strip_call.py"))] #[test_case(Rule::BadStringFormatType, Path::new("bad_string_format_type.py"))] #[test_case(Rule::BidirectionalUnicode, Path::new("bidirectional_unicode.py"))] diff --git a/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs b/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs new file mode 100644 index 0000000000..39fd1c4b34 --- /dev/null +++ b/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs @@ -0,0 +1,108 @@ +use std::str::FromStr; + +use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::str::{leading_quote, trailing_quote}; +use ruff_python_ast::{Expr, Ranged}; +use ruff_python_literal::{ + cformat::{CFormatErrorType, CFormatString}, + format::FormatPart, + format::FromTemplate, + format::{FormatSpec, FormatSpecError, FormatString}, +}; +use ruff_python_parser::{lexer, Mode}; +use ruff_text_size::TextRange; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for unsupported format types in format strings. +/// +/// ## Why is this bad? +/// The format string is not checked at compile time, so it is easy to +/// introduce bugs by mistyping the format string. +/// +/// ## Example +/// ```python +/// # `z` is not a valid format type. +/// print("%z" % "1") +/// +/// print("{:z}".format("1")) +/// ``` +#[violation] +pub struct BadStringFormatCharacter { + format_char: char, +} + +impl Violation for BadStringFormatCharacter { + #[derive_message_formats] + fn message(&self) -> String { + format!("Unsupported format character '{}'", self.format_char) + } +} + +/// Ex) `"{:z}".format("1")` +pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) { + if let Ok(format_string) = FormatString::from_str(string) { + for part in &format_string.format_parts { + let FormatPart::Field { format_spec, .. } = part else { + continue; + }; + + if let Err(FormatSpecError::InvalidFormatType) = FormatSpec::parse(format_spec) { + checker.diagnostics.push(Diagnostic::new( + BadStringFormatCharacter { + // The format type character is always the last one. + // More info in the official spec: + // https://docs.python.org/3/library/string.html#format-specification-mini-language + format_char: format_spec.chars().last().unwrap(), + }, + range, + )); + } + } + } +} + +/// Ex) `"%z" % "1"` +pub(crate) fn percent(checker: &mut Checker, expr: &Expr) { + // Grab each string segment (in case there's an implicit concatenation). + let mut strings: Vec = vec![]; + for (tok, range) in lexer::lex_starts_at( + checker.locator().slice(expr.range()), + Mode::Module, + expr.start(), + ) + .flatten() + { + if tok.is_string() { + strings.push(range); + } else if tok.is_percent() { + // Break as soon as we find the modulo symbol. + break; + } + } + + // If there are no string segments, abort. + if strings.is_empty() { + return; + } + + for range in &strings { + let string = checker.locator().slice(*range); + let (Some(leader), Some(trailer)) = (leading_quote(string), trailing_quote(string)) else { + return; + }; + let string = &string[leader.len()..string.len() - trailer.len()]; + + // Parse the format string (e.g. `"%s"`) into a list of `PercentFormat`. + if let Err(format_error) = CFormatString::from_str(string) { + if let CFormatErrorType::UnsupportedFormatChar(format_char) = format_error.typ { + checker.diagnostics.push(Diagnostic::new( + BadStringFormatCharacter { format_char }, + expr.range(), + )); + } + }; + } +} diff --git a/crates/ruff/src/rules/pylint/rules/mod.rs b/crates/ruff/src/rules/pylint/rules/mod.rs index 1cc36b2293..bfd44d030e 100644 --- a/crates/ruff/src/rules/pylint/rules/mod.rs +++ b/crates/ruff/src/rules/pylint/rules/mod.rs @@ -1,6 +1,7 @@ pub(crate) use assert_on_string_literal::*; pub(crate) use await_outside_async::*; pub(crate) use bad_str_strip_call::*; +pub(crate) use bad_string_format_character::BadStringFormatCharacter; pub(crate) use bad_string_format_type::*; pub(crate) use bidirectional_unicode::*; pub(crate) use binary_op_exception::*; @@ -55,6 +56,7 @@ pub(crate) use yield_in_init::*; mod assert_on_string_literal; mod await_outside_async; mod bad_str_strip_call; +pub(crate) mod bad_string_format_character; mod bad_string_format_type; mod bidirectional_unicode; mod binary_op_exception; diff --git a/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap new file mode 100644 index 0000000000..fc8bb36435 --- /dev/null +++ b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap @@ -0,0 +1,44 @@ +--- +source: crates/ruff/src/rules/pylint/mod.rs +--- +bad_string_format_character.py:5:1: PLE1300 Unsupported format character 'z' + | +3 | ## Old style formatting +4 | +5 | "%s %z" % ("hello", "world") # [bad-format-character] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300 +6 | +7 | "%s" "%z" % ("hello", "world") # [bad-format-character] + | + +bad_string_format_character.py:7:1: PLE1300 Unsupported format character 'z' + | +5 | "%s %z" % ("hello", "world") # [bad-format-character] +6 | +7 | "%s" "%z" % ("hello", "world") # [bad-format-character] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300 +8 | +9 | """%s %z""" % ("hello", "world") # [bad-format-character] + | + +bad_string_format_character.py:9:1: PLE1300 Unsupported format character 'z' + | + 7 | "%s" "%z" % ("hello", "world") # [bad-format-character] + 8 | + 9 | """%s %z""" % ("hello", "world") # [bad-format-character] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300 +10 | +11 | """%s""" """%z""" % ("hello", "world") # [bad-format-character] + | + +bad_string_format_character.py:11:1: PLE1300 Unsupported format character 'z' + | + 9 | """%s %z""" % ("hello", "world") # [bad-format-character] +10 | +11 | """%s""" """%z""" % ("hello", "world") # [bad-format-character] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300 +12 | +13 | ## New style formatting + | + + diff --git a/crates/ruff_python_literal/src/format.rs b/crates/ruff_python_literal/src/format.rs index 7ea57c05b9..43565966f2 100644 --- a/crates/ruff_python_literal/src/format.rs +++ b/crates/ruff_python_literal/src/format.rs @@ -180,6 +180,7 @@ impl FormatParse for FormatType { Some('g') => (Some(Self::GeneralFormat(Case::Lower)), chars.as_str()), Some('G') => (Some(Self::GeneralFormat(Case::Upper)), chars.as_str()), Some('%') => (Some(Self::Percentage), chars.as_str()), + Some(_) => (None, chars.as_str()), _ => (None, text), } } @@ -283,10 +284,20 @@ impl FormatSpec { let (width, text) = parse_number(text)?; let (grouping_option, text) = FormatGrouping::parse(text); let (precision, text) = parse_precision(text)?; - let (format_type, text) = FormatType::parse(text); - if !text.is_empty() { - return Err(FormatSpecError::InvalidFormatSpecifier); - } + let (format_type, _text) = if text.is_empty() { + (None, text) + } else { + // If there's any remaining text, we should yield a valid format type and consume it + // all. + let (format_type, text) = FormatType::parse(text); + if format_type.is_none() { + return Err(FormatSpecError::InvalidFormatType); + } + if !text.is_empty() { + return Err(FormatSpecError::InvalidFormatSpecifier); + } + (format_type, text) + }; if zero && fill.is_none() { fill.replace('0'); @@ -724,6 +735,7 @@ pub enum FormatSpecError { DecimalDigitsTooMany, PrecisionTooBig, InvalidFormatSpecifier, + InvalidFormatType, UnspecifiedFormat(char, char), UnknownFormatCode(char, &'static str), PrecisionNotAllowed, @@ -1275,6 +1287,10 @@ mod tests { FormatSpec::parse("d "), Err(FormatSpecError::InvalidFormatSpecifier) ); + assert_eq!( + FormatSpec::parse("z"), + Err(FormatSpecError::InvalidFormatType) + ); } #[test] diff --git a/ruff.schema.json b/ruff.schema.json index 96ff8fe0a8..802011030a 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -2203,6 +2203,7 @@ "PLE1206", "PLE13", "PLE130", + "PLE1300", "PLE1307", "PLE131", "PLE1310",