From 6d80c79bac2b1bdbf890e597514c7c9d67fd9f45 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 31 Mar 2023 23:40:34 -0400 Subject: [PATCH] Combine `operations.rs` and `helpers.rs` (#3841) --- crates/ruff/src/checkers/ast/mod.rs | 16 +- .../rules/invalid_literal_comparisons.rs | 3 +- crates/ruff_python_ast/src/helpers.rs | 280 +++++++++++++++++- crates/ruff_python_ast/src/lib.rs | 1 - crates/ruff_python_ast/src/operations.rs | 279 ----------------- 5 files changed, 281 insertions(+), 298 deletions(-) delete mode 100644 crates/ruff_python_ast/src/operations.rs diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index fe20864cd0..913f651668 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -27,9 +27,7 @@ use ruff_python_ast::typing::{ match_annotated_subscript, parse_type_annotation, Callable, SubscriptKind, }; use ruff_python_ast::visitor::{walk_excepthandler, walk_pattern, Visitor}; -use ruff_python_ast::{ - branch_detection, cast, helpers, operations, str, typing, visibility, visitor, -}; +use ruff_python_ast::{branch_detection, cast, helpers, str, typing, visibility, visitor}; use ruff_python_stdlib::builtins::{BUILTINS, MAGIC_GLOBALS}; use ruff_python_stdlib::path::is_python_stub_file; @@ -187,7 +185,7 @@ where self.ctx.futures_allowed = false; if !self.ctx.seen_import_boundary && !helpers::is_assignment_to_a_dunder(stmt) - && !operations::in_nested_block(self.ctx.parents.iter().rev().map(Into::into)) + && !helpers::in_nested_block(self.ctx.parents.iter().rev().map(Into::into)) { self.ctx.seen_import_boundary = true; } @@ -1904,8 +1902,8 @@ where self.ctx.visible_scope = scope; // If any global bindings don't already exist in the global scope, add it. - let globals = operations::extract_globals(body); - for (name, stmt) in operations::extract_globals(body) { + let globals = helpers::extract_globals(body); + for (name, stmt) in helpers::extract_globals(body) { if self .ctx .global_scope() @@ -1967,7 +1965,7 @@ where self.ctx.visible_scope = scope; // If any global bindings don't already exist in the global scope, add it. - let globals = operations::extract_globals(body); + let globals = helpers::extract_globals(body); for (name, stmt) in &globals { if self .ctx @@ -4401,7 +4399,7 @@ impl<'a> Checker<'a> { return; } - if operations::is_unpacking_assignment(parent, expr) { + if helpers::is_unpacking_assignment(parent, expr) { self.add_binding( id, Binding { @@ -4504,7 +4502,7 @@ impl<'a> Checker<'a> { let ExprKind::Name { id, .. } = &expr.node else { return; }; - if operations::on_conditional_branch(&mut self.ctx.parents.iter().rev().map(Into::into)) { + if helpers::on_conditional_branch(&mut self.ctx.parents.iter().rev().map(Into::into)) { return; } diff --git a/crates/ruff/src/rules/pyflakes/rules/invalid_literal_comparisons.rs b/crates/ruff/src/rules/pyflakes/rules/invalid_literal_comparisons.rs index 0358d7af18..d616bbb7f1 100644 --- a/crates/ruff/src/rules/pyflakes/rules/invalid_literal_comparisons.rs +++ b/crates/ruff/src/rules/pyflakes/rules/invalid_literal_comparisons.rs @@ -6,7 +6,6 @@ use rustpython_parser::ast::{Cmpop, Expr}; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers; -use ruff_python_ast::operations::locate_cmpops; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -60,7 +59,7 @@ pub fn invalid_literal_comparison( comparators: &[Expr], location: Range, ) { - let located = Lazy::new(|| locate_cmpops(checker.locator.slice(location))); + let located = Lazy::new(|| helpers::locate_cmpops(checker.locator.slice(location))); let mut left = left; for (index, (op, right)) in izip!(ops, comparators).enumerate() { if matches!(op, Cmpop::Is | Cmpop::IsNot) diff --git a/crates/ruff_python_ast/src/helpers.rs b/crates/ruff_python_ast/src/helpers.rs index bde3d5fa28..0e9f9e1409 100644 --- a/crates/ruff_python_ast/src/helpers.rs +++ b/crates/ruff_python_ast/src/helpers.rs @@ -6,8 +6,8 @@ use once_cell::sync::Lazy; use regex::Regex; use rustc_hash::{FxHashMap, FxHashSet}; use rustpython_parser::ast::{ - Arguments, Constant, Excepthandler, ExcepthandlerKind, Expr, ExprKind, Keyword, KeywordData, - Located, Location, MatchCase, Pattern, PatternKind, Stmt, StmtKind, + Arguments, Cmpop, Constant, Excepthandler, ExcepthandlerKind, Expr, ExprKind, Keyword, + KeywordData, Located, Location, MatchCase, Pattern, PatternKind, Stmt, StmtKind, }; use rustpython_parser::{lexer, Mode, Tok}; use smallvec::{smallvec, SmallVec}; @@ -852,6 +852,38 @@ where } } +#[derive(Default)] +struct GlobalStatementVisitor<'a> { + globals: FxHashMap<&'a str, &'a Stmt>, +} + +impl<'a> Visitor<'a> for GlobalStatementVisitor<'a> { + fn visit_stmt(&mut self, stmt: &'a Stmt) { + match &stmt.node { + StmtKind::Global { names } => { + for name in names { + self.globals.insert(name, stmt); + } + } + StmtKind::FunctionDef { .. } + | StmtKind::AsyncFunctionDef { .. } + | StmtKind::ClassDef { .. } => { + // Don't recurse. + } + _ => visitor::walk_stmt(self, stmt), + } + } +} + +/// Extract a map from global name to its last-defining [`Stmt`]. +pub fn extract_globals(body: &[Stmt]) -> FxHashMap<&str, &Stmt> { + let mut visitor = GlobalStatementVisitor::default(); + for stmt in body { + visitor.visit_stmt(stmt); + } + visitor.globals +} + /// Convert a location within a file (relative to `base`) to an absolute /// position. pub fn to_absolute(relative: Location, base: Location) -> Location { @@ -1231,14 +1263,182 @@ pub fn is_logger_candidate(context: &Context, func: &Expr) -> bool { false } +/// Check if a node is parent of a conditional branch. +pub fn on_conditional_branch<'a>(parents: &mut impl Iterator) -> bool { + parents.any(|parent| { + if matches!( + parent.node, + StmtKind::If { .. } | StmtKind::While { .. } | StmtKind::Match { .. } + ) { + return true; + } + if let StmtKind::Expr { value } = &parent.node { + if matches!(value.node, ExprKind::IfExp { .. }) { + return true; + } + } + false + }) +} + +/// Check if a node is in a nested block. +pub fn in_nested_block<'a>(mut parents: impl Iterator) -> bool { + parents.any(|parent| { + matches!( + parent.node, + StmtKind::Try { .. } + | StmtKind::TryStar { .. } + | StmtKind::If { .. } + | StmtKind::With { .. } + | StmtKind::Match { .. } + ) + }) +} + +/// Check if a node represents an unpacking assignment. +pub fn is_unpacking_assignment(parent: &Stmt, child: &Expr) -> bool { + match &parent.node { + StmtKind::With { items, .. } => items.iter().any(|item| { + if let Some(optional_vars) = &item.optional_vars { + if matches!(optional_vars.node, ExprKind::Tuple { .. }) { + if any_over_expr(optional_vars, &|expr| expr == child) { + return true; + } + } + } + false + }), + StmtKind::Assign { targets, value, .. } => { + // In `(a, b) = (1, 2)`, `(1, 2)` is the target, and it is a tuple. + let value_is_tuple = matches!( + &value.node, + ExprKind::Set { .. } | ExprKind::List { .. } | ExprKind::Tuple { .. } + ); + // In `(a, b) = coords = (1, 2)`, `(a, b)` and `coords` are the targets, and + // `(a, b)` is a tuple. (We use "tuple" as a placeholder for any + // unpackable expression.) + let targets_are_tuples = targets.iter().all(|item| { + matches!( + item.node, + ExprKind::Set { .. } | ExprKind::List { .. } | ExprKind::Tuple { .. } + ) + }); + // If we're looking at `a` in `(a, b) = coords = (1, 2)`, then we should + // identify that the current expression is in a tuple. + let child_in_tuple = targets_are_tuples + || targets.iter().any(|item| { + matches!( + item.node, + ExprKind::Set { .. } | ExprKind::List { .. } | ExprKind::Tuple { .. } + ) && any_over_expr(item, &|expr| expr == child) + }); + + // If our child is a tuple, and value is not, it's always an unpacking + // expression. Ex) `x, y = tup` + if child_in_tuple && !value_is_tuple { + return true; + } + + // If our child isn't a tuple, but value is, it's never an unpacking expression. + // Ex) `coords = (1, 2)` + if !child_in_tuple && value_is_tuple { + return false; + } + + // If our target and the value are both tuples, then it's an unpacking + // expression assuming there's at least one non-tuple child. + // Ex) Given `(x, y) = coords = 1, 2`, `(x, y)` is considered an unpacking + // expression. Ex) Given `(x, y) = (a, b) = 1, 2`, `(x, y)` isn't + // considered an unpacking expression. + if child_in_tuple && value_is_tuple { + return !targets_are_tuples; + } + + false + } + _ => false, + } +} + +pub type LocatedCmpop = Located; + +/// Extract all [`Cmpop`] operators from a source code snippet, with appropriate +/// ranges. +/// +/// `RustPython` doesn't include line and column information on [`Cmpop`] nodes. +/// `CPython` doesn't either. This method iterates over the token stream and +/// re-identifies [`Cmpop`] nodes, annotating them with valid ranges. +pub fn locate_cmpops(contents: &str) -> Vec { + let mut tok_iter = lexer::lex(contents, Mode::Module).flatten().peekable(); + let mut ops: Vec = vec![]; + let mut count: usize = 0; + loop { + let Some((start, tok, end)) = tok_iter.next() else { + break; + }; + if matches!(tok, Tok::Lpar) { + count += 1; + continue; + } else if matches!(tok, Tok::Rpar) { + count -= 1; + continue; + } + if count == 0 { + match tok { + Tok::Not => { + if let Some((_, _, end)) = + tok_iter.next_if(|(_, tok, _)| matches!(tok, Tok::In)) + { + ops.push(LocatedCmpop::new(start, end, Cmpop::NotIn)); + } + } + Tok::In => { + ops.push(LocatedCmpop::new(start, end, Cmpop::In)); + } + Tok::Is => { + let op = if let Some((_, _, end)) = + tok_iter.next_if(|(_, tok, _)| matches!(tok, Tok::Not)) + { + LocatedCmpop::new(start, end, Cmpop::IsNot) + } else { + LocatedCmpop::new(start, end, Cmpop::Is) + }; + ops.push(op); + } + Tok::NotEqual => { + ops.push(LocatedCmpop::new(start, end, Cmpop::NotEq)); + } + Tok::EqEqual => { + ops.push(LocatedCmpop::new(start, end, Cmpop::Eq)); + } + Tok::GreaterEqual => { + ops.push(LocatedCmpop::new(start, end, Cmpop::GtE)); + } + Tok::Greater => { + ops.push(LocatedCmpop::new(start, end, Cmpop::Gt)); + } + Tok::LessEqual => { + ops.push(LocatedCmpop::new(start, end, Cmpop::LtE)); + } + Tok::Less => { + ops.push(LocatedCmpop::new(start, end, Cmpop::Lt)); + } + _ => {} + } + } + } + ops +} + #[cfg(test)] mod tests { use anyhow::Result; use rustpython_parser as parser; - use rustpython_parser::ast::Location; + use rustpython_parser::ast::{Cmpop, Location}; use crate::helpers::{ - elif_else_range, else_range, first_colon_range, identifier_range, match_trailing_content, + elif_else_range, else_range, first_colon_range, identifier_range, locate_cmpops, + match_trailing_content, LocatedCmpop, }; use crate::source_code::Locator; use crate::types::Range; @@ -1352,7 +1552,7 @@ class Class(): } #[test] - fn test_else_range() -> Result<()> { + fn extract_else_range() -> Result<()> { let contents = r#" for x in y: pass @@ -1372,7 +1572,7 @@ else: } #[test] - fn test_first_colon_range() { + fn extract_first_colon_range() { let contents = "with a: pass"; let locator = Locator::new(contents); let range = first_colon_range( @@ -1387,7 +1587,7 @@ else: } #[test] - fn test_elif_else_range() -> Result<()> { + fn extract_elif_else_range() -> Result<()> { let contents = " if a: ... @@ -1420,4 +1620,70 @@ else: assert_eq!(range.end_location.column(), 4); Ok(()) } + + #[test] + fn extract_cmpop_location() { + assert_eq!( + locate_cmpops("x == 1"), + vec![LocatedCmpop::new( + Location::new(1, 2), + Location::new(1, 4), + Cmpop::Eq + )] + ); + + assert_eq!( + locate_cmpops("x != 1"), + vec![LocatedCmpop::new( + Location::new(1, 2), + Location::new(1, 4), + Cmpop::NotEq + )] + ); + + assert_eq!( + locate_cmpops("x is 1"), + vec![LocatedCmpop::new( + Location::new(1, 2), + Location::new(1, 4), + Cmpop::Is + )] + ); + + assert_eq!( + locate_cmpops("x is not 1"), + vec![LocatedCmpop::new( + Location::new(1, 2), + Location::new(1, 8), + Cmpop::IsNot + )] + ); + + assert_eq!( + locate_cmpops("x in 1"), + vec![LocatedCmpop::new( + Location::new(1, 2), + Location::new(1, 4), + Cmpop::In + )] + ); + + assert_eq!( + locate_cmpops("x not in 1"), + vec![LocatedCmpop::new( + Location::new(1, 2), + Location::new(1, 8), + Cmpop::NotIn + )] + ); + + assert_eq!( + locate_cmpops("x != (1 is not 2)"), + vec![LocatedCmpop::new( + Location::new(1, 2), + Location::new(1, 4), + Cmpop::NotEq + )] + ); + } } diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs index eee19941b9..ecf1416075 100644 --- a/crates/ruff_python_ast/src/lib.rs +++ b/crates/ruff_python_ast/src/lib.rs @@ -9,7 +9,6 @@ pub mod helpers; pub mod imports; pub mod logging; pub mod newlines; -pub mod operations; pub mod relocate; pub mod scope; pub mod source_code; diff --git a/crates/ruff_python_ast/src/operations.rs b/crates/ruff_python_ast/src/operations.rs deleted file mode 100644 index c37b160630..0000000000 --- a/crates/ruff_python_ast/src/operations.rs +++ /dev/null @@ -1,279 +0,0 @@ -use rustc_hash::FxHashMap; -use rustpython_parser::ast::{Cmpop, Expr, ExprKind, Located, Stmt, StmtKind}; -use rustpython_parser::{lexer, Mode, Tok}; - -use crate::helpers::any_over_expr; -use crate::visitor; -use crate::visitor::Visitor; - -#[derive(Default)] -struct GlobalVisitor<'a> { - globals: FxHashMap<&'a str, &'a Stmt>, -} - -impl<'a> Visitor<'a> for GlobalVisitor<'a> { - fn visit_stmt(&mut self, stmt: &'a Stmt) { - match &stmt.node { - StmtKind::Global { names } => { - for name in names { - self.globals.insert(name, stmt); - } - } - StmtKind::FunctionDef { .. } - | StmtKind::AsyncFunctionDef { .. } - | StmtKind::ClassDef { .. } => { - // Don't recurse. - } - _ => visitor::walk_stmt(self, stmt), - } - } -} - -/// Extract a map from global name to its last-defining [`Stmt`]. -pub fn extract_globals(body: &[Stmt]) -> FxHashMap<&str, &Stmt> { - let mut visitor = GlobalVisitor::default(); - for stmt in body { - visitor.visit_stmt(stmt); - } - visitor.globals -} - -/// Check if a node is parent of a conditional branch. -pub fn on_conditional_branch<'a>(parents: &mut impl Iterator) -> bool { - parents.any(|parent| { - if matches!( - parent.node, - StmtKind::If { .. } | StmtKind::While { .. } | StmtKind::Match { .. } - ) { - return true; - } - if let StmtKind::Expr { value } = &parent.node { - if matches!(value.node, ExprKind::IfExp { .. }) { - return true; - } - } - false - }) -} - -/// Check if a node is in a nested block. -pub fn in_nested_block<'a>(mut parents: impl Iterator) -> bool { - parents.any(|parent| { - matches!( - parent.node, - StmtKind::Try { .. } - | StmtKind::TryStar { .. } - | StmtKind::If { .. } - | StmtKind::With { .. } - | StmtKind::Match { .. } - ) - }) -} - -/// Check if a node represents an unpacking assignment. -pub fn is_unpacking_assignment(parent: &Stmt, child: &Expr) -> bool { - match &parent.node { - StmtKind::With { items, .. } => items.iter().any(|item| { - if let Some(optional_vars) = &item.optional_vars { - if matches!(optional_vars.node, ExprKind::Tuple { .. }) { - if any_over_expr(optional_vars, &|expr| expr == child) { - return true; - } - } - } - false - }), - StmtKind::Assign { targets, value, .. } => { - // In `(a, b) = (1, 2)`, `(1, 2)` is the target, and it is a tuple. - let value_is_tuple = matches!( - &value.node, - ExprKind::Set { .. } | ExprKind::List { .. } | ExprKind::Tuple { .. } - ); - // In `(a, b) = coords = (1, 2)`, `(a, b)` and `coords` are the targets, and - // `(a, b)` is a tuple. (We use "tuple" as a placeholder for any - // unpackable expression.) - let targets_are_tuples = targets.iter().all(|item| { - matches!( - item.node, - ExprKind::Set { .. } | ExprKind::List { .. } | ExprKind::Tuple { .. } - ) - }); - // If we're looking at `a` in `(a, b) = coords = (1, 2)`, then we should - // identify that the current expression is in a tuple. - let child_in_tuple = targets_are_tuples - || targets.iter().any(|item| { - matches!( - item.node, - ExprKind::Set { .. } | ExprKind::List { .. } | ExprKind::Tuple { .. } - ) && any_over_expr(item, &|expr| expr == child) - }); - - // If our child is a tuple, and value is not, it's always an unpacking - // expression. Ex) `x, y = tup` - if child_in_tuple && !value_is_tuple { - return true; - } - - // If our child isn't a tuple, but value is, it's never an unpacking expression. - // Ex) `coords = (1, 2)` - if !child_in_tuple && value_is_tuple { - return false; - } - - // If our target and the value are both tuples, then it's an unpacking - // expression assuming there's at least one non-tuple child. - // Ex) Given `(x, y) = coords = 1, 2`, `(x, y)` is considered an unpacking - // expression. Ex) Given `(x, y) = (a, b) = 1, 2`, `(x, y)` isn't - // considered an unpacking expression. - if child_in_tuple && value_is_tuple { - return !targets_are_tuples; - } - - false - } - _ => false, - } -} - -pub type LocatedCmpop = Located; - -/// Extract all [`Cmpop`] operators from a source code snippet, with appropriate -/// ranges. -/// -/// `RustPython` doesn't include line and column information on [`Cmpop`] nodes. -/// `CPython` doesn't either. This method iterates over the token stream and -/// re-identifies [`Cmpop`] nodes, annotating them with valid ranges. -pub fn locate_cmpops(contents: &str) -> Vec { - let mut tok_iter = lexer::lex(contents, Mode::Module).flatten().peekable(); - let mut ops: Vec = vec![]; - let mut count: usize = 0; - loop { - let Some((start, tok, end)) = tok_iter.next() else { - break; - }; - if matches!(tok, Tok::Lpar) { - count += 1; - continue; - } else if matches!(tok, Tok::Rpar) { - count -= 1; - continue; - } - if count == 0 { - match tok { - Tok::Not => { - if let Some((_, _, end)) = - tok_iter.next_if(|(_, tok, _)| matches!(tok, Tok::In)) - { - ops.push(LocatedCmpop::new(start, end, Cmpop::NotIn)); - } - } - Tok::In => { - ops.push(LocatedCmpop::new(start, end, Cmpop::In)); - } - Tok::Is => { - let op = if let Some((_, _, end)) = - tok_iter.next_if(|(_, tok, _)| matches!(tok, Tok::Not)) - { - LocatedCmpop::new(start, end, Cmpop::IsNot) - } else { - LocatedCmpop::new(start, end, Cmpop::Is) - }; - ops.push(op); - } - Tok::NotEqual => { - ops.push(LocatedCmpop::new(start, end, Cmpop::NotEq)); - } - Tok::EqEqual => { - ops.push(LocatedCmpop::new(start, end, Cmpop::Eq)); - } - Tok::GreaterEqual => { - ops.push(LocatedCmpop::new(start, end, Cmpop::GtE)); - } - Tok::Greater => { - ops.push(LocatedCmpop::new(start, end, Cmpop::Gt)); - } - Tok::LessEqual => { - ops.push(LocatedCmpop::new(start, end, Cmpop::LtE)); - } - Tok::Less => { - ops.push(LocatedCmpop::new(start, end, Cmpop::Lt)); - } - _ => {} - } - } - } - ops -} - -#[cfg(test)] -mod tests { - use rustpython_parser::ast::{Cmpop, Location}; - - use crate::operations::{locate_cmpops, LocatedCmpop}; - - #[test] - fn locates_cmpops() { - assert_eq!( - locate_cmpops("x == 1"), - vec![LocatedCmpop::new( - Location::new(1, 2), - Location::new(1, 4), - Cmpop::Eq - )] - ); - - assert_eq!( - locate_cmpops("x != 1"), - vec![LocatedCmpop::new( - Location::new(1, 2), - Location::new(1, 4), - Cmpop::NotEq - )] - ); - - assert_eq!( - locate_cmpops("x is 1"), - vec![LocatedCmpop::new( - Location::new(1, 2), - Location::new(1, 4), - Cmpop::Is - )] - ); - - assert_eq!( - locate_cmpops("x is not 1"), - vec![LocatedCmpop::new( - Location::new(1, 2), - Location::new(1, 8), - Cmpop::IsNot - )] - ); - - assert_eq!( - locate_cmpops("x in 1"), - vec![LocatedCmpop::new( - Location::new(1, 2), - Location::new(1, 4), - Cmpop::In - )] - ); - - assert_eq!( - locate_cmpops("x not in 1"), - vec![LocatedCmpop::new( - Location::new(1, 2), - Location::new(1, 8), - Cmpop::NotIn - )] - ); - - assert_eq!( - locate_cmpops("x != (1 is not 2)"), - vec![LocatedCmpop::new( - Location::new(1, 2), - Location::new(1, 4), - Cmpop::NotEq - )] - ); - } -}