diff --git a/crates/ruff/src/rules/pycodestyle/helpers.rs b/crates/ruff/src/rules/pycodestyle/helpers.rs index 8da9d37703..b21b4ea0ba 100644 --- a/crates/ruff/src/rules/pycodestyle/helpers.rs +++ b/crates/ruff/src/rules/pycodestyle/helpers.rs @@ -1,10 +1,7 @@ -use rustpython_parser::ast::{Cmpop, Expr, ExprKind}; -#[cfg(feature = "logical_lines")] -use rustpython_parser::Tok; -use unicode_width::UnicodeWidthStr; - use ruff_python_ast::helpers::{create_expr, unparse_expr}; use ruff_python_ast::source_code::Stylist; +use rustpython_parser::ast::{Cmpop, Expr, ExprKind}; +use unicode_width::UnicodeWidthStr; pub fn is_ambiguous_name(name: &str) -> bool { name == "l" || name == "I" || name == "O" @@ -58,179 +55,3 @@ pub fn is_overlong( true } - -#[cfg(feature = "logical_lines")] -pub const fn is_keyword_token(token: &Tok) -> bool { - matches!( - token, - Tok::False - | Tok::True - | Tok::None - | Tok::And - | Tok::As - | Tok::Assert - | Tok::Await - | Tok::Break - | Tok::Class - | Tok::Continue - | Tok::Def - | Tok::Del - | Tok::Elif - | Tok::Else - | Tok::Except - | Tok::Finally - | Tok::For - | Tok::From - | Tok::Global - | Tok::If - | Tok::Import - | Tok::In - | Tok::Is - | Tok::Lambda - | Tok::Nonlocal - | Tok::Not - | Tok::Or - | Tok::Pass - | Tok::Raise - | Tok::Return - | Tok::Try - | Tok::While - | Tok::With - | Tok::Yield - ) -} - -#[cfg(feature = "logical_lines")] -pub const fn is_singleton_token(token: &Tok) -> bool { - matches!( - token, - Tok::False { .. } | Tok::True { .. } | Tok::None { .. 
}, - ) -} - -#[cfg(feature = "logical_lines")] -pub const fn is_op_token(token: &Tok) -> bool { - matches!( - token, - Tok::Lpar - | Tok::Rpar - | Tok::Lsqb - | Tok::Rsqb - | Tok::Comma - | Tok::Semi - | Tok::Plus - | Tok::Minus - | Tok::Star - | Tok::Slash - | Tok::Vbar - | Tok::Amper - | Tok::Less - | Tok::Greater - | Tok::Equal - | Tok::Dot - | Tok::Percent - | Tok::Lbrace - | Tok::Rbrace - | Tok::NotEqual - | Tok::LessEqual - | Tok::GreaterEqual - | Tok::Tilde - | Tok::CircumFlex - | Tok::LeftShift - | Tok::RightShift - | Tok::DoubleStar - | Tok::PlusEqual - | Tok::MinusEqual - | Tok::StarEqual - | Tok::SlashEqual - | Tok::PercentEqual - | Tok::AmperEqual - | Tok::VbarEqual - | Tok::CircumflexEqual - | Tok::LeftShiftEqual - | Tok::RightShiftEqual - | Tok::DoubleStarEqual - | Tok::DoubleSlash - | Tok::DoubleSlashEqual - | Tok::At - | Tok::AtEqual - | Tok::Rarrow - | Tok::Ellipsis - | Tok::ColonEqual - | Tok::Colon - ) -} - -#[cfg(feature = "logical_lines")] -pub const fn is_skip_comment_token(token: &Tok) -> bool { - matches!( - token, - Tok::Newline | Tok::Indent | Tok::Dedent | Tok::NonLogicalNewline | Tok::Comment { .. 
} - ) -} - -#[cfg(feature = "logical_lines")] -pub const fn is_soft_keyword_token(token: &Tok) -> bool { - matches!(token, Tok::Match | Tok::Case) -} - -#[cfg(feature = "logical_lines")] -pub const fn is_arithmetic_token(token: &Tok) -> bool { - matches!( - token, - Tok::DoubleStar | Tok::Star | Tok::Plus | Tok::Minus | Tok::Slash | Tok::At - ) -} - -#[cfg(feature = "logical_lines")] -pub const fn is_ws_optional_token(token: &Tok) -> bool { - is_arithmetic_token(token) - || matches!( - token, - Tok::CircumFlex - | Tok::Amper - | Tok::Vbar - | Tok::LeftShift - | Tok::RightShift - | Tok::Percent - ) -} - -#[cfg(feature = "logical_lines")] -pub const fn is_ws_needed_token(token: &Tok) -> bool { - matches!( - token, - Tok::DoubleStarEqual - | Tok::StarEqual - | Tok::SlashEqual - | Tok::DoubleSlashEqual - | Tok::PlusEqual - | Tok::MinusEqual - | Tok::NotEqual - | Tok::Less - | Tok::Greater - | Tok::PercentEqual - | Tok::CircumflexEqual - | Tok::AmperEqual - | Tok::VbarEqual - | Tok::EqEqual - | Tok::LessEqual - | Tok::GreaterEqual - | Tok::LeftShiftEqual - | Tok::RightShiftEqual - | Tok::Equal - | Tok::And - | Tok::Or - | Tok::In - | Tok::Is - | Tok::Rarrow - ) -} - -#[cfg(feature = "logical_lines")] -pub const fn is_unary_token(token: &Tok) -> bool { - matches!( - token, - Tok::Plus | Tok::Minus | Tok::Star | Tok::DoubleStar | Tok::RightShift - ) -} diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/extraneous_whitespace.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/extraneous_whitespace.rs index bf02c07a25..6f6816ecb3 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/extraneous_whitespace.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/extraneous_whitespace.rs @@ -1,10 +1,10 @@ use rustpython_parser::ast::Location; -use rustpython_parser::Tok; use super::{LogicalLine, Whitespace}; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; +use 
ruff_python_ast::token_kind::TokenKind; /// ## What it does /// Checks for the use of extraneous whitespace after "(". @@ -103,12 +103,12 @@ impl Violation for WhitespaceBeforePunctuation { /// E201, E202, E203 pub(crate) fn extraneous_whitespace(line: &LogicalLine) -> Vec<(Location, DiagnosticKind)> { let mut diagnostics = vec![]; - let mut last_token: Option<&Tok> = None; + let mut last_token: Option<TokenKind> = None; for token in line.tokens() { let kind = token.kind(); match kind { - Tok::Lbrace | Tok::Lpar | Tok::Lsqb => { + TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => { if !matches!(line.trailing_whitespace(&token), Whitespace::None) { let end = token.end(); diagnostics.push(( @@ -117,18 +117,24 @@ pub(crate) fn extraneous_whitespace(line: &LogicalLine) -> Vec<(Location, Diagno )); } } - Tok::Rbrace | Tok::Rpar | Tok::Rsqb | Tok::Comma | Tok::Semi | Tok::Colon => { - let diagnostic_kind = if matches!(kind, Tok::Comma | Tok::Semi | Tok::Colon) { - DiagnosticKind::from(WhitespaceBeforePunctuation) - } else { - DiagnosticKind::from(WhitespaceBeforeCloseBracket) - }; + TokenKind::Rbrace + | TokenKind::Rpar + | TokenKind::Rsqb + | TokenKind::Comma + | TokenKind::Semi + | TokenKind::Colon => { + let diagnostic_kind = + if matches!(kind, TokenKind::Comma | TokenKind::Semi | TokenKind::Colon) { + DiagnosticKind::from(WhitespaceBeforePunctuation) + } else { + DiagnosticKind::from(WhitespaceBeforeCloseBracket) + }; if let (Whitespace::Single | Whitespace::Many | Whitespace::Tab, offset) = line.leading_whitespace(&token) { - let start = token.start(); - if !matches!(last_token, Some(Tok::Comma)) { + if !matches!(last_token, Some(TokenKind::Comma)) { + let start = token.start(); diagnostics.push(( Location::new(start.row(), start.column() - offset), diagnostic_kind, diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/indentation.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/indentation.rs index 721577de98..5978c97927 100644 --- 
a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/indentation.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/indentation.rs @@ -1,8 +1,8 @@ use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::token_kind::TokenKind; use rustpython_parser::ast::Location; -use rustpython_parser::Tok; use super::LogicalLine; @@ -253,8 +253,8 @@ pub(crate) fn indentation( )); } let indent_expect = prev_logical_line - .and_then(|prev_logical_line| prev_logical_line.tokens().trimmed().last()) - .map_or(false, |t| t.kind() == &Tok::Colon); + .and_then(|prev_logical_line| prev_logical_line.tokens_trimmed().last()) + .map_or(false, |t| t.kind() == TokenKind::Colon); if indent_expect && indent_level <= prev_indent_level.unwrap_or(0) { diagnostics.push(( diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace.rs index 0cf73b3d04..e3f5d21aee 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace.rs @@ -1,27 +1,36 @@ -use itertools::Itertools; -use rustpython_parser::Tok; - use super::LogicalLine; use ruff_diagnostics::Edit; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::token_kind::TokenKind; use ruff_python_ast::types::Range; #[violation] pub struct MissingWhitespace { - pub token: String, + pub token: TokenKind, +} + +impl MissingWhitespace { + fn token_text(&self) -> char { + match self.token { + TokenKind::Colon => ':', + TokenKind::Semi => ';', + TokenKind::Comma => ',', + _ => unreachable!(), + } + } } impl AlwaysAutofixableViolation for MissingWhitespace { #[derive_message_formats] fn message(&self) -> String { - let MissingWhitespace { token } = self; - 
format!("Missing whitespace after {token}") + let token = self.token_text(); + format!("Missing whitespace after '{token}'") } fn autofix_title(&self) -> String { - let MissingWhitespace { token } = self; - format!("Added missing whitespace after {token}") + let token = self.token_text(); + format!("Added missing whitespace after '{token}'") } } @@ -29,45 +38,47 @@ impl AlwaysAutofixableViolation for MissingWhitespace { pub(crate) fn missing_whitespace(line: &LogicalLine, autofix: bool) -> Vec<Diagnostic> { let mut diagnostics = vec![]; - let mut num_lsqb = 0u32; - let mut num_rsqb = 0u32; + let mut open_parentheses = 0u32; let mut prev_lsqb = None; let mut prev_lbrace = None; + let mut iter = line.tokens().iter().peekable(); - for (token, next_token) in line.tokens().iter().tuple_windows() { + while let Some(token) = iter.next() { let kind = token.kind(); match kind { - Tok::Lsqb => { - num_lsqb += 1; + TokenKind::Lsqb => { + open_parentheses += 1; prev_lsqb = Some(token.start()); } - Tok::Rsqb => { - num_rsqb += 1; + TokenKind::Rsqb => { + open_parentheses = open_parentheses.saturating_sub(1); } - Tok::Lbrace => { + TokenKind::Lbrace => { prev_lbrace = Some(token.start()); } - Tok::Comma | Tok::Semi | Tok::Colon => { + TokenKind::Comma | TokenKind::Semi | TokenKind::Colon => { let after = line.text_after(&token); if !after.chars().next().map_or(false, char::is_whitespace) { - match (kind, next_token.kind()) { - (Tok::Colon, _) if num_lsqb > num_rsqb && prev_lsqb > prev_lbrace => { - continue; // Slice syntax, no space required + if let Some(next_token) = iter.peek() { + match (kind, next_token.kind()) { + (TokenKind::Colon, _) + if open_parentheses > 0 && prev_lsqb > prev_lbrace => + { + continue; // Slice syntax, no space required + } + (TokenKind::Comma, TokenKind::Rpar | TokenKind::Rsqb) => { + continue; // Allow tuple with only one element: (3,) + } + (TokenKind::Colon, TokenKind::Equal) => { + continue; // Allow assignment expression + } + _ => {} } - (Tok::Comma, Tok::Rpar | Tok::Rsqb) => { - 
continue; // Allow tuple with only one element: (3,) - } - (Tok::Colon, Tok::Equal) => { - continue; // Allow assignment expression - } - _ => {} } - let kind = MissingWhitespace { - token: kind.to_string(), - }; + let kind = MissingWhitespace { token: kind }; let (start, end) = token.range(); let mut diagnostic = Diagnostic::new(kind, Range::new(start, start)); diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs index 48eef5d855..83eee70cd2 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_after_keyword.rs @@ -1,13 +1,12 @@ use itertools::Itertools; use rustpython_parser::ast::Location; -use rustpython_parser::Tok; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::token_kind::TokenKind; use super::LogicalLineTokens; -use crate::rules::pycodestyle::helpers::{is_keyword_token, is_singleton_token}; #[violation] pub struct MissingWhitespaceAfterKeyword; @@ -29,12 +28,12 @@ pub(crate) fn missing_whitespace_after_keyword( let tok0_kind = tok0.kind(); let tok1_kind = tok1.kind(); - if is_keyword_token(tok0_kind) - && !(is_singleton_token(tok0_kind) - || matches!(tok0_kind, Tok::Async | Tok::Await) - || tok0_kind == &Tok::Except && tok1_kind == &Tok::Star - || tok0_kind == &Tok::Yield && tok1_kind == &Tok::Rpar - || matches!(tok1_kind, Tok::Colon | Tok::Newline)) + if tok0_kind.is_keyword() + && !(tok0_kind.is_singleton() + || matches!(tok0_kind, TokenKind::Async | TokenKind::Await) + || tok0_kind == TokenKind::Except && tok1_kind == TokenKind::Star + || tok0_kind == TokenKind::Yield && tok1_kind == TokenKind::Rpar + || matches!(tok1_kind, TokenKind::Colon | TokenKind::Newline)) && tok0.end() == 
tok1.start() { diagnostics.push((tok0.end(), MissingWhitespaceAfterKeyword.into())); diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs index 653ea59de9..a76ee581cf 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/missing_whitespace_around_operator.rs @@ -1,14 +1,10 @@ use rustpython_parser::ast::Location; -use rustpython_parser::Tok; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::token_kind::TokenKind; -use crate::rules::pycodestyle::helpers::{ - is_arithmetic_token, is_keyword_token, is_op_token, is_skip_comment_token, - is_soft_keyword_token, is_unary_token, is_ws_needed_token, is_ws_optional_token, -}; use crate::rules::pycodestyle::rules::logical_lines::LogicalLineTokens; // E225 @@ -66,29 +62,27 @@ pub(crate) fn missing_whitespace_around_operator( let mut needs_space_aux: Option<bool> = None; let mut prev_end_aux: Option<Location> = None; let mut parens = 0u32; - let mut prev_type: Option<&Tok> = None; + let mut prev_type: Option<TokenKind> = None; let mut prev_end: Option<Location> = None; for token in tokens { let kind = token.kind(); - if is_skip_comment_token(kind) { + if kind.is_skip_comment() { continue; } + match kind { - Tok::Lpar | Tok::Lambda => parens += 1, - Tok::Rpar => parens -= 1, + TokenKind::Lpar | TokenKind::Lambda => parens += 1, + TokenKind::Rpar => parens -= 1, _ => {} }; - let needs_space = (needs_space_main.is_some() && needs_space_main.unwrap()) - || needs_space_aux.is_some() - || prev_end_aux.is_some(); + let needs_space = + needs_space_main == Some(true) || needs_space_aux.is_some() || prev_end_aux.is_some(); if needs_space { if Some(token.start()) != prev_end { - if !(needs_space_main.is_some() && 
needs_space_main.unwrap()) - && (needs_space_aux.is_none() || !needs_space_aux.unwrap()) - { + if needs_space_main != Some(true) && needs_space_aux != Some(true) { diagnostics.push(( prev_end_aux.unwrap(), MissingWhitespaceAroundOperator.into(), @@ -97,27 +91,27 @@ pub(crate) fn missing_whitespace_around_operator( needs_space_main = Some(false); needs_space_aux = None; prev_end_aux = None; - } else if kind == &Tok::Greater && matches!(prev_type, Some(Tok::Less | Tok::Minus)) { + } else if kind == TokenKind::Greater + && matches!(prev_type, Some(TokenKind::Less | TokenKind::Minus)) + { // Tolerate the "<>" operator, even if running Python 3 // Deal with Python 3's annotated return value "->" - } else if prev_type == Some(&Tok::Slash) - && matches!(kind, Tok::Comma | Tok::Rpar | Tok::Colon) - || (prev_type == Some(&Tok::Rpar) && kind == &Tok::Colon) + } else if prev_type == Some(TokenKind::Slash) + && matches!(kind, TokenKind::Comma | TokenKind::Rpar | TokenKind::Colon) + || (prev_type == Some(TokenKind::Rpar) && kind == TokenKind::Colon) { // Tolerate the "/" operator in function definition // For more info see PEP570 } else { - if (needs_space_main.is_some() && needs_space_main.unwrap()) - || (needs_space_aux.is_some() && needs_space_aux.unwrap()) - { + if needs_space_main == Some(true) || needs_space_aux == Some(true) { diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into())); - } else if prev_type != Some(&Tok::DoubleStar) { - if prev_type == Some(&Tok::Percent) { + } else if prev_type != Some(TokenKind::DoubleStar) { + if prev_type == Some(TokenKind::Percent) { diagnostics.push(( prev_end_aux.unwrap(), MissingWhitespaceAroundModuloOperator.into(), )); - } else if !is_arithmetic_token(prev_type.unwrap()) { + } else if !prev_type.unwrap().is_arithmetic() { diagnostics.push(( prev_end_aux.unwrap(), MissingWhitespaceAroundBitwiseOrShiftOperator.into(), @@ -133,28 +127,30 @@ pub(crate) fn missing_whitespace_around_operator( needs_space_aux = 
None; prev_end_aux = None; } - } else if (is_op_token(kind) || matches!(kind, Tok::Name { .. })) && prev_end.is_some() { - if kind == &Tok::Equal && parens > 0 { + } else if (kind.is_operator() || matches!(kind, TokenKind::Name)) && prev_end.is_some() { + if kind == TokenKind::Equal && parens > 0 { // Allow keyword args or defaults: foo(bar=None). - } else if is_ws_needed_token(kind) { + } else if kind.is_whitespace_needed() { needs_space_main = Some(true); needs_space_aux = None; prev_end_aux = None; - } else if is_unary_token(kind) { + } else if kind.is_unary() { // Check if the operator is used as a binary operator // Allow unary operators: -123, -x, +1. // Allow argument unpacking: foo(*args, **kwargs) if let Some(prev_type) = prev_type { - if (matches!(prev_type, Tok::Rpar | Tok::Rsqb | Tok::Rbrace)) - || (!is_op_token(prev_type) && !is_keyword_token(prev_type)) - && (!is_soft_keyword_token(prev_type)) + if (matches!( + prev_type, + TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace + )) || (!prev_type.is_operator() && !prev_type.is_keyword()) + && (!prev_type.is_soft_keyword()) { needs_space_main = None; needs_space_aux = None; prev_end_aux = None; } } - } else if is_ws_optional_token(kind) { + } else if kind.is_whitespace_optional() { needs_space_main = None; needs_space_aux = None; prev_end_aux = None; @@ -163,13 +159,9 @@ pub(crate) fn missing_whitespace_around_operator( if needs_space_main.is_none() { // Surrounding space is optional, but ensure that // trailing space matches opening space - needs_space_main = None; prev_end_aux = prev_end; needs_space_aux = Some(Some(token.start()) != prev_end_aux); - } else if needs_space_main.is_some() - && needs_space_main.unwrap() - && Some(token.start()) == prev_end_aux - { + } else if needs_space_main == Some(true) && Some(token.start()) == prev_end_aux { // A needed opening space was not found diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into())); needs_space_main = Some(false); diff 
--git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs index 6801536965..c9fcb1f327 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/mod.rs @@ -1,16 +1,13 @@ use bitflags::bitflags; use rustpython_parser::ast::Location; use rustpython_parser::lexer::LexResult; -use rustpython_parser::Tok; use std::fmt::{Debug, Formatter}; use std::iter::FusedIterator; -use std::ops::Deref; use ruff_python_ast::source_code::Locator; +use ruff_python_ast::token_kind::TokenKind; use ruff_python_ast::types::Range; -use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token}; - pub(crate) use extraneous_whitespace::{ extraneous_whitespace, WhitespaceAfterOpenBracket, WhitespaceBeforeCloseBracket, WhitespaceBeforePunctuation, @@ -73,12 +70,15 @@ bitflags! { const KEYWORD = 0b0000_1000; /// Whether the logical line contains a comment. const COMMENT = 0b0001_0000; + + /// Whether the logical line contains any non trivia token (no comment, newline, or in/dedent) + const NON_TRIVIA = 0b0010_0000; } } #[derive(Clone)] pub(crate) struct LogicalLines<'a> { - tokens: Tokens<'a>, + tokens: Tokens, lines: Vec<Line>, locator: &'a Locator<'a>, } @@ -91,16 +91,19 @@ impl<'a> LogicalLines<'a> { let mut parens: u32 = 0; for (start, token, end) in tokens.iter().flatten() { - builder.push_token(*start, token, *end); + let token_kind = TokenKind::from_token(token); + builder.push_token(*start, token_kind, *end); - match token { - Tok::Lbrace | Tok::Lpar | Tok::Lsqb => { + match token_kind { + TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => { parens += 1; } - Tok::Rbrace | Tok::Rpar | Tok::Rsqb => { + TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => { parens -= 1; } - Tok::Newline | Tok::NonLogicalNewline | Tok::Comment { .. 
} if parens == 0 => { + TokenKind::Newline | TokenKind::NonLogicalNewline | TokenKind::Comment + if parens == 0 => + { builder.finish_line(); } _ => {} @@ -153,7 +156,7 @@ pub(crate) struct LogicalLine<'a> { impl<'a> LogicalLine<'a> { /// Returns `true` if this is a comment only line pub fn is_comment_only(&self) -> bool { - self.flags() == TokenFlags::COMMENT && self.tokens().trimmed().is_empty() + self.flags() == TokenFlags::COMMENT } /// Returns logical line's text including comments, indents, dedent and trailing new lines. @@ -167,15 +170,53 @@ impl<'a> LogicalLine<'a> { self.tokens_trimmed().text() } - #[cfg(test)] pub fn tokens_trimmed(&self) -> LogicalLineTokens<'a> { - self.tokens().trimmed() + let mut front = self.line.tokens_start as usize; + let mut back = self.line.tokens_end as usize; + + let mut kinds = self.lines.tokens.kinds[front..back].iter(); + + for kind in kinds.by_ref() { + if !matches!( + kind, + TokenKind::Newline + | TokenKind::NonLogicalNewline + | TokenKind::Indent + | TokenKind::Dedent + | TokenKind::Comment + ) { + break; + } + front += 1; + } + + for kind in kinds.rev() { + if !matches!( + kind, + TokenKind::Newline + | TokenKind::NonLogicalNewline + | TokenKind::Indent + | TokenKind::Dedent + | TokenKind::Comment + ) { + break; + } + back -= 1; + } + + LogicalLineTokens { + lines: self.lines, + front, + back, + } } /// Returns the text after `token` + #[inline] pub fn text_after(&self, token: &LogicalLineToken<'a>) -> &str { debug_assert!( - (self.line.tokens_start..self.line.tokens_end).contains(&token.position), + (self.line.tokens_start as usize..self.line.tokens_end as usize) + .contains(&token.position), "Token does not belong to this line" ); @@ -187,9 +228,11 @@ impl<'a> LogicalLine<'a> { } /// Returns the text before `token` + #[inline] pub fn text_before(&self, token: &LogicalLineToken<'a>) -> &str { debug_assert!( - (self.line.tokens_start..self.line.tokens_end).contains(&token.position), + (self.line.tokens_start as 
usize..self.line.tokens_end as usize) + .contains(&token.position), "Token does not belong to this line" ); @@ -214,8 +257,8 @@ impl<'a> LogicalLine<'a> { pub fn tokens(&self) -> LogicalLineTokens<'a> { LogicalLineTokens { lines: self.lines, - front: self.line.tokens_start, - back: self.line.tokens_end, + front: self.line.tokens_start as usize, + back: self.line.tokens_end as usize, } } @@ -284,8 +327,8 @@ impl FusedIterator for LogicalLinesIter<'_> {} /// The tokens of a logical line pub(crate) struct LogicalLineTokens<'a> { lines: &'a LogicalLines<'a>, - front: u32, - back: u32, + front: usize, + back: usize, } impl<'a> LogicalLineTokens<'a> { @@ -297,56 +340,6 @@ impl<'a> LogicalLineTokens<'a> { } } - pub fn len(&self) -> usize { - (self.back - self.front) as usize - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn trimmed(&self) -> LogicalLineTokens<'a> { - let tokens = &self.lines.tokens[self.front as usize..self.back as usize]; - let mut front = self.front; - let mut back = self.back; - - let mut iter = tokens.iter(); - - for (_, kind, _) in iter.by_ref() { - if !matches!( - kind, - Tok::Newline - | Tok::NonLogicalNewline - | Tok::Indent - | Tok::Dedent - | Tok::Comment(..) - ) { - break; - } - front += 1; - } - - for (_, kind, _) in iter.rev() { - if !matches!( - kind, - Tok::Newline - | Tok::NonLogicalNewline - | Tok::Indent - | Tok::Dedent - | Tok::Comment(..) 
- ) { - break; - } - back -= 1; - } - - LogicalLineTokens { - lines: self.lines, - front, - back, - } - } - pub fn text(&self) -> &'a str { match (self.first(), self.last()) { (Some(first), Some(last)) => { @@ -394,9 +387,9 @@ impl Debug for LogicalLineTokens<'_> { /// Iterator over the tokens of a [`LogicalLine`] pub(crate) struct LogicalLineTokensIter<'a> { - tokens: &'a Tokens<'a>, - front: u32, - back: u32, + tokens: &'a Tokens, + front: usize, + back: usize, } impl<'a> Iterator for LogicalLineTokensIter<'a> { @@ -417,7 +410,7 @@ } fn size_hint(&self) -> (usize, Option<usize>) { - let len = (self.back - self.front) as usize; + let len = self.back - self.front; (len, Some(len)) } } @@ -443,35 +436,42 @@ impl DoubleEndedIterator for LogicalLineTokensIter<'_> { /// A token of a [`LogicalLine`] #[derive(Clone)] pub(crate) struct LogicalLineToken<'a> { - tokens: &'a Tokens<'a>, - position: u32, + tokens: &'a Tokens, + position: usize, } impl<'a> LogicalLineToken<'a> { /// Returns the token's kind - pub fn kind(&self) -> &'a Tok { + #[inline] + pub fn kind(&self) -> TokenKind { #[allow(unsafe_code)] - let (_, token, _) = unsafe { *self.tokens.get_unchecked(self.position as usize) }; - - token + unsafe { + *self.tokens.kinds.get_unchecked(self.position) + } } /// Returns the token's start location + #[inline] pub fn start(&self) -> Location { - self.range().0 + #[allow(unsafe_code)] + unsafe { + *self.tokens.starts.get_unchecked(self.position) + } } /// Returns the token's end location + #[inline] pub fn end(&self) -> Location { - self.range().1 + #[allow(unsafe_code)] + unsafe { + *self.tokens.ends.get_unchecked(self.position) + } } /// Returns a tuple with the token's `(start, end)` locations + #[inline] pub fn range(&self) -> (Location, Location) { - #[allow(unsafe_code)] - let &(start, _, end) = unsafe { self.tokens.get_unchecked(self.position as usize) }; - - (start, end) + (self.start(), self.end()) } } @@ -547,13 
+547,13 @@ struct CurrentLine { /// Builder for [`LogicalLines`] #[derive(Debug, Default)] -struct LogicalLinesBuilder<'a> { - tokens: Tokens<'a>, +struct LogicalLinesBuilder { + tokens: Tokens, lines: Vec<Line>, current_line: Option<CurrentLine>, } -impl<'a> LogicalLinesBuilder<'a> { +impl LogicalLinesBuilder { fn with_capacity(tokens: usize) -> Self { Self { tokens: Tokens::with_capacity(tokens), @@ -563,7 +563,7 @@ // SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each tokens is at least one character long #[allow(clippy::cast_possible_truncation)] - fn push_token(&mut self, start: Location, token: &'a Tok, end: Location) { + fn push_token(&mut self, start: Location, kind: TokenKind, end: Location) { let tokens_start = self.tokens.len(); let line = self.current_line.get_or_insert_with(|| CurrentLine { @@ -571,27 +571,44 @@ - if matches!(token, Tok::Comment { .. 
}) { + if matches!(kind, TokenKind::Comment) { line.flags.insert(TokenFlags::COMMENT); - } else if is_op_token(token) { + } else if kind.is_operator() { line.flags.insert(TokenFlags::OPERATOR); line.flags.set( TokenFlags::BRACKET, matches!( - token, - Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace + kind, + TokenKind::Lpar + | TokenKind::Lsqb + | TokenKind::Lbrace + | TokenKind::Rpar + | TokenKind::Rsqb + | TokenKind::Rbrace ), ); } - if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) { + if matches!(kind, TokenKind::Comma | TokenKind::Semi | TokenKind::Colon) { line.flags.insert(TokenFlags::PUNCTUATION); - } else if is_keyword_token(token) { + } else if kind.is_keyword() { line.flags.insert(TokenFlags::KEYWORD); } - self.tokens.push(token, start, end); + line.flags.set( + TokenFlags::NON_TRIVIA, + !matches!( + kind, + TokenKind::Comment + | TokenKind::Newline + | TokenKind::NonLogicalNewline + | TokenKind::Dedent + | TokenKind::Indent + ), + ); + + self.tokens.push(kind, start, end); } // SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each tokens is at least one character long @@ -606,7 +623,7 @@ } } - fn finish(mut self, locator: &'a Locator<'a>) -> LogicalLines<'a> { + fn finish<'a>(mut self, locator: &'a Locator<'a>) -> LogicalLines<'a> { self.finish_line(); LogicalLines { @@ -625,29 +642,36 @@ struct Line { } #[derive(Debug, Clone, Default)] -struct Tokens<'a>(Vec<(Location, &'a Tok, Location)>); +struct Tokens { + /// The token kinds + kinds: Vec<TokenKind>, -impl<'a> Tokens<'a> { + /// The start locations + starts: Vec<Location>, + + /// The end locations + ends: Vec<Location>, +} + +impl Tokens { /// Creates new tokens with a reserved size of `capacity` fn with_capacity(capacity: usize) -> Self { - Self(Vec::with_capacity(capacity)) + Self { + kinds: Vec::with_capacity(capacity), + starts: Vec::with_capacity(capacity), + ends: Vec::with_capacity(capacity), + } } /// Returns 
the number of stored tokens. fn len(&self) -> usize { - self.0.len() + self.kinds.len() } /// Adds a new token with the given `kind` and `start`, `end` location. - fn push(&mut self, kind: &'a Tok, start: Location, end: Location) { - self.0.push((start, kind, end)); - } -} - -impl<'a> Deref for Tokens<'a> { - type Target = [(Location, &'a Tok, Location)]; - - fn deref(&self) -> &Self::Target { - &self.0 + fn push(&mut self, kind: TokenKind, start: Location, end: Location) { + self.kinds.push(kind); + self.starts.push(start); + self.ends.push(end); } } diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/space_around_operator.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/space_around_operator.rs index 11bfa0d7b7..3bec3d288c 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/space_around_operator.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/space_around_operator.rs @@ -1,10 +1,10 @@ use rustpython_parser::ast::Location; -use rustpython_parser::Tok; use super::{LogicalLine, Whitespace}; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::token_kind::TokenKind; /// ## What it does /// Checks for extraneous tabs before an operator. 
@@ -131,7 +131,7 @@ pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(Location, Diagno let is_operator = is_operator_token(token.kind()); if is_operator { - let (start, end) = token.range(); + let start = token.start(); if !after_operator { match line.leading_whitespace(&token) { @@ -148,8 +148,14 @@ pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(Location, Diagno } match line.trailing_whitespace(&token) { - Whitespace::Tab => diagnostics.push((end, TabAfterOperator.into())), - Whitespace::Many => diagnostics.push((end, MultipleSpacesAfterOperator.into())), + Whitespace::Tab => { + let end = token.end(); + diagnostics.push((end, TabAfterOperator.into())); + } + Whitespace::Many => { + let end = token.end(); + diagnostics.push((end, MultipleSpacesAfterOperator.into())); + } _ => {} } } @@ -160,39 +166,39 @@ pub(crate) fn space_around_operator(line: &LogicalLine) -> Vec<(Location, Diagno diagnostics } -const fn is_operator_token(token: &Tok) -> bool { +const fn is_operator_token(token: TokenKind) -> bool { matches!( token, - Tok::Plus - | Tok::Minus - | Tok::Star - | Tok::Slash - | Tok::Vbar - | Tok::Amper - | Tok::Less - | Tok::Greater - | Tok::Equal - | Tok::Percent - | Tok::NotEqual - | Tok::LessEqual - | Tok::GreaterEqual - | Tok::CircumFlex - | Tok::LeftShift - | Tok::RightShift - | Tok::DoubleStar - | Tok::PlusEqual - | Tok::MinusEqual - | Tok::StarEqual - | Tok::SlashEqual - | Tok::PercentEqual - | Tok::AmperEqual - | Tok::VbarEqual - | Tok::CircumflexEqual - | Tok::LeftShiftEqual - | Tok::RightShiftEqual - | Tok::DoubleStarEqual - | Tok::DoubleSlash - | Tok::DoubleSlashEqual - | Tok::ColonEqual + TokenKind::Plus + | TokenKind::Minus + | TokenKind::Star + | TokenKind::Slash + | TokenKind::Vbar + | TokenKind::Amper + | TokenKind::Less + | TokenKind::Greater + | TokenKind::Equal + | TokenKind::Percent + | TokenKind::NotEqual + | TokenKind::LessEqual + | TokenKind::GreaterEqual + | TokenKind::CircumFlex + | TokenKind::LeftShift + | 
TokenKind::RightShift + | TokenKind::DoubleStar + | TokenKind::PlusEqual + | TokenKind::MinusEqual + | TokenKind::StarEqual + | TokenKind::SlashEqual + | TokenKind::PercentEqual + | TokenKind::AmperEqual + | TokenKind::VbarEqual + | TokenKind::CircumflexEqual + | TokenKind::LeftShiftEqual + | TokenKind::RightShiftEqual + | TokenKind::DoubleStarEqual + | TokenKind::DoubleSlash + | TokenKind::DoubleSlashEqual + | TokenKind::ColonEqual ) } diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_keywords.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_keywords.rs index 567c5906cf..b6c64a738b 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_keywords.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_keywords.rs @@ -1,7 +1,6 @@ use rustpython_parser::ast::Location; use super::{LogicalLine, Whitespace}; -use crate::rules::pycodestyle::helpers::is_keyword_token; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; @@ -115,32 +114,40 @@ pub(crate) fn whitespace_around_keywords(line: &LogicalLine) -> Vec<(Location, D let mut after_keyword = false; for token in line.tokens() { - let is_keyword = is_keyword_token(token.kind()); - + let is_keyword = token.kind().is_keyword(); if is_keyword { - let (start, end) = token.range(); - if !after_keyword { match line.leading_whitespace(&token) { - (Whitespace::Tab, offset) => diagnostics.push(( - Location::new(start.row(), start.column() - offset), - TabBeforeKeyword.into(), - )), - (Whitespace::Many, offset) => diagnostics.push(( - Location::new(start.row(), start.column() - offset), - MultipleSpacesBeforeKeyword.into(), - )), + (Whitespace::Tab, offset) => { + let start = token.start(); + diagnostics.push(( + Location::new(start.row(), start.column() - offset), + TabBeforeKeyword.into(), + )); + } + (Whitespace::Many, offset) => { + let 
start = token.start(); + diagnostics.push(( + Location::new(start.row(), start.column() - offset), + MultipleSpacesBeforeKeyword.into(), + )); + } _ => {} } } match line.trailing_whitespace(&token) { - Whitespace::Tab => diagnostics.push((end, TabAfterKeyword.into())), - Whitespace::Many => diagnostics.push((end, MultipleSpacesAfterKeyword.into())), + Whitespace::Tab => { + let end = token.end(); + diagnostics.push((end, TabAfterKeyword.into())); + } + Whitespace::Many => { + let end = token.end(); + diagnostics.push((end, MultipleSpacesAfterKeyword.into())); + } _ => {} } } - after_keyword = is_keyword; } diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_named_parameter_equals.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_named_parameter_equals.rs index 95f8938e22..1aa3619cc6 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_named_parameter_equals.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_around_named_parameter_equals.rs @@ -1,12 +1,10 @@ -use rustpython_parser::ast::Location; -use rustpython_parser::Tok; - use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::token_kind::TokenKind; +use rustpython_parser::ast::Location; use super::LogicalLineTokens; -use crate::rules::pycodestyle::helpers::is_op_token; #[violation] pub struct UnexpectedSpacesAroundKeywordParameterEquals; @@ -31,8 +29,8 @@ impl Violation for MissingWhitespaceAroundParameterEquals { fn is_in_def(tokens: &LogicalLineTokens) -> bool { for token in tokens { match token.kind() { - Tok::Async | Tok::Indent | Tok::Dedent => continue, - Tok::Def => return true, + TokenKind::Async | TokenKind::Indent | TokenKind::Dedent => continue, + TokenKind::Def => return true, _ => return false, } } @@ -45,76 +43,86 @@ pub(crate) fn whitespace_around_named_parameter_equals( tokens: 
&LogicalLineTokens, ) -> Vec<(Location, DiagnosticKind)> { let mut diagnostics = vec![]; - let mut parens = 0; - let mut require_space = false; - let mut no_space = false; + let mut parens = 0u32; let mut annotated_func_arg = false; let mut prev_end: Option<Location> = None; let in_def = is_in_def(tokens); + let mut iter = tokens.iter().peekable(); - for token in tokens { + while let Some(token) = iter.next() { let kind = token.kind(); - if kind == &Tok::NonLogicalNewline { + if kind == TokenKind::NonLogicalNewline { continue; } - if no_space { - no_space = false; - if Some(token.start()) != prev_end { - diagnostics.push(( - prev_end.unwrap(), - UnexpectedSpacesAroundKeywordParameterEquals.into(), - )); - } - } - if require_space { - require_space = false; - let start = token.start(); - if Some(start) == prev_end { - diagnostics.push((start, MissingWhitespaceAroundParameterEquals.into())); - } - } - if is_op_token(kind) { - match kind { - Tok::Lpar | Tok::Lsqb => { - parens += 1; - } - Tok::Rpar | Tok::Rsqb => { - parens -= 1; - } - Tok::Colon if parens == 1 && in_def => { - annotated_func_arg = true; - } - Tok::Comma if parens == 1 => { + match kind { + TokenKind::Lpar | TokenKind::Lsqb => { + parens += 1; + } + TokenKind::Rpar | TokenKind::Rsqb => { + parens -= 1; + + if parens == 0 { annotated_func_arg = false; } - Tok::Equal if parens > 0 => { - if annotated_func_arg && parens == 1 { - require_space = true; - let start = token.start(); - if Some(start) == prev_end { - diagnostics - .push((start, MissingWhitespaceAroundParameterEquals.into())); + } + + TokenKind::Colon if parens == 1 && in_def => { + annotated_func_arg = true; + } + TokenKind::Comma if parens == 1 => { + annotated_func_arg = false; + } + TokenKind::Equal if parens > 0 => { + if annotated_func_arg && parens == 1 { + let start = token.start(); + if Some(start) == prev_end { + diagnostics.push((start, MissingWhitespaceAroundParameterEquals.into())); + } + + while let Some(next) = iter.peek() { + if 
next.kind() == TokenKind::NonLogicalNewline { + iter.next(); + } else { + let next_start = next.start(); + + if next_start == token.end() { + diagnostics.push(( + next_start, + MissingWhitespaceAroundParameterEquals.into(), + )); + } + break; } - } else { - no_space = true; - if Some(token.start()) != prev_end { - diagnostics.push(( - prev_end.unwrap(), - UnexpectedSpacesAroundKeywordParameterEquals.into(), - )); + } + } else { + if Some(token.start()) != prev_end { + diagnostics.push(( + prev_end.unwrap(), + UnexpectedSpacesAroundKeywordParameterEquals.into(), + )); + } + + while let Some(next) = iter.peek() { + if next.kind() == TokenKind::NonLogicalNewline { + iter.next(); + } else { + if next.start() != token.end() { + diagnostics.push(( + token.end(), + UnexpectedSpacesAroundKeywordParameterEquals.into(), + )); + } + break; } } } - _ => {} - } - - if parens < 1 { - annotated_func_arg = false; } + _ => {} } + prev_end = Some(token.end()); } diagnostics diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_comment.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_comment.rs index 8ab2bedc88..ec45cfd689 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_comment.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_comment.rs @@ -1,12 +1,11 @@ -use rustpython_parser::ast::Location; -use rustpython_parser::Tok; - use super::LogicalLineTokens; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::source_code::Locator; +use ruff_python_ast::token_kind::TokenKind; use ruff_python_ast::types::Range; +use rustpython_parser::ast::Location; /// ## What it does /// Checks if inline comments are separated by at least two spaces. @@ -147,7 +146,7 @@ pub(crate) fn whitespace_before_comment( for token in tokens { let kind = token.kind(); - if let Tok::Comment { .. 
} = kind { + if let TokenKind::Comment = kind { let (start, end) = token.range(); let line = locator.slice(Range::new( Location::new(start.row(), 0), @@ -194,7 +193,7 @@ pub(crate) fn whitespace_before_comment( } } } - } else if !matches!(kind, Tok::NonLogicalNewline) { + } else if !matches!(kind, TokenKind::NonLogicalNewline) { prev_end = token.end(); } } diff --git a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_parameters.rs b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_parameters.rs index 10a5a3ab65..a9c87d7651 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_parameters.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/logical_lines/whitespace_before_parameters.rs @@ -1,27 +1,36 @@ -use rustpython_parser::ast::Location; -use rustpython_parser::Tok; - use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::token_kind::TokenKind; use ruff_python_ast::types::Range; +use rustpython_parser::ast::Location; use super::LogicalLineTokens; #[violation] pub struct WhitespaceBeforeParameters { - pub bracket: String, + pub bracket: TokenKind, +} + +impl WhitespaceBeforeParameters { + fn bracket_text(&self) -> char { + match self.bracket { + TokenKind::Lpar => '(', + TokenKind::Lsqb => '[', + _ => unreachable!(), + } + } } impl AlwaysAutofixableViolation for WhitespaceBeforeParameters { #[derive_message_formats] fn message(&self) -> String { - let WhitespaceBeforeParameters { bracket } = self; - format!("Whitespace before {bracket}") + let bracket = self.bracket_text(); + format!("Whitespace before '{bracket}'") } fn autofix_title(&self) -> String { - let WhitespaceBeforeParameters { bracket } = self; - format!("Removed whitespace before {bracket}") + let bracket = self.bracket_text(); + format!("Removed whitespace before '{bracket}'") } } @@ -33,28 +42,26 @@ pub(crate) fn 
whitespace_before_parameters( let mut diagnostics = vec![]; let previous = tokens.first().unwrap(); - let mut pre_pre_kind: Option<&Tok> = None; + let mut pre_pre_kind: Option<TokenKind> = None; let mut prev_token = previous.kind(); let mut prev_end = previous.end(); for token in tokens { let kind = token.kind(); - if matches!(kind, Tok::Lpar | Tok::Lsqb) - && token.start() != prev_end + if matches!(kind, TokenKind::Lpar | TokenKind::Lsqb) && matches!( prev_token, - Tok::Name { .. } | Tok::Rpar | Tok::Rsqb | Tok::Rbrace + TokenKind::Name | TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace ) - && (pre_pre_kind != Some(&Tok::Class)) + && (pre_pre_kind != Some(TokenKind::Class)) + && token.start() != prev_end { let start = Location::new(prev_end.row(), prev_end.column()); let end = token.end(); let end = Location::new(end.row(), end.column() - 1); - let kind: WhitespaceBeforeParameters = WhitespaceBeforeParameters { - bracket: kind.to_string(), - }; + let kind: WhitespaceBeforeParameters = WhitespaceBeforeParameters { bracket: kind }; let mut diagnostic = Diagnostic::new(kind, Range::new(start, end)); diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs index afb3a92558..872547620f 100644 --- a/crates/ruff_python_ast/src/lib.rs +++ b/crates/ruff_python_ast/src/lib.rs @@ -12,6 +12,7 @@ pub mod relocate; pub mod scope; pub mod source_code; pub mod str; +pub mod token_kind; pub mod types; pub mod typing; pub mod visibility; diff --git a/crates/ruff_python_ast/src/token_kind.rs b/crates/ruff_python_ast/src/token_kind.rs new file mode 100644 index 0000000000..74758dd202 --- /dev/null +++ b/crates/ruff_python_ast/src/token_kind.rs @@ -0,0 +1,455 @@ +use rustpython_parser::Tok; + +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] +pub enum TokenKind { + /// Token value for a name, commonly known as an identifier. + Name, + /// Token value for an integer. + Int, + /// Token value for a floating point number. 
+ Float, + /// Token value for a complex number. + Complex, + /// Token value for a string. + String, + /// Token value for a comment. These are filtered out of the token stream prior to parsing. + Comment, + /// Token value for a newline. + Newline, + /// Token value for a newline that is not a logical line break. These are filtered out of + /// the token stream prior to parsing. + NonLogicalNewline, + /// Token value for an indent. + Indent, + /// Token value for a dedent. + Dedent, + EndOfFile, + /// Token value for a left parenthesis `(`. + Lpar, + /// Token value for a right parenthesis `)`. + Rpar, + /// Token value for a left square bracket `[`. + Lsqb, + /// Token value for a right square bracket `]`. + Rsqb, + /// Token value for a colon `:`. + Colon, + /// Token value for a comma `,`. + Comma, + /// Token value for a semicolon `;`. + Semi, + /// Token value for plus `+`. + Plus, + /// Token value for minus `-`. + Minus, + /// Token value for star `*`. + Star, + /// Token value for slash `/`. + Slash, + /// Token value for vertical bar `|`. + Vbar, + /// Token value for ampersand `&`. + Amper, + /// Token value for less than `<`. + Less, + /// Token value for greater than `>`. + Greater, + /// Token value for equal `=`. + Equal, + /// Token value for dot `.`. + Dot, + /// Token value for percent `%`. + Percent, + /// Token value for left bracket `{`. + Lbrace, + /// Token value for right bracket `}`. + Rbrace, + /// Token value for double equal `==`. + EqEqual, + /// Token value for not equal `!=`. + NotEqual, + /// Token value for less than or equal `<=`. + LessEqual, + /// Token value for greater than or equal `>=`. + GreaterEqual, + /// Token value for tilde `~`. + Tilde, + /// Token value for caret `^`. + CircumFlex, + /// Token value for left shift `<<`. + LeftShift, + /// Token value for right shift `>>`. + RightShift, + /// Token value for double star `**`. + DoubleStar, + /// Token value for double star equal `**=`. 
+ DoubleStarEqual, + /// Token value for plus equal `+=`. + PlusEqual, + /// Token value for minus equal `-=`. + MinusEqual, + /// Token value for star equal `*=`. + StarEqual, + /// Token value for slash equal `/=`. + SlashEqual, + /// Token value for percent equal `%=`. + PercentEqual, + /// Token value for ampersand equal `&=`. + AmperEqual, + /// Token value for vertical bar equal `|=`. + VbarEqual, + /// Token value for caret equal `^=`. + CircumflexEqual, + /// Token value for left shift equal `<<=`. + LeftShiftEqual, + /// Token value for right shift equal `>>=`. + RightShiftEqual, + /// Token value for double slash `//`. + DoubleSlash, + /// Token value for double slash equal `//=`. + DoubleSlashEqual, + /// Token value for colon equal `:=`. + ColonEqual, + /// Token value for at `@`. + At, + /// Token value for at equal `@=`. + AtEqual, + /// Token value for arrow `->`. + Rarrow, + /// Token value for ellipsis `...`. + Ellipsis, + + // Self documenting. + // Keywords (alphabetically): + False, + None, + True, + + And, + As, + Assert, + Async, + Await, + Break, + Class, + Continue, + Def, + Del, + Elif, + Else, + Except, + Finally, + For, + From, + Global, + If, + Import, + In, + Is, + Lambda, + Nonlocal, + Not, + Or, + Pass, + Raise, + Return, + Try, + While, + Match, + Case, + With, + Yield, + + // RustPython specific. 
+ StartModule, + StartInteractive, + StartExpression, +} + +impl TokenKind { + pub const fn is_whitespace_needed(&self) -> bool { + matches!( + self, + TokenKind::DoubleStarEqual + | TokenKind::StarEqual + | TokenKind::SlashEqual + | TokenKind::DoubleSlashEqual + | TokenKind::PlusEqual + | TokenKind::MinusEqual + | TokenKind::NotEqual + | TokenKind::Less + | TokenKind::Greater + | TokenKind::PercentEqual + | TokenKind::CircumflexEqual + | TokenKind::AmperEqual + | TokenKind::VbarEqual + | TokenKind::EqEqual + | TokenKind::LessEqual + | TokenKind::GreaterEqual + | TokenKind::LeftShiftEqual + | TokenKind::RightShiftEqual + | TokenKind::Equal + | TokenKind::And + | TokenKind::Or + | TokenKind::In + | TokenKind::Is + | TokenKind::Rarrow + ) + } + + pub const fn is_whitespace_optional(&self) -> bool { + self.is_arithmetic() + || matches!( + self, + TokenKind::CircumFlex + | TokenKind::Amper + | TokenKind::Vbar + | TokenKind::LeftShift + | TokenKind::RightShift + | TokenKind::Percent + ) + } + + pub const fn is_unary(&self) -> bool { + matches!( + self, + TokenKind::Plus + | TokenKind::Minus + | TokenKind::Star + | TokenKind::DoubleStar + | TokenKind::RightShift + ) + } + + pub const fn is_keyword(&self) -> bool { + matches!( + self, + TokenKind::False + | TokenKind::True + | TokenKind::None + | TokenKind::And + | TokenKind::As + | TokenKind::Assert + | TokenKind::Await + | TokenKind::Break + | TokenKind::Class + | TokenKind::Continue + | TokenKind::Def + | TokenKind::Del + | TokenKind::Elif + | TokenKind::Else + | TokenKind::Except + | TokenKind::Finally + | TokenKind::For + | TokenKind::From + | TokenKind::Global + | TokenKind::If + | TokenKind::Import + | TokenKind::In + | TokenKind::Is + | TokenKind::Lambda + | TokenKind::Nonlocal + | TokenKind::Not + | TokenKind::Or + | TokenKind::Pass + | TokenKind::Raise + | TokenKind::Return + | TokenKind::Try + | TokenKind::While + | TokenKind::With + | TokenKind::Yield + ) + } + + pub const fn is_operator(&self) -> bool { + 
matches!( + self, + TokenKind::Lpar + | TokenKind::Rpar + | TokenKind::Lsqb + | TokenKind::Rsqb + | TokenKind::Comma + | TokenKind::Semi + | TokenKind::Plus + | TokenKind::Minus + | TokenKind::Star + | TokenKind::Slash + | TokenKind::Vbar + | TokenKind::Amper + | TokenKind::Less + | TokenKind::Greater + | TokenKind::Equal + | TokenKind::Dot + | TokenKind::Percent + | TokenKind::Lbrace + | TokenKind::Rbrace + | TokenKind::NotEqual + | TokenKind::LessEqual + | TokenKind::GreaterEqual + | TokenKind::Tilde + | TokenKind::CircumFlex + | TokenKind::LeftShift + | TokenKind::RightShift + | TokenKind::DoubleStar + | TokenKind::PlusEqual + | TokenKind::MinusEqual + | TokenKind::StarEqual + | TokenKind::SlashEqual + | TokenKind::PercentEqual + | TokenKind::AmperEqual + | TokenKind::VbarEqual + | TokenKind::CircumflexEqual + | TokenKind::LeftShiftEqual + | TokenKind::RightShiftEqual + | TokenKind::DoubleStarEqual + | TokenKind::DoubleSlash + | TokenKind::DoubleSlashEqual + | TokenKind::At + | TokenKind::AtEqual + | TokenKind::Rarrow + | TokenKind::Ellipsis + | TokenKind::ColonEqual + | TokenKind::Colon + ) + } + + pub const fn is_singleton(&self) -> bool { + matches!(self, TokenKind::False | TokenKind::True | TokenKind::None) + } + + pub const fn is_skip_comment(&self) -> bool { + matches!( + self, + TokenKind::Newline + | TokenKind::Indent + | TokenKind::Dedent + | TokenKind::NonLogicalNewline + | TokenKind::Comment + ) + } + + pub const fn is_arithmetic(&self) -> bool { + matches!( + self, + TokenKind::DoubleStar + | TokenKind::Star + | TokenKind::Plus + | TokenKind::Minus + | TokenKind::Slash + | TokenKind::At + ) + } + + pub const fn is_soft_keyword(&self) -> bool { + matches!(self, TokenKind::Match | TokenKind::Case) + } + + pub const fn from_token(token: &Tok) -> Self { + match token { + Tok::Name { .. } => TokenKind::Name, + Tok::Int { .. } => TokenKind::Int, + Tok::Float { .. } => TokenKind::Float, + Tok::Complex { .. } => TokenKind::Complex, + Tok::String { .. 
} => TokenKind::String, + Tok::Comment(_) => TokenKind::Comment, + Tok::Newline => TokenKind::Newline, + Tok::NonLogicalNewline => TokenKind::NonLogicalNewline, + Tok::Indent => TokenKind::Indent, + Tok::Dedent => TokenKind::Dedent, + Tok::EndOfFile => TokenKind::EndOfFile, + Tok::Lpar => TokenKind::Lpar, + Tok::Rpar => TokenKind::Rpar, + Tok::Lsqb => TokenKind::Lsqb, + Tok::Rsqb => TokenKind::Rsqb, + Tok::Colon => TokenKind::Colon, + Tok::Comma => TokenKind::Comma, + Tok::Semi => TokenKind::Semi, + Tok::Plus => TokenKind::Plus, + Tok::Minus => TokenKind::Minus, + Tok::Star => TokenKind::Star, + Tok::Slash => TokenKind::Slash, + Tok::Vbar => TokenKind::Vbar, + Tok::Amper => TokenKind::Amper, + Tok::Less => TokenKind::Less, + Tok::Greater => TokenKind::Greater, + Tok::Equal => TokenKind::Equal, + Tok::Dot => TokenKind::Dot, + Tok::Percent => TokenKind::Percent, + Tok::Lbrace => TokenKind::Lbrace, + Tok::Rbrace => TokenKind::Rbrace, + Tok::EqEqual => TokenKind::EqEqual, + Tok::NotEqual => TokenKind::NotEqual, + Tok::LessEqual => TokenKind::LessEqual, + Tok::GreaterEqual => TokenKind::GreaterEqual, + Tok::Tilde => TokenKind::Tilde, + Tok::CircumFlex => TokenKind::CircumFlex, + Tok::LeftShift => TokenKind::LeftShift, + Tok::RightShift => TokenKind::RightShift, + Tok::DoubleStar => TokenKind::DoubleStar, + Tok::DoubleStarEqual => TokenKind::DoubleStarEqual, + Tok::PlusEqual => TokenKind::PlusEqual, + Tok::MinusEqual => TokenKind::MinusEqual, + Tok::StarEqual => TokenKind::StarEqual, + Tok::SlashEqual => TokenKind::SlashEqual, + Tok::PercentEqual => TokenKind::PercentEqual, + Tok::AmperEqual => TokenKind::AmperEqual, + Tok::VbarEqual => TokenKind::VbarEqual, + Tok::CircumflexEqual => TokenKind::CircumflexEqual, + Tok::LeftShiftEqual => TokenKind::LeftShiftEqual, + Tok::RightShiftEqual => TokenKind::RightShiftEqual, + Tok::DoubleSlash => TokenKind::DoubleSlash, + Tok::DoubleSlashEqual => TokenKind::DoubleSlashEqual, + Tok::ColonEqual => TokenKind::ColonEqual, + Tok::At => 
TokenKind::At, + Tok::AtEqual => TokenKind::AtEqual, + Tok::Rarrow => TokenKind::Rarrow, + Tok::Ellipsis => TokenKind::Ellipsis, + Tok::False => TokenKind::False, + Tok::None => TokenKind::None, + Tok::True => TokenKind::True, + Tok::And => TokenKind::And, + Tok::As => TokenKind::As, + Tok::Assert => TokenKind::Assert, + Tok::Async => TokenKind::Async, + Tok::Await => TokenKind::Await, + Tok::Break => TokenKind::Break, + Tok::Class => TokenKind::Class, + Tok::Continue => TokenKind::Continue, + Tok::Def => TokenKind::Def, + Tok::Del => TokenKind::Del, + Tok::Elif => TokenKind::Elif, + Tok::Else => TokenKind::Else, + Tok::Except => TokenKind::Except, + Tok::Finally => TokenKind::Finally, + Tok::For => TokenKind::For, + Tok::From => TokenKind::From, + Tok::Global => TokenKind::Global, + Tok::If => TokenKind::If, + Tok::Import => TokenKind::Import, + Tok::In => TokenKind::In, + Tok::Is => TokenKind::Is, + Tok::Lambda => TokenKind::Lambda, + Tok::Nonlocal => TokenKind::Nonlocal, + Tok::Not => TokenKind::Not, + Tok::Or => TokenKind::Or, + Tok::Pass => TokenKind::Pass, + Tok::Raise => TokenKind::Raise, + Tok::Return => TokenKind::Return, + Tok::Try => TokenKind::Try, + Tok::While => TokenKind::While, + Tok::Match => TokenKind::Match, + Tok::Case => TokenKind::Case, + Tok::With => TokenKind::With, + Tok::Yield => TokenKind::Yield, + Tok::StartModule => TokenKind::StartModule, + Tok::StartInteractive => TokenKind::StartInteractive, + Tok::StartExpression => TokenKind::StartExpression, + } + } +} + +impl From<&Tok> for TokenKind { + fn from(value: &Tok) -> Self { + Self::from_token(value) + } +}