Compare commits

...

6 Commits

Author SHA1 Message Date
Amethyst Reese
5b1d172906 Use tokens split_at in range suppression loading 2026-01-13 14:04:01 -08:00
Amethyst Reese
6c01f1a1e8 Clear indents in outer loop 2026-01-13 14:02:10 -08:00
Amethyst Reese
a457967e8f Add split_at method to Tokens 2026-01-13 11:16:38 -08:00
Amethyst Reese
c62b57d952 remove old code 2026-01-13 10:21:28 -08:00
Amethyst Reese
202987f488 clippy 2026-01-13 10:21:27 -08:00
Amethyst Reese
17b5e4292b Skip walking all tokens when loading range suppressions
Adapted from https://github.com/astral-sh/ruff/pull/21441#pullrequestreview-3503773083
2026-01-13 10:21:27 -08:00
7 changed files with 196 additions and 75 deletions

View File

@@ -405,7 +405,8 @@ pub fn add_noqa_to_path(
);
// Parse range suppression comments
let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
let suppressions =
Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
// Generate diagnostics, ignoring any existing `noqa` directives.
let diagnostics = check_path(
@@ -479,7 +480,8 @@ pub fn lint_only(
);
// Parse range suppression comments
let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
let suppressions =
Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
// Generate diagnostics.
let diagnostics = check_path(
@@ -596,7 +598,8 @@ pub fn lint_fix<'a>(
);
// Parse range suppression comments
let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
let suppressions =
Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
// Generate diagnostics.
let diagnostics = check_path(
@@ -978,7 +981,8 @@ mod tests {
&locator,
&indexer,
);
let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
let suppressions =
Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
let mut diagnostics = check_path(
path,
None,

View File

@@ -957,7 +957,7 @@ mod tests {
&indexer,
);
let suppressions =
Suppressions::from_tokens(&settings, locator.contents(), parsed.tokens());
Suppressions::from_tokens(&settings, locator.contents(), parsed.tokens(), &indexer);
let mut messages = check_path(
Path::new("<filename>"),
None,

View File

@@ -3,13 +3,13 @@ use core::fmt;
use ruff_db::diagnostic::Diagnostic;
use ruff_diagnostics::{Edit, Fix};
use ruff_python_ast::token::{TokenKind, Tokens};
use ruff_python_ast::whitespace::indentation;
use ruff_python_index::Indexer;
use rustc_hash::FxHashSet;
use std::cell::Cell;
use std::{error::Error, fmt::Formatter};
use thiserror::Error;
use ruff_python_trivia::Cursor;
use ruff_python_trivia::{Cursor, indentation_at_offset};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize, TextSlice};
use smallvec::{SmallVec, smallvec};
@@ -153,10 +153,15 @@ pub struct Suppressions {
}
impl Suppressions {
pub fn from_tokens(settings: &LinterSettings, source: &str, tokens: &Tokens) -> Suppressions {
/// Parses all range-suppression comments in `source` into a [`Suppressions`] set.
///
/// Uses the `indexer`'s pre-computed comment ranges so the builder can skip
/// walking the full token stream (see commit "Skip walking all tokens when
/// loading range suppressions"). Returns an empty `Suppressions` when range
/// suppressions are disabled in `settings`.
pub fn from_tokens(
    settings: &LinterSettings,
    source: &str,
    tokens: &Tokens,
    indexer: &Indexer,
) -> Suppressions {
    if is_range_suppressions_enabled(settings) {
        let builder = SuppressionsBuilder::new(source);
        builder.load_from_tokens(tokens, indexer)
    } else {
        // Feature gated: no parsing work at all when disabled.
        Suppressions::default()
    }
}
@@ -355,7 +360,6 @@ pub(crate) struct SuppressionsBuilder<'a> {
valid: Vec<Suppression>,
invalid: Vec<InvalidSuppression>,
errors: Vec<ParseError>,
pending: Vec<PendingSuppressionComment<'a>>,
}
@@ -368,74 +372,110 @@ impl<'a> SuppressionsBuilder<'a> {
}
}
pub(crate) fn load_from_tokens(mut self, tokens: &Tokens) -> Suppressions {
let default_indent = "";
pub(crate) fn load_from_tokens(mut self, tokens: &Tokens, indexer: &Indexer) -> Suppressions {
let mut indents: Vec<&str> = vec![];
let mut errors = Vec::new();
// Iterate through tokens, tracking indentation, filtering trailing comments, and then
// looking for matching comments from the previous block when reaching a dedent token.
for (token_index, token) in tokens.iter().enumerate() {
match token.kind() {
TokenKind::Indent => {
indents.push(self.source.slice(token));
}
TokenKind::Dedent => {
self.match_comments(indents.last().copied().unwrap_or_default(), token.range());
indents.pop();
}
TokenKind::Comment => {
let mut parser = SuppressionParser::new(self.source, token.range());
match parser.parse_comment() {
Ok(comment) => {
let indent = indentation(self.source, &comment.range);
let Some(indent) = indent else {
// trailing suppressions are not supported
self.invalid.push(InvalidSuppression {
kind: InvalidSuppressionKind::Trailing,
comment,
});
continue;
};
// comment matches current block's indentation, or precedes an indent/dedent token
if indent == indents.last().copied().unwrap_or_default()
|| tokens[token_index..]
.iter()
.find(|t| !t.kind().is_trivia())
.is_some_and(|t| {
matches!(t.kind(), TokenKind::Dedent | TokenKind::Indent)
})
{
self.pending
.push(PendingSuppressionComment { indent, comment });
} else {
// weirdly indented? ¯\_(ツ)_/¯
self.invalid.push(InvalidSuppression {
kind: InvalidSuppressionKind::Indentation,
comment,
});
}
}
Err(ParseError {
kind: ParseErrorKind::NotASuppression,
..
}) => {}
Err(error) => {
self.errors.push(error);
}
// Parse every comment in the file — found via the indexer's pre-computed
// comment ranges rather than a full token walk — into suppression comments.
// Ordinary comments are dropped silently; malformed suppressions record a
// parse error. The iterator is made peekable so the token scan below can
// consume comments lazily, in source order.
let mut suppressions = indexer
    .comment_ranges()
    .iter()
    .copied()
    .filter_map(|comment_range| {
        let mut parser = SuppressionParser::new(self.source, comment_range);
        match parser.parse_comment() {
            // A well-formed suppression comment.
            Ok(comment) => Some(comment),
            // Not a suppression at all: skip, not an error.
            Err(ParseError {
                kind: ParseErrorKind::NotASuppression,
                ..
            }) => None,
            // Malformed suppression: record the error and drop it.
            Err(error) => {
                errors.push(error);
                None
            }
        }
    })
    .peekable();
self.match_comments(default_indent, TextRange::up_to(self.source.text_len()));
// For each unmatched suppression comment, resume the token scan at that
// comment instead of walking the whole stream from the beginning.
'comments: while let Some(suppression) = suppressions.peek() {
    // Seed the indentation stack with the most recent `Indent` token before
    // the comment, so the tokens preceding it never need to be walked.
    // NOTE(review): `rfind` ignores any `Dedent`s between that `Indent` and
    // the comment; the indent/dedent lookahead in the `Comment` arm below
    // appears to be what keeps such comments from being misclassified —
    // confirm with a dedented-comment test case.
    indents.clear();
    let (before, after) = tokens.split_at(suppression.range.start());
    let last_indent = before
        .iter()
        .rfind(|token| token.kind() == TokenKind::Indent)
        .map(|token| self.source.slice(token))
        .unwrap_or_default();
    indents.push(last_indent);
    // Iterate through tokens, tracking indentation, filtering trailing comments, and then
    // looking for matching comments from the previous block when reaching a dedent token.
    for (token_index, token) in after.iter().enumerate() {
        let current_indent = indents.last().copied().unwrap_or_default();
        match token.kind() {
            TokenKind::Indent => {
                indents.push(self.source.slice(token));
            }
            TokenKind::Dedent => {
                // A block just ended: pair up pending suppression comments
                // at its indentation level.
                self.match_comments(current_indent, token.range());
                indents.pop();
                if indents.is_empty() {
                    // Dedented past the seeded indentation level: re-seed
                    // the scan from the next unmatched suppression comment.
                    // TODO: could we also bail out when `self.pending` is empty?
                    continue 'comments;
                }
            }
            TokenKind::Comment => {
                // Only comment tokens that parsed as suppressions matter;
                // `next_if` consumes them from the peekable in source order.
                let Some(suppression) =
                    suppressions.next_if(|suppression| suppression.range == token.range())
                else {
                    continue;
                };
                let Some(indent) =
                    indentation_at_offset(suppression.range.start(), self.source)
                else {
                    // trailing suppressions are not supported
                    self.invalid.push(InvalidSuppression {
                        kind: InvalidSuppressionKind::Trailing,
                        comment: suppression,
                    });
                    continue;
                };
                // comment matches current block's indentation, or precedes an indent/dedent token
                //
                // BUGFIX: `token_index` comes from enumerating `after`, so the
                // lookahead must slice `after`, not `tokens` — indexing `tokens`
                // was off by `before.len()` and inspected tokens near the start
                // of the file instead of the tokens following this comment.
                if indent == current_indent
                    || after[token_index..]
                        .iter()
                        .find(|t| !t.kind().is_trivia())
                        .is_some_and(|t| {
                            t.kind() == TokenKind::Dedent || t.kind() == TokenKind::Indent
                        })
                {
                    self.pending.push(PendingSuppressionComment {
                        indent,
                        comment: suppression,
                    });
                } else {
                    // weirdly indented? ¯\_(ツ)_/¯
                    self.invalid.push(InvalidSuppression {
                        kind: InvalidSuppressionKind::Indentation,
                        comment: suppression,
                    });
                }
            }
            _ => {}
        }
    }
    // End of the token stream: match any remaining pending comments at
    // top-level indentation over the rest of the file.
    self.match_comments("", TextRange::up_to(self.source.text_len()));
}
Suppressions {
valid: self.valid,
invalid: self.invalid,
errors: self.errors,
errors,
}
}
@@ -691,6 +731,7 @@ mod tests {
use insta::assert_debug_snapshot;
use itertools::Itertools;
use ruff_python_index::Indexer;
use ruff_python_parser::{Mode, ParseOptions, parse};
use ruff_text_size::{TextLen, TextRange, TextSize};
use similar::DiffableStr;
@@ -1585,10 +1626,12 @@ def bar():
/// Parse all suppressions and errors in a module for testing
fn debug(source: &'_ str) -> DebugSuppressions<'_> {
let parsed = parse(source, ParseOptions::from(Mode::Module)).unwrap();
let indexer = Indexer::from_tokens(parsed.tokens(), source);
let suppressions = Suppressions::from_tokens(
&LinterSettings::default().with_preview_mode(),
source,
parsed.tokens(),
&indexer,
);
DebugSuppressions {
source,

View File

@@ -235,7 +235,8 @@ pub(crate) fn test_contents<'a>(
&locator,
&indexer,
);
let suppressions = Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
let suppressions =
Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
let messages = check_path(
path,
path.parent()
@@ -303,7 +304,7 @@ pub(crate) fn test_contents<'a>(
);
let suppressions =
Suppressions::from_tokens(settings, locator.contents(), parsed.tokens());
Suppressions::from_tokens(settings, locator.contents(), parsed.tokens(), &indexer);
let fixed_messages = check_path(
path,
None,

View File

@@ -185,6 +185,31 @@ impl Tokens {
after
}
/// Returns a pair of token slices before and after the given [`TextSize`] offset.
///
/// A token belongs to the "before" slice if it starts before `offset` and to
/// the "after" slice otherwise; an offset falling in the gap between two
/// tokens therefore splits exactly between them.
///
/// # Panics
///
/// If the given offset is inside a token range at any point
/// other than the start of the range.
pub fn split_at(&self, offset: TextSize) -> (&[Token], &[Token]) {
    // Tokens are ordered by position, so binary-search for the first token
    // that starts at or after `offset`.
    let partition_point = self.partition_point(|token| token.start() < offset);
    let before = &self.raw[..partition_point];
    let after = &self.raw[partition_point..];
    // Reject offsets that land strictly inside the last "before" token.
    if let Some(last) = before.last() {
        assert!(
            offset >= last.end(),
            "Offset {offset:?} is inside token `{last:?}`"
        );
    }
    (before, after)
}
}
impl<'a> IntoIterator for &'a Tokens {
@@ -513,4 +538,44 @@ mod tests {
let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
tokens.in_range(TextRange::new(0.into(), 6.into()));
}
#[test]
fn tokens_split_at_first_token_start() {
    // Splitting at offset 0 leaves everything in the "after" slice.
    let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
    let (before, after) = tokens.split_at(TextSize::new(0));
    assert_eq!(before.len(), 0);
    assert_eq!(after.len(), 10);
}
#[test]
fn tokens_split_at_last_token_end() {
    // Splitting at the final token's end leaves everything in "before".
    let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
    let (before, after) = tokens.split_at(TextSize::new(33));
    assert_eq!(before.len(), 10);
    assert_eq!(after.len(), 0);
}
#[test]
fn tokens_split_at_inside_gap() {
    // An offset in the gap between two tokens splits cleanly between them.
    let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
    let (before, after) = tokens.split_at(TextSize::new(13));
    assert_eq!(before.len(), 6);
    assert_eq!(after.len(), 4);
}
#[test]
#[should_panic(expected = "Offset 18 is inside token `Comment 15..24`")]
fn tokens_split_at_inside_token() {
    // An offset strictly inside a token's range must panic.
    let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
    tokens.split_at(TextSize::new(18));
}
#[test]
fn tokens_split_at_matches_before_and_after() {
    // `split_at` must agree with the existing `before`/`after` methods.
    let offset = TextSize::new(15);
    let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
    let (before, after) = tokens.split_at(offset);
    assert_eq!(before, tokens.before(offset));
    assert_eq!(after, tokens.after(offset));
}
}

View File

@@ -120,8 +120,12 @@ pub(crate) fn check(
let directives = extract_directives(parsed.tokens(), Flags::all(), &locator, &indexer);
// Parse range suppression comments
let suppressions =
Suppressions::from_tokens(&settings.linter, locator.contents(), parsed.tokens());
let suppressions = Suppressions::from_tokens(
&settings.linter,
locator.contents(),
parsed.tokens(),
&indexer,
);
// Generate checks.
let diagnostics = check_path(

View File

@@ -213,8 +213,12 @@ impl Workspace {
&indexer,
);
let suppressions =
Suppressions::from_tokens(&self.settings.linter, locator.contents(), parsed.tokens());
let suppressions = Suppressions::from_tokens(
&self.settings.linter,
locator.contents(),
parsed.tokens(),
&indexer,
);
// Generate checks.
let diagnostics = check_path(