[pycodestyle] Handle each cell separately for too-many-newlines-at-end-of-file (W391) (#15308)

Jupyter notebooks are converted into source files by joining with
newlines, which confuses the check [too-many-newlines-at-end-of-file
(W391)](https://docs.astral.sh/ruff/rules/too-many-newlines-at-end-of-file/#too-many-newlines-at-end-of-file-w391).
This PR introduces logic to apply the check cell-wise (and, in
particular, correctly handles empty cells.)

Closes #13763
This commit is contained in:
Dylan
2025-01-09 10:50:39 -06:00
committed by GitHub
parent d0b2bbd55e
commit b0905c4b04
5 changed files with 260 additions and 30 deletions

View File

@@ -0,0 +1,92 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n"
]
}
],
"source": [
"True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# just a comment in this cell"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# a comment and some newlines\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"1 + 1\n",
"# a comment\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"1+1\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -183,7 +183,11 @@ pub(crate) fn check_tokens(
}
if settings.rules.enabled(Rule::TooManyNewlinesAtEndOfFile) {
pycodestyle::rules::too_many_newlines_at_end_of_file(&mut diagnostics, tokens);
pycodestyle::rules::too_many_newlines_at_end_of_file(
&mut diagnostics,
tokens,
cell_offsets,
);
}
diagnostics.retain(|diagnostic| settings.rules.enabled(diagnostic.kind.rule()));

View File

@@ -79,6 +79,7 @@ mod tests {
#[test_case(Rule::TooManyNewlinesAtEndOfFile, Path::new("W391_2.py"))]
#[test_case(Rule::TooManyNewlinesAtEndOfFile, Path::new("W391_3.py"))]
#[test_case(Rule::TooManyNewlinesAtEndOfFile, Path::new("W391_4.py"))]
#[test_case(Rule::TooManyNewlinesAtEndOfFile, Path::new("W391.ipynb"))]
fn preview_rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"preview__{}_{}",

View File

@@ -1,11 +1,18 @@
use std::iter::Peekable;
use itertools::Itertools;
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_parser::{TokenKind, Tokens};
use ruff_notebook::CellOffsets;
use ruff_python_parser::{Token, TokenKind, Tokens};
use ruff_text_size::{Ranged, TextRange, TextSize};
/// ## What it does
/// Checks for files with multiple trailing blank lines.
///
/// In the case of notebooks, this check is applied to
/// each cell separately.
///
/// ## Why is this bad?
/// Trailing blank lines in a file are superfluous.
///
@@ -23,17 +30,19 @@ use ruff_text_size::{Ranged, TextRange, TextSize};
#[derive(ViolationMetadata)]
pub(crate) struct TooManyNewlinesAtEndOfFile {
num_trailing_newlines: u32,
in_notebook: bool,
}
impl AlwaysFixableViolation for TooManyNewlinesAtEndOfFile {
#[derive_message_formats]
fn message(&self) -> String {
let domain = if self.in_notebook { "cell" } else { "file" };
// We expect a single trailing newline; so two trailing newlines is one too many, three
// trailing newlines is two too many, etc.
if self.num_trailing_newlines > 2 {
"Too many newlines at end of file".to_string()
format!("Too many newlines at end of {domain}")
} else {
"Extra newline at end of file".to_string()
format!("Extra newline at end of {domain}")
}
}
@@ -48,22 +57,68 @@ impl AlwaysFixableViolation for TooManyNewlinesAtEndOfFile {
}
/// W391
pub(crate) fn too_many_newlines_at_end_of_file(diagnostics: &mut Vec<Diagnostic>, tokens: &Tokens) {
let mut num_trailing_newlines = 0u32;
let mut start: Option<TextSize> = None;
let mut end: Option<TextSize> = None;
pub(crate) fn too_many_newlines_at_end_of_file(
diagnostics: &mut Vec<Diagnostic>,
tokens: &Tokens,
cell_offsets: Option<&CellOffsets>,
) {
let mut tokens_iter = tokens.iter().rev().peekable();
// Count the number of trailing newlines.
for token in tokens.iter().rev() {
match token.kind() {
TokenKind::NonLogicalNewline | TokenKind::Newline => {
if num_trailing_newlines == 0 {
end = Some(token.end());
if let Some(cell_offsets) = cell_offsets {
diagnostics.extend(notebook_newline_diagnostics(tokens_iter, cell_offsets));
} else if let Some(diagnostic) = newline_diagnostic(&mut tokens_iter, false) {
diagnostics.push(diagnostic);
};
}
/// Collects trailing newline diagnostics for each cell
fn notebook_newline_diagnostics<'a>(
mut tokens_iter: Peekable<impl Iterator<Item = &'a Token>>,
cell_offsets: &CellOffsets,
) -> Vec<Diagnostic> {
let mut results = Vec::new();
let offset_iter = cell_offsets.iter().rev();
// NB: When interpreting the below, recall that the iterators
// have been reversed.
for &offset in offset_iter {
// Advance to offset
tokens_iter
.peeking_take_while(|tok| tok.end() >= offset)
.for_each(drop);
let Some(diagnostic) = newline_diagnostic(&mut tokens_iter, true) else {
continue;
};
results.push(diagnostic);
}
results
}
/// Possible diagnostic, with fix, for too many newlines in cell or source file
fn newline_diagnostic<'a>(
tokens_iter: &mut Peekable<impl Iterator<Item = &'a Token>>,
in_notebook: bool,
) -> Option<Diagnostic> {
let mut num_trailing_newlines: u32 = 0;
let mut newline_range_start: Option<TextSize> = None;
let mut newline_range_end: Option<TextSize> = None;
while let Some(next_token) = tokens_iter.peek() {
match next_token.kind() {
TokenKind::Newline | TokenKind::NonLogicalNewline => {
if newline_range_end.is_none() {
newline_range_end = Some(next_token.end());
}
start = Some(token.end());
newline_range_start = Some(next_token.end());
tokens_iter.next();
num_trailing_newlines += 1;
}
TokenKind::Dedent => continue,
TokenKind::Dedent => {
tokens_iter.next();
}
_ => {
break;
}
@@ -71,19 +126,23 @@ pub(crate) fn too_many_newlines_at_end_of_file(diagnostics: &mut Vec<Diagnostic>
}
if num_trailing_newlines == 0 || num_trailing_newlines == 1 {
return;
}
let range = match (start, end) {
(Some(start), Some(end)) => TextRange::new(start, end),
_ => return,
return None;
};
let mut diagnostic = Diagnostic::new(
TooManyNewlinesAtEndOfFile {
num_trailing_newlines,
},
range,
);
diagnostic.set_fix(Fix::safe_edit(Edit::range_deletion(range)));
diagnostics.push(diagnostic);
let (start, end) = (match (newline_range_start, newline_range_end) {
(Some(s), Some(e)) => Some((s, e)),
_ => None,
})?;
let diagnostic_range = TextRange::new(start, end);
Some(
Diagnostic::new(
TooManyNewlinesAtEndOfFile {
num_trailing_newlines,
in_notebook,
},
diagnostic_range,
)
.with_fix(Fix::safe_edit(Edit::range_deletion(diagnostic_range))),
)
}

View File

@@ -0,0 +1,74 @@
---
source: crates/ruff_linter/src/rules/pycodestyle/mod.rs
---
W391.ipynb:5:1: W391 [*] Too many newlines at end of cell
|
3 | # just a comment in this cell
4 | # a comment and some newlines
5 | /
6 | |
7 | |
8 | |
| |_^ W391
9 | 1 + 1
10 | # a comment
|
= help: Remove trailing newlines
Safe fix
3 3 | # just a comment in this cell
4 4 | # a comment and some newlines
5 5 |
6 |-
7 |-
8 |-
9 6 | 1 + 1
10 7 | # a comment
11 8 |
W391.ipynb:11:1: W391 [*] Too many newlines at end of cell
|
9 | 1 + 1
10 | # a comment
11 | /
12 | |
13 | |
14 | |
15 | |
| |_^ W391
16 | 1+1
|
= help: Remove trailing newlines
Safe fix
9 9 | 1 + 1
10 10 | # a comment
11 11 |
12 |-
13 |-
14 |-
15 |-
16 12 | 1+1
17 13 |
18 14 |
W391.ipynb:17:1: W391 [*] Too many newlines at end of cell
|
16 | 1+1
17 | /
18 | |
19 | |
20 | |
21 | |
| |_^ W391
|
= help: Remove trailing newlines
Safe fix
15 15 |
16 16 | 1+1
17 17 |
18 |-
19 |-
20 |-
21 |-