Compare commits
1 Commits
implicit-s
...
charlie/on
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6e6188c599 |
@@ -387,11 +387,6 @@ We have several ways of benchmarking and profiling Ruff:
|
||||
- Microbenchmarks which run the linter or the formatter on individual files. These run on pull requests.
|
||||
- Profiling the linter on either the microbenchmarks or entire projects
|
||||
|
||||
> \[!NOTE\]
|
||||
> When running benchmarks, ensure that your CPU is otherwise idle (e.g., close any background
|
||||
> applications, like web browsers). You may also want to switch your CPU to a "performance"
|
||||
> mode, if it exists, especially when benchmarking short-lived processes.
|
||||
|
||||
### CPython Benchmark
|
||||
|
||||
First, clone [CPython](https://github.com/python/cpython). It's a large and diverse Python codebase,
|
||||
|
||||
@@ -48,7 +48,6 @@ serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
shellexpand = { workspace = true }
|
||||
strum = { workspace = true, features = [] }
|
||||
tempfile = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
toml = { workspace = true }
|
||||
tracing = { workspace = true, features = ["log"] }
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::fmt::Debug;
|
||||
use std::fs::{self, File};
|
||||
use std::hash::Hasher;
|
||||
use std::io::{self, BufReader, Write};
|
||||
use std::io::{self, BufReader, BufWriter, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::Mutex;
|
||||
@@ -15,7 +15,6 @@ use rayon::iter::ParallelIterator;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelBridge};
|
||||
use rustc_hash::FxHashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
use ruff_cache::{CacheKey, CacheKeyHasher};
|
||||
use ruff_diagnostics::{DiagnosticKind, Fix};
|
||||
@@ -166,29 +165,15 @@ impl Cache {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Write the cache to a temporary file first and then rename it for an "atomic" write.
|
||||
// Protects against data loss if the process is killed during the write and races between different ruff
|
||||
// processes, resulting in a corrupted cache file. https://github.com/astral-sh/ruff/issues/8147#issuecomment-1943345964
|
||||
let mut temp_file =
|
||||
NamedTempFile::new_in(self.path.parent().expect("Write path must have a parent"))
|
||||
.context("Failed to create temporary file")?;
|
||||
|
||||
// Serialize to in-memory buffer because hyperfine benchmark showed that it's faster than
|
||||
// using a `BufWriter` and our cache files are small enough that streaming isn't necessary.
|
||||
let serialized =
|
||||
bincode::serialize(&self.package).context("Failed to serialize cache data")?;
|
||||
temp_file
|
||||
.write_all(&serialized)
|
||||
.context("Failed to write serialized cache to temporary file.")?;
|
||||
|
||||
temp_file.persist(&self.path).with_context(|| {
|
||||
let file = File::create(&self.path)
|
||||
.with_context(|| format!("Failed to create cache file '{}'", self.path.display()))?;
|
||||
let writer = BufWriter::new(file);
|
||||
bincode::serialize_into(writer, &self.package).with_context(|| {
|
||||
format!(
|
||||
"Failed to rename temporary cache file to {}",
|
||||
"Failed to serialise cache to file '{}'",
|
||||
self.path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
/// Applies the pending changes without storing the cache to disk.
|
||||
|
||||
@@ -1,75 +0,0 @@
|
||||
import codecs
|
||||
import io
|
||||
from pathlib import Path
|
||||
|
||||
# Errors
|
||||
with open("FURB129.py") as f:
|
||||
for _line in f.readlines():
|
||||
pass
|
||||
a = [line.lower() for line in f.readlines()]
|
||||
b = {line.upper() for line in f.readlines()}
|
||||
c = {line.lower(): line.upper() for line in f.readlines()}
|
||||
|
||||
with Path("FURB129.py").open() as f:
|
||||
for _line in f.readlines():
|
||||
pass
|
||||
|
||||
for _line in open("FURB129.py").readlines():
|
||||
pass
|
||||
|
||||
for _line in Path("FURB129.py").open().readlines():
|
||||
pass
|
||||
|
||||
|
||||
def func():
|
||||
f = Path("FURB129.py").open()
|
||||
for _line in f.readlines():
|
||||
pass
|
||||
f.close()
|
||||
|
||||
|
||||
def func(f: io.BytesIO):
|
||||
for _line in f.readlines():
|
||||
pass
|
||||
|
||||
|
||||
def func():
|
||||
with (open("FURB129.py") as f, foo as bar):
|
||||
for _line in f.readlines():
|
||||
pass
|
||||
for _line in bar.readlines():
|
||||
pass
|
||||
|
||||
|
||||
# False positives
|
||||
def func(f):
|
||||
for _line in f.readlines():
|
||||
pass
|
||||
|
||||
|
||||
def func(f: codecs.StreamReader):
|
||||
for _line in f.readlines():
|
||||
pass
|
||||
|
||||
|
||||
def func():
|
||||
class A:
|
||||
def readlines(self) -> list[str]:
|
||||
return ["a", "b", "c"]
|
||||
|
||||
return A()
|
||||
|
||||
|
||||
for _line in func().readlines():
|
||||
pass
|
||||
|
||||
# OK
|
||||
for _line in ["a", "b", "c"]:
|
||||
pass
|
||||
with open("FURB129.py") as f:
|
||||
for _line in f:
|
||||
pass
|
||||
for _line in f.readlines(10):
|
||||
pass
|
||||
for _not_line in f.readline():
|
||||
pass
|
||||
@@ -162,26 +162,3 @@ async def f(x: bool):
|
||||
T = asyncio.create_task(asyncio.sleep(1))
|
||||
else:
|
||||
T = None
|
||||
|
||||
|
||||
# Error
|
||||
def f():
|
||||
loop = asyncio.new_event_loop()
|
||||
loop.create_task(main()) # Error
|
||||
|
||||
# Error
|
||||
def f():
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.create_task(main()) # Error
|
||||
|
||||
# OK
|
||||
def f():
|
||||
global task
|
||||
loop = asyncio.new_event_loop()
|
||||
task = loop.create_task(main()) # Error
|
||||
|
||||
# OK
|
||||
def f():
|
||||
global task
|
||||
loop = asyncio.get_event_loop()
|
||||
task = loop.create_task(main()) # Error
|
||||
|
||||
@@ -2,14 +2,11 @@ use ruff_python_ast::Comprehension;
|
||||
|
||||
use crate::checkers::ast::Checker;
|
||||
use crate::codes::Rule;
|
||||
use crate::rules::{flake8_simplify, refurb};
|
||||
use crate::rules::flake8_simplify;
|
||||
|
||||
/// Run lint rules over a [`Comprehension`] syntax nodes.
|
||||
pub(crate) fn comprehension(comprehension: &Comprehension, checker: &mut Checker) {
|
||||
if checker.enabled(Rule::InDictKeys) {
|
||||
flake8_simplify::rules::key_in_dict_comprehension(checker, comprehension);
|
||||
}
|
||||
if checker.enabled(Rule::ReadlinesInFor) {
|
||||
refurb::rules::readlines_in_comprehension(checker, comprehension);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1317,9 +1317,6 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) {
|
||||
if checker.enabled(Rule::UnnecessaryDictIndexLookup) {
|
||||
pylint::rules::unnecessary_dict_index_lookup(checker, for_stmt);
|
||||
}
|
||||
if checker.enabled(Rule::ReadlinesInFor) {
|
||||
refurb::rules::readlines_in_for(checker, for_stmt);
|
||||
}
|
||||
if !is_async {
|
||||
if checker.enabled(Rule::ReimplementedBuiltin) {
|
||||
flake8_simplify::rules::convert_for_loop_to_any_all(checker, stmt);
|
||||
|
||||
@@ -40,7 +40,7 @@ use ruff_diagnostics::{Diagnostic, IsolationLevel};
|
||||
use ruff_notebook::{CellOffsets, NotebookIndex};
|
||||
use ruff_python_ast::all::{extract_all_names, DunderAllFlags};
|
||||
use ruff_python_ast::helpers::{
|
||||
collect_import_from_member, extract_handled_exceptions, is_docstring_stmt, to_module_path,
|
||||
collect_import_from_member, extract_handled_exceptions, to_module_path,
|
||||
};
|
||||
use ruff_python_ast::identifier::Identifier;
|
||||
use ruff_python_ast::str::trailing_quote;
|
||||
@@ -71,38 +71,6 @@ mod analyze;
|
||||
mod annotation;
|
||||
mod deferred;
|
||||
|
||||
/// State representing whether a docstring is expected or not for the next statement.
|
||||
#[derive(Default, Debug, Copy, Clone, PartialEq)]
|
||||
enum DocstringState {
|
||||
/// The next statement is expected to be a docstring, but not necessarily so.
|
||||
///
|
||||
/// For example, in the following code:
|
||||
///
|
||||
/// ```python
|
||||
/// class Foo:
|
||||
/// pass
|
||||
///
|
||||
///
|
||||
/// def bar(x, y):
|
||||
/// """Docstring."""
|
||||
/// return x + y
|
||||
/// ```
|
||||
///
|
||||
/// For `Foo`, the state is expected when the checker is visiting the class
|
||||
/// body but isn't going to be present. While, for `bar` function, the docstring
|
||||
/// is expected and present.
|
||||
#[default]
|
||||
Expected,
|
||||
Other,
|
||||
}
|
||||
|
||||
impl DocstringState {
|
||||
/// Returns `true` if the next statement is expected to be a docstring.
|
||||
const fn is_expected(self) -> bool {
|
||||
matches!(self, DocstringState::Expected)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct Checker<'a> {
|
||||
/// The [`Path`] to the file under analysis.
|
||||
path: &'a Path,
|
||||
@@ -146,8 +114,6 @@ pub(crate) struct Checker<'a> {
|
||||
pub(crate) flake8_bugbear_seen: Vec<TextRange>,
|
||||
/// The end offset of the last visited statement.
|
||||
last_stmt_end: TextSize,
|
||||
/// A state describing if a docstring is expected or not.
|
||||
docstring_state: DocstringState,
|
||||
}
|
||||
|
||||
impl<'a> Checker<'a> {
|
||||
@@ -187,7 +153,6 @@ impl<'a> Checker<'a> {
|
||||
cell_offsets,
|
||||
notebook_index,
|
||||
last_stmt_end: TextSize::default(),
|
||||
docstring_state: DocstringState::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -385,16 +350,6 @@ where
|
||||
// the node.
|
||||
let flags_snapshot = self.semantic.flags;
|
||||
|
||||
// Update the semantic model if it is in a docstring. This should be done after the
|
||||
// flags snapshot to ensure that it gets reset once the statement is analyzed.
|
||||
if self.docstring_state.is_expected() {
|
||||
if is_docstring_stmt(stmt) {
|
||||
self.semantic.flags |= SemanticModelFlags::DOCSTRING;
|
||||
}
|
||||
// Reset the state irrespective of whether the statement is a docstring or not.
|
||||
self.docstring_state = DocstringState::Other;
|
||||
}
|
||||
|
||||
// Step 1: Binding
|
||||
match stmt {
|
||||
Stmt::AugAssign(ast::StmtAugAssign {
|
||||
@@ -696,8 +651,6 @@ where
|
||||
self.semantic.set_globals(globals);
|
||||
}
|
||||
|
||||
// Set the docstring state before visiting the class body.
|
||||
self.docstring_state = DocstringState::Expected;
|
||||
self.visit_body(body);
|
||||
}
|
||||
Stmt::TypeAlias(ast::StmtTypeAlias {
|
||||
@@ -2008,8 +1961,6 @@ impl<'a> Checker<'a> {
|
||||
};
|
||||
|
||||
self.visit_parameters(parameters);
|
||||
// Set the docstring state before visiting the function body.
|
||||
self.docstring_state = DocstringState::Expected;
|
||||
self.visit_body(body);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1025,7 +1025,6 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
|
||||
#[allow(deprecated)]
|
||||
(Refurb, "113") => (RuleGroup::Nursery, rules::refurb::rules::RepeatedAppend),
|
||||
(Refurb, "118") => (RuleGroup::Preview, rules::refurb::rules::ReimplementedOperator),
|
||||
(Refurb, "129") => (RuleGroup::Preview, rules::refurb::rules::ReadlinesInFor),
|
||||
#[allow(deprecated)]
|
||||
(Refurb, "131") => (RuleGroup::Nursery, rules::refurb::rules::DeleteFullSlice),
|
||||
#[allow(deprecated)]
|
||||
|
||||
@@ -419,20 +419,23 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test_case(Path::new("line_ending_crlf.py"))]
|
||||
#[test_case(Path::new("line_ending_lf.py"))]
|
||||
fn source_code_style(path: &Path) -> Result<()> {
|
||||
let snapshot = format!("{}", path.to_string_lossy());
|
||||
let diagnostics = test_path(
|
||||
Path::new("isort").join(path).as_path(),
|
||||
&LinterSettings {
|
||||
src: vec![test_resource_path("fixtures/isort")],
|
||||
..LinterSettings::for_rule(Rule::UnsortedImports)
|
||||
},
|
||||
)?;
|
||||
crate::assert_messages!(snapshot, diagnostics);
|
||||
Ok(())
|
||||
}
|
||||
// Test currently disabled as line endings are automatically converted to
|
||||
// platform-appropriate ones in CI/CD #[test_case(Path::new("
|
||||
// line_ending_crlf.py"))] #[test_case(Path::new("line_ending_lf.py"))]
|
||||
// fn source_code_style(path: &Path) -> Result<()> {
|
||||
// let snapshot = format!("{}", path.to_string_lossy());
|
||||
// let diagnostics = test_path(
|
||||
// Path::new("isort")
|
||||
// .join(path)
|
||||
// .as_path(),
|
||||
// &LinterSettings {
|
||||
// src: vec![test_resource_path("fixtures/isort")],
|
||||
// ..LinterSettings::for_rule(Rule::UnsortedImports)
|
||||
// },
|
||||
// )?;
|
||||
// crate::assert_messages!(snapshot, diagnostics);
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
#[test_case(Path::new("separate_local_folder_imports.py"))]
|
||||
fn known_local_folder(path: &Path) -> Result<()> {
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
---
|
||||
source: crates/ruff_linter/src/rules/isort/mod.rs
|
||||
---
|
||||
line_ending_crlf.py:1:1: I001 [*] Import block is un-sorted or un-formatted
|
||||
|
|
||||
1 | / from long_module_name import member_one, member_two, member_three, member_four, member_five
|
||||
2 | |
|
||||
| |_^ I001
|
||||
|
|
||||
= help: Organize imports
|
||||
|
||||
ℹ Safe fix
|
||||
1 |-from long_module_name import member_one, member_two, member_three, member_four, member_five
|
||||
1 |+from long_module_name import (
|
||||
2 |+ member_five,
|
||||
3 |+ member_four,
|
||||
4 |+ member_one,
|
||||
5 |+ member_three,
|
||||
6 |+ member_two,
|
||||
7 |+)
|
||||
2 8 |
|
||||
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
---
|
||||
source: crates/ruff_linter/src/rules/isort/mod.rs
|
||||
---
|
||||
line_ending_lf.py:1:1: I001 [*] Import block is un-sorted or un-formatted
|
||||
|
|
||||
1 | / from long_module_name import member_one, member_two, member_three, member_four, member_five
|
||||
2 | |
|
||||
| |_^ I001
|
||||
|
|
||||
= help: Organize imports
|
||||
|
||||
ℹ Safe fix
|
||||
1 |-from long_module_name import member_one, member_two, member_three, member_four, member_five
|
||||
1 |+from long_module_name import (
|
||||
2 |+ member_five,
|
||||
3 |+ member_four,
|
||||
4 |+ member_one,
|
||||
5 |+ member_three,
|
||||
6 |+ member_two,
|
||||
7 |+)
|
||||
2 8 |
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ mod tests {
|
||||
#[test_case(Rule::ReadWholeFile, Path::new("FURB101.py"))]
|
||||
#[test_case(Rule::RepeatedAppend, Path::new("FURB113.py"))]
|
||||
#[test_case(Rule::ReimplementedOperator, Path::new("FURB118.py"))]
|
||||
#[test_case(Rule::ReadlinesInFor, Path::new("FURB129.py"))]
|
||||
#[test_case(Rule::DeleteFullSlice, Path::new("FURB131.py"))]
|
||||
#[test_case(Rule::CheckAndRemoveFromSet, Path::new("FURB132.py"))]
|
||||
#[test_case(Rule::IfExprMinMax, Path::new("FURB136.py"))]
|
||||
|
||||
@@ -9,7 +9,6 @@ pub(crate) use math_constant::*;
|
||||
pub(crate) use metaclass_abcmeta::*;
|
||||
pub(crate) use print_empty_string::*;
|
||||
pub(crate) use read_whole_file::*;
|
||||
pub(crate) use readlines_in_for::*;
|
||||
pub(crate) use redundant_log_base::*;
|
||||
pub(crate) use regex_flag_alias::*;
|
||||
pub(crate) use reimplemented_operator::*;
|
||||
@@ -31,7 +30,6 @@ mod math_constant;
|
||||
mod metaclass_abcmeta;
|
||||
mod print_empty_string;
|
||||
mod read_whole_file;
|
||||
mod readlines_in_for;
|
||||
mod redundant_log_base;
|
||||
mod regex_flag_alias;
|
||||
mod reimplemented_operator;
|
||||
|
||||
@@ -1,92 +0,0 @@
|
||||
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
use ruff_python_ast::{Comprehension, Expr, StmtFor};
|
||||
use ruff_python_semantic::analyze::typing;
|
||||
use ruff_python_semantic::analyze::typing::is_io_base_expr;
|
||||
use ruff_text_size::Ranged;
|
||||
|
||||
use crate::checkers::ast::Checker;
|
||||
|
||||
/// ## What it does
|
||||
/// Checks for uses of `readlines()` when iterating over a file line-by-line.
|
||||
///
|
||||
/// ## Why is this bad?
|
||||
/// Rather than iterating over all lines in a file by calling `readlines()`,
|
||||
/// it's more convenient and performant to iterate over the file object
|
||||
/// directly.
|
||||
///
|
||||
/// ## Example
|
||||
/// ```python
|
||||
/// with open("file.txt") as fp:
|
||||
/// for line in fp.readlines():
|
||||
/// ...
|
||||
/// ```
|
||||
///
|
||||
/// Use instead:
|
||||
/// ```python
|
||||
/// with open("file.txt") as fp:
|
||||
/// for line in fp:
|
||||
/// ...
|
||||
/// ```
|
||||
///
|
||||
/// ## References
|
||||
/// - [Python documentation: `io.IOBase.readlines`](https://docs.python.org/3/library/io.html#io.IOBase.readlines)
|
||||
#[violation]
|
||||
pub(crate) struct ReadlinesInFor;
|
||||
|
||||
impl AlwaysFixableViolation for ReadlinesInFor {
|
||||
#[derive_message_formats]
|
||||
fn message(&self) -> String {
|
||||
format!("Instead of calling `readlines()`, iterate over file object directly")
|
||||
}
|
||||
|
||||
fn fix_title(&self) -> String {
|
||||
"Remove `readlines()`".into()
|
||||
}
|
||||
}
|
||||
|
||||
/// FURB129
|
||||
pub(crate) fn readlines_in_for(checker: &mut Checker, for_stmt: &StmtFor) {
|
||||
readlines_in_iter(checker, for_stmt.iter.as_ref());
|
||||
}
|
||||
|
||||
/// FURB129
|
||||
pub(crate) fn readlines_in_comprehension(checker: &mut Checker, comprehension: &Comprehension) {
|
||||
readlines_in_iter(checker, &comprehension.iter);
|
||||
}
|
||||
|
||||
fn readlines_in_iter(checker: &mut Checker, iter_expr: &Expr) {
|
||||
let Expr::Call(expr_call) = iter_expr else {
|
||||
return;
|
||||
};
|
||||
|
||||
let Expr::Attribute(expr_attr) = expr_call.func.as_ref() else {
|
||||
return;
|
||||
};
|
||||
|
||||
if expr_attr.attr.as_str() != "readlines" || !expr_call.arguments.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Determine whether `fp` in `fp.readlines()` was bound to a file object.
|
||||
if let Expr::Name(name) = expr_attr.value.as_ref() {
|
||||
if !checker
|
||||
.semantic()
|
||||
.resolve_name(name)
|
||||
.map(|id| checker.semantic().binding(id))
|
||||
.is_some_and(|binding| typing::is_io_base(binding, checker.semantic()))
|
||||
{
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if !is_io_base_expr(expr_attr.value.as_ref(), checker.semantic()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let mut diagnostic = Diagnostic::new(ReadlinesInFor, expr_call.range());
|
||||
diagnostic.set_fix(Fix::unsafe_edit(Edit::range_deletion(
|
||||
expr_call.range().add_start(expr_attr.value.range().len()),
|
||||
)));
|
||||
checker.diagnostics.push(diagnostic);
|
||||
}
|
||||
@@ -1,207 +0,0 @@
|
||||
---
|
||||
source: crates/ruff_linter/src/rules/refurb/mod.rs
|
||||
---
|
||||
FURB129.py:7:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
||||
|
|
||||
5 | # Errors
|
||||
6 | with open("FURB129.py") as f:
|
||||
7 | for _line in f.readlines():
|
||||
| ^^^^^^^^^^^^^ FURB129
|
||||
8 | pass
|
||||
9 | a = [line.lower() for line in f.readlines()]
|
||||
|
|
||||
= help: Remove `readlines()`
|
||||
|
||||
ℹ Unsafe fix
|
||||
4 4 |
|
||||
5 5 | # Errors
|
||||
6 6 | with open("FURB129.py") as f:
|
||||
7 |- for _line in f.readlines():
|
||||
7 |+ for _line in f:
|
||||
8 8 | pass
|
||||
9 9 | a = [line.lower() for line in f.readlines()]
|
||||
10 10 | b = {line.upper() for line in f.readlines()}
|
||||
|
||||
FURB129.py:9:35: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
||||
|
|
||||
7 | for _line in f.readlines():
|
||||
8 | pass
|
||||
9 | a = [line.lower() for line in f.readlines()]
|
||||
| ^^^^^^^^^^^^^ FURB129
|
||||
10 | b = {line.upper() for line in f.readlines()}
|
||||
11 | c = {line.lower(): line.upper() for line in f.readlines()}
|
||||
|
|
||||
= help: Remove `readlines()`
|
||||
|
||||
ℹ Unsafe fix
|
||||
6 6 | with open("FURB129.py") as f:
|
||||
7 7 | for _line in f.readlines():
|
||||
8 8 | pass
|
||||
9 |- a = [line.lower() for line in f.readlines()]
|
||||
9 |+ a = [line.lower() for line in f]
|
||||
10 10 | b = {line.upper() for line in f.readlines()}
|
||||
11 11 | c = {line.lower(): line.upper() for line in f.readlines()}
|
||||
12 12 |
|
||||
|
||||
FURB129.py:10:35: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
||||
|
|
||||
8 | pass
|
||||
9 | a = [line.lower() for line in f.readlines()]
|
||||
10 | b = {line.upper() for line in f.readlines()}
|
||||
| ^^^^^^^^^^^^^ FURB129
|
||||
11 | c = {line.lower(): line.upper() for line in f.readlines()}
|
||||
|
|
||||
= help: Remove `readlines()`
|
||||
|
||||
ℹ Unsafe fix
|
||||
7 7 | for _line in f.readlines():
|
||||
8 8 | pass
|
||||
9 9 | a = [line.lower() for line in f.readlines()]
|
||||
10 |- b = {line.upper() for line in f.readlines()}
|
||||
10 |+ b = {line.upper() for line in f}
|
||||
11 11 | c = {line.lower(): line.upper() for line in f.readlines()}
|
||||
12 12 |
|
||||
13 13 | with Path("FURB129.py").open() as f:
|
||||
|
||||
FURB129.py:11:49: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
||||
|
|
||||
9 | a = [line.lower() for line in f.readlines()]
|
||||
10 | b = {line.upper() for line in f.readlines()}
|
||||
11 | c = {line.lower(): line.upper() for line in f.readlines()}
|
||||
| ^^^^^^^^^^^^^ FURB129
|
||||
12 |
|
||||
13 | with Path("FURB129.py").open() as f:
|
||||
|
|
||||
= help: Remove `readlines()`
|
||||
|
||||
ℹ Unsafe fix
|
||||
8 8 | pass
|
||||
9 9 | a = [line.lower() for line in f.readlines()]
|
||||
10 10 | b = {line.upper() for line in f.readlines()}
|
||||
11 |- c = {line.lower(): line.upper() for line in f.readlines()}
|
||||
11 |+ c = {line.lower(): line.upper() for line in f}
|
||||
12 12 |
|
||||
13 13 | with Path("FURB129.py").open() as f:
|
||||
14 14 | for _line in f.readlines():
|
||||
|
||||
FURB129.py:14:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
||||
|
|
||||
13 | with Path("FURB129.py").open() as f:
|
||||
14 | for _line in f.readlines():
|
||||
| ^^^^^^^^^^^^^ FURB129
|
||||
15 | pass
|
||||
|
|
||||
= help: Remove `readlines()`
|
||||
|
||||
ℹ Unsafe fix
|
||||
11 11 | c = {line.lower(): line.upper() for line in f.readlines()}
|
||||
12 12 |
|
||||
13 13 | with Path("FURB129.py").open() as f:
|
||||
14 |- for _line in f.readlines():
|
||||
14 |+ for _line in f:
|
||||
15 15 | pass
|
||||
16 16 |
|
||||
17 17 | for _line in open("FURB129.py").readlines():
|
||||
|
||||
FURB129.py:17:14: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
||||
|
|
||||
15 | pass
|
||||
16 |
|
||||
17 | for _line in open("FURB129.py").readlines():
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB129
|
||||
18 | pass
|
||||
|
|
||||
= help: Remove `readlines()`
|
||||
|
||||
ℹ Unsafe fix
|
||||
14 14 | for _line in f.readlines():
|
||||
15 15 | pass
|
||||
16 16 |
|
||||
17 |-for _line in open("FURB129.py").readlines():
|
||||
17 |+for _line in open("FURB129.py"):
|
||||
18 18 | pass
|
||||
19 19 |
|
||||
20 20 | for _line in Path("FURB129.py").open().readlines():
|
||||
|
||||
FURB129.py:20:14: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
||||
|
|
||||
18 | pass
|
||||
19 |
|
||||
20 | for _line in Path("FURB129.py").open().readlines():
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB129
|
||||
21 | pass
|
||||
|
|
||||
= help: Remove `readlines()`
|
||||
|
||||
ℹ Unsafe fix
|
||||
17 17 | for _line in open("FURB129.py").readlines():
|
||||
18 18 | pass
|
||||
19 19 |
|
||||
20 |-for _line in Path("FURB129.py").open().readlines():
|
||||
20 |+for _line in Path("FURB129.py").open():
|
||||
21 21 | pass
|
||||
22 22 |
|
||||
23 23 |
|
||||
|
||||
FURB129.py:26:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
||||
|
|
||||
24 | def func():
|
||||
25 | f = Path("FURB129.py").open()
|
||||
26 | for _line in f.readlines():
|
||||
| ^^^^^^^^^^^^^ FURB129
|
||||
27 | pass
|
||||
28 | f.close()
|
||||
|
|
||||
= help: Remove `readlines()`
|
||||
|
||||
ℹ Unsafe fix
|
||||
23 23 |
|
||||
24 24 | def func():
|
||||
25 25 | f = Path("FURB129.py").open()
|
||||
26 |- for _line in f.readlines():
|
||||
26 |+ for _line in f:
|
||||
27 27 | pass
|
||||
28 28 | f.close()
|
||||
29 29 |
|
||||
|
||||
FURB129.py:32:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
||||
|
|
||||
31 | def func(f: io.BytesIO):
|
||||
32 | for _line in f.readlines():
|
||||
| ^^^^^^^^^^^^^ FURB129
|
||||
33 | pass
|
||||
|
|
||||
= help: Remove `readlines()`
|
||||
|
||||
ℹ Unsafe fix
|
||||
29 29 |
|
||||
30 30 |
|
||||
31 31 | def func(f: io.BytesIO):
|
||||
32 |- for _line in f.readlines():
|
||||
32 |+ for _line in f:
|
||||
33 33 | pass
|
||||
34 34 |
|
||||
35 35 |
|
||||
|
||||
FURB129.py:38:22: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
||||
|
|
||||
36 | def func():
|
||||
37 | with (open("FURB129.py") as f, foo as bar):
|
||||
38 | for _line in f.readlines():
|
||||
| ^^^^^^^^^^^^^ FURB129
|
||||
39 | pass
|
||||
40 | for _line in bar.readlines():
|
||||
|
|
||||
= help: Remove `readlines()`
|
||||
|
||||
ℹ Unsafe fix
|
||||
35 35 |
|
||||
36 36 | def func():
|
||||
37 37 | with (open("FURB129.py") as f, foo as bar):
|
||||
38 |- for _line in f.readlines():
|
||||
38 |+ for _line in f:
|
||||
39 39 | pass
|
||||
40 40 | for _line in bar.readlines():
|
||||
41 41 | pass
|
||||
|
||||
|
||||
@@ -52,15 +52,14 @@ use ruff_text_size::Ranged;
|
||||
/// - [The Python Standard Library](https://docs.python.org/3/library/asyncio-task.html#asyncio.create_task)
|
||||
#[violation]
|
||||
pub struct AsyncioDanglingTask {
|
||||
expr: String,
|
||||
method: Method,
|
||||
}
|
||||
|
||||
impl Violation for AsyncioDanglingTask {
|
||||
#[derive_message_formats]
|
||||
fn message(&self) -> String {
|
||||
let AsyncioDanglingTask { expr, method } = self;
|
||||
format!("Store a reference to the return value of `{expr}.{method}`")
|
||||
let AsyncioDanglingTask { method } = self;
|
||||
format!("Store a reference to the return value of `asyncio.{method}`")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,35 +80,23 @@ pub(crate) fn asyncio_dangling_task(expr: &Expr, semantic: &SemanticModel) -> Op
|
||||
})
|
||||
{
|
||||
return Some(Diagnostic::new(
|
||||
AsyncioDanglingTask {
|
||||
expr: "asyncio".to_string(),
|
||||
method,
|
||||
},
|
||||
AsyncioDanglingTask { method },
|
||||
expr.range(),
|
||||
));
|
||||
}
|
||||
|
||||
// Ex) `loop = ...; loop.create_task(...)`
|
||||
// Ex) `loop = asyncio.get_running_loop(); loop.create_task(...)`
|
||||
if let Expr::Attribute(ast::ExprAttribute { attr, value, .. }) = func.as_ref() {
|
||||
if attr == "create_task" {
|
||||
if let Expr::Name(name) = value.as_ref() {
|
||||
if typing::resolve_assignment(value, semantic).is_some_and(|call_path| {
|
||||
matches!(
|
||||
call_path.as_slice(),
|
||||
[
|
||||
"asyncio",
|
||||
"get_event_loop" | "get_running_loop" | "new_event_loop"
|
||||
]
|
||||
)
|
||||
}) {
|
||||
return Some(Diagnostic::new(
|
||||
AsyncioDanglingTask {
|
||||
expr: name.id.to_string(),
|
||||
method: Method::CreateTask,
|
||||
},
|
||||
expr.range(),
|
||||
));
|
||||
}
|
||||
if typing::resolve_assignment(value, semantic).is_some_and(|call_path| {
|
||||
matches!(call_path.as_slice(), ["asyncio", "get_running_loop"])
|
||||
}) {
|
||||
return Some(Diagnostic::new(
|
||||
AsyncioDanglingTask {
|
||||
method: Method::CreateTask,
|
||||
},
|
||||
expr.range(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ RUF006.py:68:12: RUF006 Store a reference to the return value of `asyncio.create
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF006
|
||||
|
|
||||
|
||||
RUF006.py:74:26: RUF006 Store a reference to the return value of `loop.create_task`
|
||||
RUF006.py:74:26: RUF006 Store a reference to the return value of `asyncio.create_task`
|
||||
|
|
||||
72 | def f():
|
||||
73 | loop = asyncio.get_running_loop()
|
||||
@@ -33,7 +33,7 @@ RUF006.py:74:26: RUF006 Store a reference to the return value of `loop.create_ta
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF006
|
||||
|
|
||||
|
||||
RUF006.py:97:5: RUF006 Store a reference to the return value of `loop.create_task`
|
||||
RUF006.py:97:5: RUF006 Store a reference to the return value of `asyncio.create_task`
|
||||
|
|
||||
95 | def f():
|
||||
96 | loop = asyncio.get_running_loop()
|
||||
@@ -41,24 +41,4 @@ RUF006.py:97:5: RUF006 Store a reference to the return value of `loop.create_tas
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF006
|
||||
|
|
||||
|
||||
RUF006.py:170:5: RUF006 Store a reference to the return value of `loop.create_task`
|
||||
|
|
||||
168 | def f():
|
||||
169 | loop = asyncio.new_event_loop()
|
||||
170 | loop.create_task(main()) # Error
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^ RUF006
|
||||
171 |
|
||||
172 | # Error
|
||||
|
|
||||
|
||||
RUF006.py:175:5: RUF006 Store a reference to the return value of `loop.create_task`
|
||||
|
|
||||
173 | def f():
|
||||
174 | loop = asyncio.get_event_loop()
|
||||
175 | loop.create_task(main()) # Error
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^ RUF006
|
||||
176 |
|
||||
177 | # OK
|
||||
|
|
||||
|
||||
|
||||
|
||||
@@ -2,7 +2,8 @@ use ruff_python_ast::BytesLiteral;
|
||||
use ruff_text_size::Ranged;
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::string::{StringNormalizer, StringPart};
|
||||
use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
|
||||
use crate::string::{Quoting, StringPart};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct FormatBytesLiteral;
|
||||
@@ -11,9 +12,14 @@ impl FormatNodeRule<BytesLiteral> for FormatBytesLiteral {
|
||||
fn fmt_fields(&self, item: &BytesLiteral, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
let locator = f.context().locator();
|
||||
|
||||
StringNormalizer::from_context(f.context())
|
||||
.with_preferred_quote_style(f.options().quote_style())
|
||||
.normalize(&StringPart::from_source(item.range(), &locator), &locator)
|
||||
StringPart::from_source(item.range(), &locator)
|
||||
.normalize(
|
||||
Quoting::CanChange,
|
||||
&locator,
|
||||
f.options().quote_style(),
|
||||
f.context().docstring(),
|
||||
is_hex_codes_in_unicode_sequences_enabled(f.context()),
|
||||
)
|
||||
.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,8 @@ use ruff_python_ast::FString;
|
||||
use ruff_text_size::Ranged;
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::string::{Quoting, StringNormalizer, StringPart};
|
||||
use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
|
||||
use crate::string::{Quoting, StringPart};
|
||||
|
||||
/// Formats an f-string which is part of a larger f-string expression.
|
||||
///
|
||||
@@ -25,12 +26,13 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
||||
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
let locator = f.context().locator();
|
||||
|
||||
let result = StringNormalizer::from_context(f.context())
|
||||
.with_quoting(self.quoting)
|
||||
.with_preferred_quote_style(f.options().quote_style())
|
||||
let result = StringPart::from_source(self.value.range(), &locator)
|
||||
.normalize(
|
||||
&StringPart::from_source(self.value.range(), &locator),
|
||||
self.quoting,
|
||||
&locator,
|
||||
f.options().quote_style(),
|
||||
f.context().docstring(),
|
||||
is_hex_codes_in_unicode_sequences_enabled(f.context()),
|
||||
)
|
||||
.fmt(f);
|
||||
|
||||
|
||||
@@ -2,7 +2,8 @@ use ruff_python_ast::StringLiteral;
|
||||
use ruff_text_size::Ranged;
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::string::{docstring, Quoting, StringNormalizer, StringPart};
|
||||
use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
|
||||
use crate::string::{docstring, Quoting, StringPart};
|
||||
use crate::QuoteStyle;
|
||||
|
||||
pub(crate) struct FormatStringLiteral<'a> {
|
||||
@@ -58,13 +59,13 @@ impl Format<PyFormatContext<'_>> for FormatStringLiteral<'_> {
|
||||
quote_style
|
||||
};
|
||||
|
||||
let normalized = StringNormalizer::from_context(f.context())
|
||||
.with_quoting(self.layout.quoting())
|
||||
.with_preferred_quote_style(quote_style)
|
||||
.normalize(
|
||||
&StringPart::from_source(self.value.range(), &locator),
|
||||
&locator,
|
||||
);
|
||||
let normalized = StringPart::from_source(self.value.range(), &locator).normalize(
|
||||
self.layout.quoting(),
|
||||
&locator,
|
||||
quote_style,
|
||||
f.context().docstring(),
|
||||
is_hex_codes_in_unicode_sequences_enabled(f.context()),
|
||||
);
|
||||
|
||||
if self.layout.is_docstring() {
|
||||
docstring::format(&normalized, f)
|
||||
|
||||
@@ -1,220 +0,0 @@
|
||||
use std::iter::FusedIterator;
|
||||
|
||||
use memchr::memchr2;
|
||||
|
||||
use ruff_python_ast::{
|
||||
self as ast, AnyNodeRef, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
|
||||
StringLiteral,
|
||||
};
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange};
|
||||
|
||||
use crate::expression::expr_f_string::f_string_quoting;
|
||||
use crate::other::f_string::FormatFString;
|
||||
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
|
||||
use crate::prelude::*;
|
||||
use crate::string::{Quoting, StringPrefix, StringQuotes};
|
||||
|
||||
/// Represents any kind of string expression. This could be either a string,
|
||||
/// bytes or f-string.
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub(crate) enum AnyString<'a> {
|
||||
String(&'a ExprStringLiteral),
|
||||
Bytes(&'a ExprBytesLiteral),
|
||||
FString(&'a ExprFString),
|
||||
}
|
||||
|
||||
impl<'a> AnyString<'a> {
|
||||
/// Creates a new [`AnyString`] from the given [`Expr`].
|
||||
///
|
||||
/// Returns `None` if the expression is not either a string, bytes or f-string.
|
||||
pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> {
|
||||
match expression {
|
||||
Expr::StringLiteral(string) => Some(AnyString::String(string)),
|
||||
Expr::BytesLiteral(bytes) => Some(AnyString::Bytes(bytes)),
|
||||
Expr::FString(fstring) => Some(AnyString::FString(fstring)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if the string is implicitly concatenated.
|
||||
pub(crate) fn is_implicit_concatenated(self) -> bool {
|
||||
match self {
|
||||
Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(),
|
||||
Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(),
|
||||
Self::FString(ExprFString { value, .. }) => value.is_implicit_concatenated(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the quoting to be used for this string.
|
||||
pub(super) fn quoting(self, locator: &Locator<'_>) -> Quoting {
|
||||
match self {
|
||||
Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
|
||||
Self::FString(f_string) => f_string_quoting(f_string, locator),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a vector of all the [`AnyStringPart`] of this string.
|
||||
pub(super) fn parts(self, quoting: Quoting) -> AnyStringPartsIter<'a> {
|
||||
match self {
|
||||
Self::String(ExprStringLiteral { value, .. }) => {
|
||||
AnyStringPartsIter::String(value.iter())
|
||||
}
|
||||
Self::Bytes(ExprBytesLiteral { value, .. }) => AnyStringPartsIter::Bytes(value.iter()),
|
||||
Self::FString(ExprFString { value, .. }) => {
|
||||
AnyStringPartsIter::FString(value.iter(), quoting)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_multiline(self, source: &str) -> bool {
|
||||
match self {
|
||||
AnyString::String(_) | AnyString::Bytes(_) => {
|
||||
let contents = &source[self.range()];
|
||||
let prefix = StringPrefix::parse(contents);
|
||||
let quotes = StringQuotes::parse(
|
||||
&contents[TextRange::new(prefix.text_len(), contents.text_len())],
|
||||
);
|
||||
|
||||
quotes.is_some_and(StringQuotes::is_triple)
|
||||
&& memchr2(b'\n', b'\r', contents.as_bytes()).is_some()
|
||||
}
|
||||
AnyString::FString(fstring) => {
|
||||
memchr2(b'\n', b'\r', source[fstring.range].as_bytes()).is_some()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Ranged for AnyString<'_> {
|
||||
fn range(&self) -> TextRange {
|
||||
match self {
|
||||
Self::String(expr) => expr.range(),
|
||||
Self::Bytes(expr) => expr.range(),
|
||||
Self::FString(expr) => expr.range(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> {
|
||||
fn from(value: &AnyString<'a>) -> Self {
|
||||
match value {
|
||||
AnyString::String(expr) => AnyNodeRef::ExprStringLiteral(expr),
|
||||
AnyString::Bytes(expr) => AnyNodeRef::ExprBytesLiteral(expr),
|
||||
AnyString::FString(expr) => AnyNodeRef::ExprFString(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<AnyString<'a>> for AnyNodeRef<'a> {
|
||||
fn from(value: AnyString<'a>) -> Self {
|
||||
AnyNodeRef::from(&value)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
|
||||
fn from(value: &AnyString<'a>) -> Self {
|
||||
match value {
|
||||
AnyString::String(expr) => ExpressionRef::StringLiteral(expr),
|
||||
AnyString::Bytes(expr) => ExpressionRef::BytesLiteral(expr),
|
||||
AnyString::FString(expr) => ExpressionRef::FString(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(super) enum AnyStringPartsIter<'a> {
|
||||
String(std::slice::Iter<'a, StringLiteral>),
|
||||
Bytes(std::slice::Iter<'a, ast::BytesLiteral>),
|
||||
FString(std::slice::Iter<'a, ast::FStringPart>, Quoting),
|
||||
}
|
||||
|
||||
impl<'a> Iterator for AnyStringPartsIter<'a> {
|
||||
type Item = AnyStringPart<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let part = match self {
|
||||
Self::String(inner) => {
|
||||
let part = inner.next()?;
|
||||
AnyStringPart::String {
|
||||
part,
|
||||
layout: StringLiteralKind::String,
|
||||
}
|
||||
}
|
||||
Self::Bytes(inner) => AnyStringPart::Bytes(inner.next()?),
|
||||
Self::FString(inner, quoting) => {
|
||||
let part = inner.next()?;
|
||||
match part {
|
||||
ast::FStringPart::Literal(string_literal) => AnyStringPart::String {
|
||||
part: string_literal,
|
||||
layout: StringLiteralKind::InImplicitlyConcatenatedFString(*quoting),
|
||||
},
|
||||
ast::FStringPart::FString(f_string) => AnyStringPart::FString {
|
||||
part: f_string,
|
||||
quoting: *quoting,
|
||||
},
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Some(part)
|
||||
}
|
||||
}
|
||||
|
||||
impl FusedIterator for AnyStringPartsIter<'_> {}
|
||||
|
||||
/// Represents any kind of string which is part of an implicitly concatenated
|
||||
/// string. This could be either a string, bytes or f-string.
|
||||
///
|
||||
/// This is constructed from the [`AnyString::parts`] method on [`AnyString`].
|
||||
#[derive(Clone, Debug)]
|
||||
pub(super) enum AnyStringPart<'a> {
|
||||
String {
|
||||
part: &'a ast::StringLiteral,
|
||||
layout: StringLiteralKind,
|
||||
},
|
||||
Bytes(&'a ast::BytesLiteral),
|
||||
FString {
|
||||
part: &'a ast::FString,
|
||||
quoting: Quoting,
|
||||
},
|
||||
}
|
||||
|
||||
impl AnyStringPart<'_> {
|
||||
pub(super) fn is_multiline(self, source: &str) -> bool {
|
||||
let text = &source[self.range()];
|
||||
memchr2(b'\n', b'\r', text.as_bytes()).is_some()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
|
||||
fn from(value: &AnyStringPart<'a>) -> Self {
|
||||
match value {
|
||||
AnyStringPart::String { part, .. } => AnyNodeRef::StringLiteral(part),
|
||||
AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part),
|
||||
AnyStringPart::FString { part, .. } => AnyNodeRef::FString(part),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Ranged for AnyStringPart<'_> {
|
||||
fn range(&self) -> TextRange {
|
||||
match self {
|
||||
Self::String { part, .. } => part.range(),
|
||||
Self::Bytes(part) => part.range(),
|
||||
Self::FString { part, .. } => part.range(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Format<PyFormatContext<'_>> for AnyStringPart<'_> {
|
||||
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
match self {
|
||||
AnyStringPart::String { part, layout } => {
|
||||
FormatStringLiteral::new(part, *layout).fmt(f)
|
||||
}
|
||||
AnyStringPart::Bytes(bytes_literal) => bytes_literal.format().fmt(f),
|
||||
AnyStringPart::FString { part, quoting } => FormatFString::new(part, *quoting).fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -109,7 +109,7 @@ use super::{NormalizedString, QuoteChar};
|
||||
/// `indent-width * spaces` to tabs because doing so could break ASCII art and other docstrings
|
||||
/// that use spaces for alignment.
|
||||
pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
let docstring = &normalized.text();
|
||||
let docstring = &normalized.text;
|
||||
|
||||
// Black doesn't change the indentation of docstrings that contain an escaped newline
|
||||
if contains_unescaped_newline(docstring) {
|
||||
@@ -125,7 +125,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
||||
let mut lines = docstring.split('\n').peekable();
|
||||
|
||||
// Start the string
|
||||
write!(f, [normalized.prefix(), normalized.quotes()])?;
|
||||
write!(f, [normalized.prefix, normalized.quotes])?;
|
||||
// We track where in the source docstring we are (in source code byte offsets)
|
||||
let mut offset = normalized.start();
|
||||
|
||||
@@ -141,7 +141,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
||||
|
||||
// Edge case: The first line is `""" "content`, so we need to insert chaperone space that keep
|
||||
// inner quotes and closing quotes from getting to close to avoid `""""content`
|
||||
if trim_both.starts_with(normalized.quotes().quote_char.as_char()) {
|
||||
if trim_both.starts_with(normalized.quotes.quote_char.as_char()) {
|
||||
space().fmt(f)?;
|
||||
}
|
||||
|
||||
@@ -168,7 +168,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
||||
{
|
||||
space().fmt(f)?;
|
||||
}
|
||||
normalized.quotes().fmt(f)?;
|
||||
normalized.quotes.fmt(f)?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -194,7 +194,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
||||
offset,
|
||||
stripped_indentation,
|
||||
already_normalized,
|
||||
quote_char: normalized.quotes().quote_char,
|
||||
quote_char: normalized.quotes.quote_char,
|
||||
code_example: CodeExample::default(),
|
||||
}
|
||||
.add_iter(lines)?;
|
||||
@@ -207,7 +207,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
||||
space().fmt(f)?;
|
||||
}
|
||||
|
||||
write!(f, [normalized.quotes()])
|
||||
write!(f, [normalized.quotes])
|
||||
}
|
||||
|
||||
fn contains_unescaped_newline(haystack: &str) -> bool {
|
||||
@@ -1569,7 +1569,7 @@ fn docstring_format_source(
|
||||
/// that avoids `content""""` and `content\"""`. This does only applies to un-escaped backslashes,
|
||||
/// so `content\\ """` doesn't need a space while `content\\\ """` does.
|
||||
fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool {
|
||||
trim_end.ends_with(normalized.quotes().quote_char.as_char())
|
||||
trim_end.ends_with(normalized.quotes.quote_char.as_char())
|
||||
|| trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
|
||||
}
|
||||
|
||||
|
||||
@@ -1,19 +1,26 @@
|
||||
use bitflags::bitflags;
|
||||
use std::borrow::Cow;
|
||||
use std::iter::FusedIterator;
|
||||
|
||||
use bitflags::bitflags;
|
||||
use memchr::memchr2;
|
||||
|
||||
pub(crate) use any::AnyString;
|
||||
pub(crate) use normalize::{NormalizedString, StringNormalizer};
|
||||
use ruff_formatter::{format_args, write};
|
||||
use ruff_python_ast::{
|
||||
self as ast, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
|
||||
};
|
||||
use ruff_python_ast::{AnyNodeRef, StringLiteral};
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
|
||||
use crate::comments::{leading_comments, trailing_comments};
|
||||
use crate::expression::expr_f_string::f_string_quoting;
|
||||
use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space;
|
||||
use crate::other::f_string::FormatFString;
|
||||
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
|
||||
use crate::prelude::*;
|
||||
use crate::QuoteStyle;
|
||||
|
||||
mod any;
|
||||
pub(crate) mod docstring;
|
||||
mod normalize;
|
||||
|
||||
#[derive(Copy, Clone, Debug, Default)]
|
||||
pub(crate) enum Quoting {
|
||||
@@ -22,6 +29,202 @@ pub(crate) enum Quoting {
|
||||
Preserve,
|
||||
}
|
||||
|
||||
/// Represents any kind of string expression. This could be either a string,
|
||||
/// bytes or f-string.
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub(crate) enum AnyString<'a> {
|
||||
String(&'a ExprStringLiteral),
|
||||
Bytes(&'a ExprBytesLiteral),
|
||||
FString(&'a ExprFString),
|
||||
}
|
||||
|
||||
impl<'a> AnyString<'a> {
|
||||
/// Creates a new [`AnyString`] from the given [`Expr`].
|
||||
///
|
||||
/// Returns `None` if the expression is not either a string, bytes or f-string.
|
||||
pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> {
|
||||
match expression {
|
||||
Expr::StringLiteral(string) => Some(AnyString::String(string)),
|
||||
Expr::BytesLiteral(bytes) => Some(AnyString::Bytes(bytes)),
|
||||
Expr::FString(fstring) => Some(AnyString::FString(fstring)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if the string is implicitly concatenated.
|
||||
pub(crate) fn is_implicit_concatenated(self) -> bool {
|
||||
match self {
|
||||
Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(),
|
||||
Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(),
|
||||
Self::FString(ExprFString { value, .. }) => value.is_implicit_concatenated(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the quoting to be used for this string.
|
||||
fn quoting(self, locator: &Locator<'_>) -> Quoting {
|
||||
match self {
|
||||
Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
|
||||
Self::FString(f_string) => f_string_quoting(f_string, locator),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a vector of all the [`AnyStringPart`] of this string.
|
||||
fn parts(self, quoting: Quoting) -> AnyStringPartsIter<'a> {
|
||||
match self {
|
||||
Self::String(ExprStringLiteral { value, .. }) => {
|
||||
AnyStringPartsIter::String(value.iter())
|
||||
}
|
||||
Self::Bytes(ExprBytesLiteral { value, .. }) => AnyStringPartsIter::Bytes(value.iter()),
|
||||
Self::FString(ExprFString { value, .. }) => {
|
||||
AnyStringPartsIter::FString(value.iter(), quoting)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_multiline(self, source: &str) -> bool {
|
||||
match self {
|
||||
AnyString::String(_) | AnyString::Bytes(_) => {
|
||||
let contents = &source[self.range()];
|
||||
let prefix = StringPrefix::parse(contents);
|
||||
let quotes = StringQuotes::parse(
|
||||
&contents[TextRange::new(prefix.text_len(), contents.text_len())],
|
||||
);
|
||||
|
||||
quotes.is_some_and(StringQuotes::is_triple)
|
||||
&& memchr2(b'\n', b'\r', contents.as_bytes()).is_some()
|
||||
}
|
||||
AnyString::FString(fstring) => {
|
||||
memchr2(b'\n', b'\r', source[fstring.range].as_bytes()).is_some()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Ranged for AnyString<'_> {
|
||||
fn range(&self) -> TextRange {
|
||||
match self {
|
||||
Self::String(expr) => expr.range(),
|
||||
Self::Bytes(expr) => expr.range(),
|
||||
Self::FString(expr) => expr.range(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> {
|
||||
fn from(value: &AnyString<'a>) -> Self {
|
||||
match value {
|
||||
AnyString::String(expr) => AnyNodeRef::ExprStringLiteral(expr),
|
||||
AnyString::Bytes(expr) => AnyNodeRef::ExprBytesLiteral(expr),
|
||||
AnyString::FString(expr) => AnyNodeRef::ExprFString(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<AnyString<'a>> for AnyNodeRef<'a> {
|
||||
fn from(value: AnyString<'a>) -> Self {
|
||||
AnyNodeRef::from(&value)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
|
||||
fn from(value: &AnyString<'a>) -> Self {
|
||||
match value {
|
||||
AnyString::String(expr) => ExpressionRef::StringLiteral(expr),
|
||||
AnyString::Bytes(expr) => ExpressionRef::BytesLiteral(expr),
|
||||
AnyString::FString(expr) => ExpressionRef::FString(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum AnyStringPartsIter<'a> {
|
||||
String(std::slice::Iter<'a, StringLiteral>),
|
||||
Bytes(std::slice::Iter<'a, ast::BytesLiteral>),
|
||||
FString(std::slice::Iter<'a, ast::FStringPart>, Quoting),
|
||||
}
|
||||
|
||||
impl<'a> Iterator for AnyStringPartsIter<'a> {
|
||||
type Item = AnyStringPart<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let part = match self {
|
||||
Self::String(inner) => {
|
||||
let part = inner.next()?;
|
||||
AnyStringPart::String {
|
||||
part,
|
||||
layout: StringLiteralKind::String,
|
||||
}
|
||||
}
|
||||
Self::Bytes(inner) => AnyStringPart::Bytes(inner.next()?),
|
||||
Self::FString(inner, quoting) => {
|
||||
let part = inner.next()?;
|
||||
match part {
|
||||
ast::FStringPart::Literal(string_literal) => AnyStringPart::String {
|
||||
part: string_literal,
|
||||
layout: StringLiteralKind::InImplicitlyConcatenatedFString(*quoting),
|
||||
},
|
||||
ast::FStringPart::FString(f_string) => AnyStringPart::FString {
|
||||
part: f_string,
|
||||
quoting: *quoting,
|
||||
},
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Some(part)
|
||||
}
|
||||
}
|
||||
|
||||
impl FusedIterator for AnyStringPartsIter<'_> {}
|
||||
|
||||
/// Represents any kind of string which is part of an implicitly concatenated
|
||||
/// string. This could be either a string, bytes or f-string.
|
||||
///
|
||||
/// This is constructed from the [`AnyString::parts`] method on [`AnyString`].
|
||||
#[derive(Clone, Debug)]
|
||||
enum AnyStringPart<'a> {
|
||||
String {
|
||||
part: &'a ast::StringLiteral,
|
||||
layout: StringLiteralKind,
|
||||
},
|
||||
Bytes(&'a ast::BytesLiteral),
|
||||
FString {
|
||||
part: &'a ast::FString,
|
||||
quoting: Quoting,
|
||||
},
|
||||
}
|
||||
|
||||
impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
|
||||
fn from(value: &AnyStringPart<'a>) -> Self {
|
||||
match value {
|
||||
AnyStringPart::String { part, .. } => AnyNodeRef::StringLiteral(part),
|
||||
AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part),
|
||||
AnyStringPart::FString { part, .. } => AnyNodeRef::FString(part),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Ranged for AnyStringPart<'_> {
|
||||
fn range(&self) -> TextRange {
|
||||
match self {
|
||||
Self::String { part, .. } => part.range(),
|
||||
Self::Bytes(part) => part.range(),
|
||||
Self::FString { part, .. } => part.range(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Format<PyFormatContext<'_>> for AnyStringPart<'_> {
|
||||
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
match self {
|
||||
AnyStringPart::String { part, layout } => {
|
||||
FormatStringLiteral::new(part, *layout).fmt(f)
|
||||
}
|
||||
AnyStringPart::Bytes(bytes_literal) => bytes_literal.format().fmt(f),
|
||||
AnyStringPart::FString { part, quoting } => FormatFString::new(part, *quoting).fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Formats any implicitly concatenated string. This could be any valid combination
|
||||
/// of string, bytes or f-string literals.
|
||||
pub(crate) struct FormatStringContinuation<'a> {
|
||||
@@ -39,120 +242,18 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
|
||||
let comments = f.context().comments().clone();
|
||||
let quoting = self.string.quoting(&f.context().locator());
|
||||
|
||||
let parts = self.string.parts(quoting);
|
||||
let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space());
|
||||
|
||||
// Don't try the flat layout if it is know that the implicit string remains on multiple lines either because one
|
||||
// part is a multline or a part has a leading or trailing comment.
|
||||
let should_try_flat = !parts.clone().any(|part| {
|
||||
let part_comments = comments.leading_dangling_trailing(&part);
|
||||
|
||||
part.is_multiline(f.context().source())
|
||||
|| part_comments.has_leading()
|
||||
|| part_comments.has_trailing()
|
||||
});
|
||||
|
||||
let format_flat = format_with(|f: &mut PyFormatter| {
|
||||
let mut merged_prefix = StringPrefix::empty();
|
||||
let mut all_raw = true;
|
||||
let quotes = parts.clone().next().map_or(
|
||||
StringQuotes {
|
||||
triple: false,
|
||||
quote_char: QuoteChar::Double,
|
||||
},
|
||||
|part| StringPart::from_source(part.range(), &f.context().locator()).quotes,
|
||||
);
|
||||
|
||||
for part in parts.clone() {
|
||||
let string_part = StringPart::from_source(part.range(), &f.context().locator());
|
||||
|
||||
let prefix = string_part.prefix;
|
||||
merged_prefix = prefix.union(merged_prefix);
|
||||
all_raw &= prefix.is_raw_string();
|
||||
|
||||
// quotes are more complicated. We need to collect the statistics about the used quotes for each string
|
||||
// - number of single quotes
|
||||
// - number of double quotes
|
||||
// - number of triple quotes
|
||||
// And they need to be normalized as a second step
|
||||
// Also requires tracking how many times a simple string uses an escaped triple quoted sequence to avoid
|
||||
// stability issues.
|
||||
}
|
||||
|
||||
// Prefer lower case raw string flags over uppercase if both are present.
|
||||
if merged_prefix.contains(StringPrefix::RAW)
|
||||
&& merged_prefix.contains(StringPrefix::RAW_UPPER)
|
||||
{
|
||||
merged_prefix.remove(StringPrefix::RAW_UPPER);
|
||||
}
|
||||
|
||||
// Remove the raw prefix if there's a mixture of raw and non-raw string. The formatting code coming later normalizes raw strings to regular
|
||||
// strings if the flag isn't present.
|
||||
if !all_raw {
|
||||
merged_prefix.remove(StringPrefix::RAW);
|
||||
}
|
||||
|
||||
// We need to find the common prefix and quotes for all parts and use that one.
|
||||
// no prefix: easy
|
||||
// bitflags! {
|
||||
// #[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
// pub(crate) struct StringPrefix: u8 {
|
||||
// const UNICODE = 0b0000_0001;
|
||||
// /// `r"test"`
|
||||
// const RAW = 0b0000_0010;
|
||||
// /// `R"test"
|
||||
// const RAW_UPPER = 0b0000_0100;
|
||||
// const BYTE = 0b0000_1000;
|
||||
// const F_STRING = 0b0001_0000;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// Prefix precedence:
|
||||
// - Unicode -> Always remove
|
||||
// - Raw upper -> Remove except when all parts are raw upper
|
||||
// - Raw -> Remove except when all parts are raw or raw upper.
|
||||
// - F-String -> Preserve
|
||||
// - Bytes -> Preserve
|
||||
// Quotes:
|
||||
// - Single quotes: Identify the number of single and double quotes in the string and use the one with the least count.
|
||||
// - single and triple: Use triple quotes
|
||||
// - triples: Use `choose_quote` for every part and use the one with the highest count
|
||||
|
||||
write!(f, [merged_prefix, quotes])?;
|
||||
for part in parts.clone() {
|
||||
let string_part = StringPart::from_source(part.range(), &f.context().locator());
|
||||
|
||||
write!(f, [source_text_slice(string_part.content_range)])?;
|
||||
}
|
||||
|
||||
quotes.fmt(f)
|
||||
});
|
||||
|
||||
let format_expanded = format_with(|f| {
|
||||
let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space());
|
||||
|
||||
for part in parts.clone() {
|
||||
joiner.entry(&format_args![
|
||||
line_suffix_boundary(),
|
||||
leading_comments(comments.leading(&part)),
|
||||
part,
|
||||
trailing_comments(comments.trailing(&part))
|
||||
]);
|
||||
}
|
||||
|
||||
joiner.finish()
|
||||
});
|
||||
|
||||
// TODO: where's the group coming from?
|
||||
|
||||
if should_try_flat {
|
||||
group(&format_args![
|
||||
if_group_fits_on_line(&format_flat),
|
||||
if_group_breaks(&format_expanded)
|
||||
])
|
||||
.fmt(f)
|
||||
} else {
|
||||
format_expanded.fmt(f)
|
||||
for part in self.string.parts(quoting) {
|
||||
joiner.entry(&format_args![
|
||||
line_suffix_boundary(),
|
||||
leading_comments(comments.leading(&part)),
|
||||
part,
|
||||
trailing_comments(comments.trailing(&part))
|
||||
]);
|
||||
}
|
||||
|
||||
joiner.finish()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -190,22 +291,142 @@ impl StringPart {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the prefix of the string part.
|
||||
pub(crate) const fn prefix(&self) -> StringPrefix {
|
||||
self.prefix
|
||||
}
|
||||
/// Computes the strings preferred quotes and normalizes its content.
|
||||
///
|
||||
/// The parent docstring quote style should be set when formatting a code
|
||||
/// snippet within the docstring. The quote style should correspond to the
|
||||
/// style of quotes used by said docstring. Normalization will ensure the
|
||||
/// quoting styles don't conflict.
|
||||
pub(crate) fn normalize<'a>(
|
||||
self,
|
||||
quoting: Quoting,
|
||||
locator: &'a Locator,
|
||||
configured_style: QuoteStyle,
|
||||
parent_docstring_quote_char: Option<QuoteChar>,
|
||||
normalize_hex: bool,
|
||||
) -> NormalizedString<'a> {
|
||||
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
||||
// Except when using quote-style-preserve.
|
||||
let preferred_style = if self.quotes.triple {
|
||||
// ... unless we're formatting a code snippet inside a docstring,
|
||||
// then we specifically want to invert our quote style to avoid
|
||||
// writing out invalid Python.
|
||||
//
|
||||
// It's worth pointing out that we can actually wind up being
|
||||
// somewhat out of sync with PEP8 in this case. Consider this
|
||||
// example:
|
||||
//
|
||||
// def foo():
|
||||
// '''
|
||||
// Something.
|
||||
//
|
||||
// >>> """tricksy"""
|
||||
// '''
|
||||
// pass
|
||||
//
|
||||
// Ideally, this would be reformatted as:
|
||||
//
|
||||
// def foo():
|
||||
// """
|
||||
// Something.
|
||||
//
|
||||
// >>> '''tricksy'''
|
||||
// """
|
||||
// pass
|
||||
//
|
||||
// But the logic here results in the original quoting being
|
||||
// preserved. This is because the quoting style of the outer
|
||||
// docstring is determined, in part, by looking at its contents. In
|
||||
// this case, it notices that it contains a `"""` and thus infers
|
||||
// that using `'''` would overall read better because it avoids
|
||||
// the need to escape the interior `"""`. Except... in this case,
|
||||
// the `"""` is actually part of a code snippet that could get
|
||||
// reformatted to using a different quoting style itself.
|
||||
//
|
||||
// Fixing this would, I believe, require some fairly seismic
|
||||
// changes to how formatting strings works. Namely, we would need
|
||||
// to look for code snippets before normalizing the docstring, and
|
||||
// then figure out the quoting style more holistically by looking
|
||||
// at the various kinds of quotes used in the code snippets and
|
||||
// what reformatting them might look like.
|
||||
//
|
||||
// Overall this is a bit of a corner case and just inverting the
|
||||
// style from what the parent ultimately decided upon works, even
|
||||
// if it doesn't have perfect alignment with PEP8.
|
||||
if let Some(quote) = parent_docstring_quote_char {
|
||||
QuoteStyle::from(quote.invert())
|
||||
} else if configured_style.is_preserve() {
|
||||
QuoteStyle::Preserve
|
||||
} else {
|
||||
QuoteStyle::Double
|
||||
}
|
||||
} else {
|
||||
configured_style
|
||||
};
|
||||
|
||||
/// Returns the surrounding quotes of the string part.
|
||||
pub(crate) const fn quotes(&self) -> StringQuotes {
|
||||
self.quotes
|
||||
}
|
||||
let raw_content = &locator.slice(self.content_range);
|
||||
|
||||
/// Returns the range of the string's content in the source (minus prefix and quotes).
|
||||
pub(crate) const fn content_range(&self) -> TextRange {
|
||||
let quotes = match quoting {
|
||||
Quoting::Preserve => self.quotes,
|
||||
Quoting::CanChange => {
|
||||
if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
|
||||
if self.prefix.is_raw_string() {
|
||||
choose_quotes_raw(raw_content, self.quotes, preferred_quote)
|
||||
} else {
|
||||
choose_quotes(raw_content, self.quotes, preferred_quote)
|
||||
}
|
||||
} else {
|
||||
self.quotes
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let normalized = normalize_string(raw_content, quotes, self.prefix, normalize_hex);
|
||||
|
||||
NormalizedString {
|
||||
prefix: self.prefix,
|
||||
content_range: self.content_range,
|
||||
text: normalized,
|
||||
quotes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct NormalizedString<'a> {
|
||||
prefix: StringPrefix,
|
||||
|
||||
/// The quotes of the normalized string (preferred quotes)
|
||||
quotes: StringQuotes,
|
||||
|
||||
/// The range of the string's content in the source (minus prefix and quotes).
|
||||
content_range: TextRange,
|
||||
|
||||
/// The normalized text
|
||||
text: Cow<'a, str>,
|
||||
}
|
||||
|
||||
impl Ranged for NormalizedString<'_> {
|
||||
fn range(&self) -> TextRange {
|
||||
self.content_range
|
||||
}
|
||||
}
|
||||
|
||||
impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
||||
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
|
||||
write!(f, [self.prefix, self.quotes])?;
|
||||
match &self.text {
|
||||
Cow::Borrowed(_) => {
|
||||
source_text_slice(self.range()).fmt(f)?;
|
||||
}
|
||||
Cow::Owned(normalized) => {
|
||||
text(normalized).fmt(f)?;
|
||||
}
|
||||
}
|
||||
self.quotes.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
bitflags! {
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct StringPrefix: u8 {
|
||||
@@ -286,6 +507,171 @@ impl Format<PyFormatContext<'_>> for StringPrefix {
|
||||
}
|
||||
}
|
||||
|
||||
/// Choose the appropriate quote style for a raw string.
|
||||
///
|
||||
/// The preferred quote style is chosen unless the string contains unescaped quotes of the
|
||||
/// preferred style. For example, `r"foo"` is chosen over `r'foo'` if the preferred quote
|
||||
/// style is double quotes.
|
||||
fn choose_quotes_raw(
|
||||
input: &str,
|
||||
quotes: StringQuotes,
|
||||
preferred_quote: QuoteChar,
|
||||
) -> StringQuotes {
|
||||
let preferred_quote_char = preferred_quote.as_char();
|
||||
let mut chars = input.chars().peekable();
|
||||
let contains_unescaped_configured_quotes = loop {
|
||||
match chars.next() {
|
||||
Some('\\') => {
|
||||
// Ignore escaped characters
|
||||
chars.next();
|
||||
}
|
||||
// `"` or `'`
|
||||
Some(c) if c == preferred_quote_char => {
|
||||
if !quotes.triple {
|
||||
break true;
|
||||
}
|
||||
|
||||
match chars.peek() {
|
||||
// We can't turn `r'''\""'''` into `r"""\"""""`, this would confuse the parser
|
||||
// about where the closing triple quotes start
|
||||
None => break true,
|
||||
Some(next) if *next == preferred_quote_char => {
|
||||
// `""` or `''`
|
||||
chars.next();
|
||||
|
||||
// We can't turn `r'''""'''` into `r""""""""`, nor can we have
|
||||
// `"""` or `'''` respectively inside the string
|
||||
if chars.peek().is_none() || chars.peek() == Some(&preferred_quote_char) {
|
||||
break true;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Some(_) => continue,
|
||||
None => break false,
|
||||
}
|
||||
};
|
||||
|
||||
StringQuotes {
|
||||
triple: quotes.triple,
|
||||
quote_char: if contains_unescaped_configured_quotes {
|
||||
quotes.quote_char
|
||||
} else {
|
||||
preferred_quote
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Choose the appropriate quote style for a string.
|
||||
///
|
||||
/// For single quoted strings, the preferred quote style is used, unless the alternative quote style
|
||||
/// would require fewer escapes.
|
||||
///
|
||||
/// For triple quoted strings, the preferred quote style is always used, unless the string contains
|
||||
/// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
|
||||
/// used unless the string contains `"""`).
|
||||
fn choose_quotes(input: &str, quotes: StringQuotes, preferred_quote: QuoteChar) -> StringQuotes {
|
||||
let quote = if quotes.triple {
|
||||
// True if the string contains a triple quote sequence of the configured quote style.
|
||||
let mut uses_triple_quotes = false;
|
||||
let mut chars = input.chars().peekable();
|
||||
|
||||
while let Some(c) = chars.next() {
|
||||
let preferred_quote_char = preferred_quote.as_char();
|
||||
match c {
|
||||
'\\' => {
|
||||
if matches!(chars.peek(), Some('"' | '\\')) {
|
||||
chars.next();
|
||||
}
|
||||
}
|
||||
// `"` or `'`
|
||||
c if c == preferred_quote_char => {
|
||||
match chars.peek().copied() {
|
||||
Some(c) if c == preferred_quote_char => {
|
||||
// `""` or `''`
|
||||
chars.next();
|
||||
|
||||
match chars.peek().copied() {
|
||||
Some(c) if c == preferred_quote_char => {
|
||||
// `"""` or `'''`
|
||||
chars.next();
|
||||
uses_triple_quotes = true;
|
||||
break;
|
||||
}
|
||||
Some(_) => {}
|
||||
None => {
|
||||
// Handle `''' ""'''`. At this point we have consumed both
|
||||
// double quotes, so on the next iteration the iterator is empty
|
||||
// and we'd miss the string ending with a preferred quote
|
||||
uses_triple_quotes = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(_) => {
|
||||
// A single quote char, this is ok
|
||||
}
|
||||
None => {
|
||||
// Trailing quote at the end of the comment
|
||||
uses_triple_quotes = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
|
||||
if uses_triple_quotes {
|
||||
// String contains a triple quote sequence of the configured quote style.
|
||||
// Keep the existing quote style.
|
||||
quotes.quote_char
|
||||
} else {
|
||||
preferred_quote
|
||||
}
|
||||
} else {
|
||||
let mut single_quotes = 0u32;
|
||||
let mut double_quotes = 0u32;
|
||||
|
||||
for c in input.chars() {
|
||||
match c {
|
||||
'\'' => {
|
||||
single_quotes += 1;
|
||||
}
|
||||
|
||||
'"' => {
|
||||
double_quotes += 1;
|
||||
}
|
||||
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
|
||||
match preferred_quote {
|
||||
QuoteChar::Single => {
|
||||
if single_quotes > double_quotes {
|
||||
QuoteChar::Double
|
||||
} else {
|
||||
QuoteChar::Single
|
||||
}
|
||||
}
|
||||
QuoteChar::Double => {
|
||||
if double_quotes > single_quotes {
|
||||
QuoteChar::Single
|
||||
} else {
|
||||
QuoteChar::Double
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
StringQuotes {
|
||||
triple: quotes.triple,
|
||||
quote_char: quote,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub(crate) struct StringQuotes {
|
||||
triple: bool,
|
||||
@@ -389,3 +775,269 @@ impl TryFrom<char> for QuoteChar {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
|
||||
/// with the provided [`StringQuotes`] style.
|
||||
///
|
||||
/// Returns the normalized string and whether it contains new lines.
|
||||
fn normalize_string(
|
||||
input: &str,
|
||||
quotes: StringQuotes,
|
||||
prefix: StringPrefix,
|
||||
normalize_hex: bool,
|
||||
) -> Cow<str> {
|
||||
// The normalized string if `input` is not yet normalized.
|
||||
// `output` must remain empty if `input` is already normalized.
|
||||
let mut output = String::new();
|
||||
// Tracks the last index of `input` that has been written to `output`.
|
||||
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
|
||||
let mut last_index = 0;
|
||||
|
||||
let quote = quotes.quote_char;
|
||||
let preferred_quote = quote.as_char();
|
||||
let opposite_quote = quote.invert().as_char();
|
||||
|
||||
let mut chars = input.char_indices().peekable();
|
||||
|
||||
let is_raw = prefix.is_raw_string();
|
||||
let is_fstring = prefix.is_fstring();
|
||||
let mut formatted_value_nesting = 0u32;
|
||||
|
||||
while let Some((index, c)) = chars.next() {
|
||||
if is_fstring && matches!(c, '{' | '}') {
|
||||
if chars.peek().copied().is_some_and(|(_, next)| next == c) {
|
||||
// Skip over the second character of the double braces
|
||||
chars.next();
|
||||
} else if c == '{' {
|
||||
formatted_value_nesting += 1;
|
||||
} else {
|
||||
// Safe to assume that `c == '}'` here because of the matched pattern above
|
||||
formatted_value_nesting = formatted_value_nesting.saturating_sub(1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if c == '\r' {
|
||||
output.push_str(&input[last_index..index]);
|
||||
|
||||
// Skip over the '\r' character, keep the `\n`
|
||||
if chars.peek().copied().is_some_and(|(_, next)| next == '\n') {
|
||||
chars.next();
|
||||
}
|
||||
// Replace the `\r` with a `\n`
|
||||
else {
|
||||
output.push('\n');
|
||||
}
|
||||
|
||||
last_index = index + '\r'.len_utf8();
|
||||
} else if !is_raw {
|
||||
if c == '\\' {
|
||||
if let Some((_, next)) = chars.clone().next() {
|
||||
if next == '\\' {
|
||||
// Skip over escaped backslashes
|
||||
chars.next();
|
||||
} else if normalize_hex {
|
||||
if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
|
||||
.and_then(|escape| {
|
||||
escape.normalize(&input[index + c.len_utf8() + next.len_utf8()..])
|
||||
})
|
||||
{
|
||||
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
|
||||
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
|
||||
let escape_start_offset = index + escape_start_len;
|
||||
if let Cow::Owned(normalised) = &normalised {
|
||||
output.push_str(&input[last_index..escape_start_offset]);
|
||||
output.push_str(normalised);
|
||||
last_index = escape_start_offset + normalised.len();
|
||||
};
|
||||
|
||||
// Move the `chars` iterator passed the escape sequence.
|
||||
// Simply reassigning `chars` doesn't work because the indices` would
|
||||
// then be off.
|
||||
for _ in 0..next.len_utf8() + normalised.len() {
|
||||
chars.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !quotes.triple {
|
||||
#[allow(clippy::if_same_then_else)]
|
||||
if next == opposite_quote && formatted_value_nesting == 0 {
|
||||
// Remove the escape by ending before the backslash and starting again with the quote
|
||||
chars.next();
|
||||
output.push_str(&input[last_index..index]);
|
||||
last_index = index + '\\'.len_utf8();
|
||||
} else if next == preferred_quote {
|
||||
// Quote is already escaped, skip over it.
|
||||
chars.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if !quotes.triple && c == preferred_quote && formatted_value_nesting == 0 {
|
||||
// Escape the quote
|
||||
output.push_str(&input[last_index..index]);
|
||||
output.push('\\');
|
||||
output.push(c);
|
||||
last_index = index + preferred_quote.len_utf8();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let normalized = if last_index == 0 {
|
||||
Cow::Borrowed(input)
|
||||
} else {
|
||||
output.push_str(&input[last_index..]);
|
||||
Cow::Owned(output)
|
||||
};
|
||||
|
||||
normalized
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
enum UnicodeEscape {
|
||||
/// A hex escape sequence of either 2 (`\x`), 4 (`\u`) or 8 (`\U`) hex characters.
|
||||
Hex(usize),
|
||||
|
||||
/// An escaped unicode name (`\N{name}`)
|
||||
CharacterName,
|
||||
}
|
||||
|
||||
impl UnicodeEscape {
|
||||
fn new(first: char, allow_unicode: bool) -> Option<UnicodeEscape> {
|
||||
Some(match first {
|
||||
'x' => UnicodeEscape::Hex(2),
|
||||
'u' if allow_unicode => UnicodeEscape::Hex(4),
|
||||
'U' if allow_unicode => UnicodeEscape::Hex(8),
|
||||
'N' if allow_unicode => UnicodeEscape::CharacterName,
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Normalises `\u..`, `\U..`, `\x..` and `\N{..}` escape sequences to:
|
||||
///
|
||||
/// * `\u`, `\U'` and `\x`: To use lower case for the characters `a-f`.
|
||||
/// * `\N`: To use uppercase letters
|
||||
fn normalize(self, input: &str) -> Option<Cow<str>> {
|
||||
let mut normalised = String::new();
|
||||
|
||||
let len = match self {
|
||||
UnicodeEscape::Hex(len) => {
|
||||
// It's not a valid escape sequence if the input string has fewer characters
|
||||
// left than required by the escape sequence.
|
||||
if input.len() < len {
|
||||
return None;
|
||||
}
|
||||
|
||||
for (index, c) in input.char_indices().take(len) {
|
||||
match c {
|
||||
'0'..='9' | 'a'..='f' => {
|
||||
if !normalised.is_empty() {
|
||||
normalised.push(c);
|
||||
}
|
||||
}
|
||||
'A'..='F' => {
|
||||
if normalised.is_empty() {
|
||||
normalised.reserve(len);
|
||||
normalised.push_str(&input[..index]);
|
||||
normalised.push(c.to_ascii_lowercase());
|
||||
} else {
|
||||
normalised.push(c.to_ascii_lowercase());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// not a valid escape sequence
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
len
|
||||
}
|
||||
UnicodeEscape::CharacterName => {
|
||||
let mut char_indices = input.char_indices();
|
||||
|
||||
if !matches!(char_indices.next(), Some((_, '{'))) {
|
||||
return None;
|
||||
}
|
||||
|
||||
loop {
|
||||
if let Some((index, c)) = char_indices.next() {
|
||||
match c {
|
||||
'}' => {
|
||||
if !normalised.is_empty() {
|
||||
normalised.push('}');
|
||||
}
|
||||
|
||||
// Name must be at least two characters long.
|
||||
if index < 3 {
|
||||
return None;
|
||||
}
|
||||
|
||||
break index + '}'.len_utf8();
|
||||
}
|
||||
'0'..='9' | 'A'..='Z' | ' ' | '-' => {
|
||||
if !normalised.is_empty() {
|
||||
normalised.push(c);
|
||||
}
|
||||
}
|
||||
'a'..='z' => {
|
||||
if normalised.is_empty() {
|
||||
normalised.reserve(c.len_utf8() + '}'.len_utf8());
|
||||
normalised.push_str(&input[..index]);
|
||||
normalised.push(c.to_ascii_uppercase());
|
||||
} else {
|
||||
normalised.push(c.to_ascii_uppercase());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Seems like an invalid escape sequence, don't normalise it.
|
||||
return None;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Unterminated escape sequence, don't normalise it.
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Some(if normalised.is_empty() {
|
||||
Cow::Borrowed(&input[..len])
|
||||
} else {
|
||||
Cow::Owned(normalised)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::string::{normalize_string, QuoteChar, StringPrefix, StringQuotes, UnicodeEscape};
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[test]
|
||||
fn normalize_32_escape() {
|
||||
let escape_sequence = UnicodeEscape::new('U', true).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Some(Cow::Owned("0001f60e".to_string())),
|
||||
escape_sequence.normalize("0001F60E")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_hex_in_byte_string() {
|
||||
let input = r"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
|
||||
|
||||
let normalized = normalize_string(
|
||||
input,
|
||||
StringQuotes {
|
||||
triple: false,
|
||||
quote_char: QuoteChar::Double,
|
||||
},
|
||||
StringPrefix::BYTE,
|
||||
true,
|
||||
);
|
||||
|
||||
assert_eq!(r"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a", &normalized);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,622 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
|
||||
use crate::string::{QuoteChar, Quoting, StringPart, StringPrefix, StringQuotes};
|
||||
use crate::QuoteStyle;
|
||||
|
||||
pub(crate) struct StringNormalizer {
|
||||
quoting: Quoting,
|
||||
preferred_quote_style: QuoteStyle,
|
||||
parent_docstring_quote_char: Option<QuoteChar>,
|
||||
normalize_hex: bool,
|
||||
}
|
||||
|
||||
impl StringNormalizer {
|
||||
pub(crate) fn from_context(context: &PyFormatContext<'_>) -> Self {
|
||||
Self {
|
||||
quoting: Quoting::default(),
|
||||
preferred_quote_style: QuoteStyle::default(),
|
||||
parent_docstring_quote_char: context.docstring(),
|
||||
normalize_hex: is_hex_codes_in_unicode_sequences_enabled(context),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn with_preferred_quote_style(mut self, quote_style: QuoteStyle) -> Self {
|
||||
self.preferred_quote_style = quote_style;
|
||||
self
|
||||
}
|
||||
|
||||
pub(crate) fn with_quoting(mut self, quoting: Quoting) -> Self {
|
||||
self.quoting = quoting;
|
||||
self
|
||||
}
|
||||
|
||||
/// Computes the strings preferred quotes.
|
||||
pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> StringQuotes {
|
||||
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
||||
// Except when using quote-style-preserve.
|
||||
let preferred_style = if string.quotes().triple {
|
||||
// ... unless we're formatting a code snippet inside a docstring,
|
||||
// then we specifically want to invert our quote style to avoid
|
||||
// writing out invalid Python.
|
||||
//
|
||||
// It's worth pointing out that we can actually wind up being
|
||||
// somewhat out of sync with PEP8 in this case. Consider this
|
||||
// example:
|
||||
//
|
||||
// def foo():
|
||||
// '''
|
||||
// Something.
|
||||
//
|
||||
// >>> """tricksy"""
|
||||
// '''
|
||||
// pass
|
||||
//
|
||||
// Ideally, this would be reformatted as:
|
||||
//
|
||||
// def foo():
|
||||
// """
|
||||
// Something.
|
||||
//
|
||||
// >>> '''tricksy'''
|
||||
// """
|
||||
// pass
|
||||
//
|
||||
// But the logic here results in the original quoting being
|
||||
// preserved. This is because the quoting style of the outer
|
||||
// docstring is determined, in part, by looking at its contents. In
|
||||
// this case, it notices that it contains a `"""` and thus infers
|
||||
// that using `'''` would overall read better because it avoids
|
||||
// the need to escape the interior `"""`. Except... in this case,
|
||||
// the `"""` is actually part of a code snippet that could get
|
||||
// reformatted to using a different quoting style itself.
|
||||
//
|
||||
// Fixing this would, I believe, require some fairly seismic
|
||||
// changes to how formatting strings works. Namely, we would need
|
||||
// to look for code snippets before normalizing the docstring, and
|
||||
// then figure out the quoting style more holistically by looking
|
||||
// at the various kinds of quotes used in the code snippets and
|
||||
// what reformatting them might look like.
|
||||
//
|
||||
// Overall this is a bit of a corner case and just inverting the
|
||||
// style from what the parent ultimately decided upon works, even
|
||||
// if it doesn't have perfect alignment with PEP8.
|
||||
if let Some(quote) = self.parent_docstring_quote_char {
|
||||
QuoteStyle::from(quote.invert())
|
||||
} else if self.preferred_quote_style.is_preserve() {
|
||||
QuoteStyle::Preserve
|
||||
} else {
|
||||
QuoteStyle::Double
|
||||
}
|
||||
} else {
|
||||
self.preferred_quote_style
|
||||
};
|
||||
|
||||
match self.quoting {
|
||||
Quoting::Preserve => string.quotes(),
|
||||
Quoting::CanChange => {
|
||||
if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
|
||||
let raw_content = locator.slice(string.content_range());
|
||||
if string.prefix().is_raw_string() {
|
||||
choose_quotes_for_raw_string(raw_content, string.quotes(), preferred_quote)
|
||||
} else {
|
||||
choose_quotes_impl(raw_content, string.quotes(), preferred_quote)
|
||||
}
|
||||
} else {
|
||||
string.quotes()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the strings preferred quotes and normalizes its content.
|
||||
pub(crate) fn normalize<'a>(
|
||||
&self,
|
||||
string: &StringPart,
|
||||
locator: &'a Locator,
|
||||
) -> NormalizedString<'a> {
|
||||
let raw_content = locator.slice(string.content_range());
|
||||
|
||||
let quotes = self.choose_quotes(string, locator);
|
||||
|
||||
let normalized = normalize_string(raw_content, quotes, string.prefix(), self.normalize_hex);
|
||||
|
||||
NormalizedString {
|
||||
prefix: string.prefix(),
|
||||
content_range: string.content_range(),
|
||||
text: normalized,
|
||||
quotes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct NormalizedString<'a> {
|
||||
prefix: crate::string::StringPrefix,
|
||||
|
||||
/// The quotes of the normalized string (preferred quotes)
|
||||
quotes: StringQuotes,
|
||||
|
||||
/// The range of the string's content in the source (minus prefix and quotes).
|
||||
content_range: TextRange,
|
||||
|
||||
/// The normalized text
|
||||
text: Cow<'a, str>,
|
||||
}
|
||||
|
||||
impl<'a> NormalizedString<'a> {
|
||||
pub(crate) fn text(&self) -> &Cow<'a, str> {
|
||||
&self.text
|
||||
}
|
||||
|
||||
pub(crate) fn quotes(&self) -> StringQuotes {
|
||||
self.quotes
|
||||
}
|
||||
|
||||
pub(crate) fn prefix(&self) -> StringPrefix {
|
||||
self.prefix
|
||||
}
|
||||
}
|
||||
|
||||
impl Ranged for NormalizedString<'_> {
|
||||
fn range(&self) -> TextRange {
|
||||
self.content_range
|
||||
}
|
||||
}
|
||||
|
||||
impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
||||
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
|
||||
ruff_formatter::write!(f, [self.prefix, self.quotes])?;
|
||||
match &self.text {
|
||||
Cow::Borrowed(_) => {
|
||||
source_text_slice(self.range()).fmt(f)?;
|
||||
}
|
||||
Cow::Owned(normalized) => {
|
||||
text(normalized).fmt(f)?;
|
||||
}
|
||||
}
|
||||
self.quotes.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
/// Choose the appropriate quote style for a raw string.
|
||||
///
|
||||
/// The preferred quote style is chosen unless the string contains unescaped quotes of the
|
||||
/// preferred style. For example, `r"foo"` is chosen over `r'foo'` if the preferred quote
|
||||
/// style is double quotes.
|
||||
fn choose_quotes_for_raw_string(
|
||||
input: &str,
|
||||
quotes: StringQuotes,
|
||||
preferred_quote: QuoteChar,
|
||||
) -> StringQuotes {
|
||||
let preferred_quote_char = preferred_quote.as_char();
|
||||
let mut chars = input.chars().peekable();
|
||||
let contains_unescaped_configured_quotes = loop {
|
||||
match chars.next() {
|
||||
Some('\\') => {
|
||||
// Ignore escaped characters
|
||||
chars.next();
|
||||
}
|
||||
// `"` or `'`
|
||||
Some(c) if c == preferred_quote_char => {
|
||||
if !quotes.triple {
|
||||
break true;
|
||||
}
|
||||
|
||||
match chars.peek() {
|
||||
// We can't turn `r'''\""'''` into `r"""\"""""`, this would confuse the parser
|
||||
// about where the closing triple quotes start
|
||||
None => break true,
|
||||
Some(next) if *next == preferred_quote_char => {
|
||||
// `""` or `''`
|
||||
chars.next();
|
||||
|
||||
// We can't turn `r'''""'''` into `r""""""""`, nor can we have
|
||||
// `"""` or `'''` respectively inside the string
|
||||
if chars.peek().is_none() || chars.peek() == Some(&preferred_quote_char) {
|
||||
break true;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Some(_) => continue,
|
||||
None => break false,
|
||||
}
|
||||
};
|
||||
|
||||
StringQuotes {
|
||||
triple: quotes.triple,
|
||||
quote_char: if contains_unescaped_configured_quotes {
|
||||
quotes.quote_char
|
||||
} else {
|
||||
preferred_quote
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Choose the appropriate quote style for a string.
|
||||
///
|
||||
/// For single quoted strings, the preferred quote style is used, unless the alternative quote style
|
||||
/// would require fewer escapes.
|
||||
///
|
||||
/// For triple quoted strings, the preferred quote style is always used, unless the string contains
|
||||
/// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
|
||||
/// used unless the string contains `"""`).
|
||||
fn choose_quotes_impl(
|
||||
input: &str,
|
||||
quotes: StringQuotes,
|
||||
preferred_quote: QuoteChar,
|
||||
) -> StringQuotes {
|
||||
let quote = if quotes.triple {
|
||||
// True if the string contains a triple quote sequence of the configured quote style.
|
||||
let mut uses_triple_quotes = false;
|
||||
let mut chars = input.chars().peekable();
|
||||
|
||||
while let Some(c) = chars.next() {
|
||||
let preferred_quote_char = preferred_quote.as_char();
|
||||
match c {
|
||||
'\\' => {
|
||||
if matches!(chars.peek(), Some('"' | '\\')) {
|
||||
chars.next();
|
||||
}
|
||||
}
|
||||
// `"` or `'`
|
||||
c if c == preferred_quote_char => {
|
||||
match chars.peek().copied() {
|
||||
Some(c) if c == preferred_quote_char => {
|
||||
// `""` or `''`
|
||||
chars.next();
|
||||
|
||||
match chars.peek().copied() {
|
||||
Some(c) if c == preferred_quote_char => {
|
||||
// `"""` or `'''`
|
||||
chars.next();
|
||||
uses_triple_quotes = true;
|
||||
break;
|
||||
}
|
||||
Some(_) => {}
|
||||
None => {
|
||||
// Handle `''' ""'''`. At this point we have consumed both
|
||||
// double quotes, so on the next iteration the iterator is empty
|
||||
// and we'd miss the string ending with a preferred quote
|
||||
uses_triple_quotes = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(_) => {
|
||||
// A single quote char, this is ok
|
||||
}
|
||||
None => {
|
||||
// Trailing quote at the end of the comment
|
||||
uses_triple_quotes = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
|
||||
if uses_triple_quotes {
|
||||
// String contains a triple quote sequence of the configured quote style.
|
||||
// Keep the existing quote style.
|
||||
quotes.quote_char
|
||||
} else {
|
||||
preferred_quote
|
||||
}
|
||||
} else {
|
||||
let mut single_quotes = 0u32;
|
||||
let mut double_quotes = 0u32;
|
||||
|
||||
for c in input.chars() {
|
||||
match c {
|
||||
'\'' => {
|
||||
single_quotes += 1;
|
||||
}
|
||||
|
||||
'"' => {
|
||||
double_quotes += 1;
|
||||
}
|
||||
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
|
||||
match preferred_quote {
|
||||
QuoteChar::Single => {
|
||||
if single_quotes > double_quotes {
|
||||
QuoteChar::Double
|
||||
} else {
|
||||
QuoteChar::Single
|
||||
}
|
||||
}
|
||||
QuoteChar::Double => {
|
||||
if double_quotes > single_quotes {
|
||||
QuoteChar::Single
|
||||
} else {
|
||||
QuoteChar::Double
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
StringQuotes {
|
||||
triple: quotes.triple,
|
||||
quote_char: quote,
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
|
||||
/// with the provided [`StringQuotes`] style.
|
||||
///
|
||||
/// Returns the normalized string and whether it contains new lines.
|
||||
pub(crate) fn normalize_string(
|
||||
input: &str,
|
||||
quotes: StringQuotes,
|
||||
prefix: StringPrefix,
|
||||
normalize_hex: bool,
|
||||
) -> Cow<str> {
|
||||
// The normalized string if `input` is not yet normalized.
|
||||
// `output` must remain empty if `input` is already normalized.
|
||||
let mut output = String::new();
|
||||
// Tracks the last index of `input` that has been written to `output`.
|
||||
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
|
||||
let mut last_index = 0;
|
||||
|
||||
let quote = quotes.quote_char;
|
||||
let preferred_quote = quote.as_char();
|
||||
let opposite_quote = quote.invert().as_char();
|
||||
|
||||
let mut chars = input.char_indices().peekable();
|
||||
|
||||
let is_raw = prefix.is_raw_string();
|
||||
let is_fstring = prefix.is_fstring();
|
||||
let mut formatted_value_nesting = 0u32;
|
||||
|
||||
while let Some((index, c)) = chars.next() {
|
||||
if is_fstring && matches!(c, '{' | '}') {
|
||||
if chars.peek().copied().is_some_and(|(_, next)| next == c) {
|
||||
// Skip over the second character of the double braces
|
||||
chars.next();
|
||||
} else if c == '{' {
|
||||
formatted_value_nesting += 1;
|
||||
} else {
|
||||
// Safe to assume that `c == '}'` here because of the matched pattern above
|
||||
formatted_value_nesting = formatted_value_nesting.saturating_sub(1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if c == '\r' {
|
||||
output.push_str(&input[last_index..index]);
|
||||
|
||||
// Skip over the '\r' character, keep the `\n`
|
||||
if chars.peek().copied().is_some_and(|(_, next)| next == '\n') {
|
||||
chars.next();
|
||||
}
|
||||
// Replace the `\r` with a `\n`
|
||||
else {
|
||||
output.push('\n');
|
||||
}
|
||||
|
||||
last_index = index + '\r'.len_utf8();
|
||||
} else if !is_raw {
|
||||
if c == '\\' {
|
||||
if let Some((_, next)) = chars.clone().next() {
|
||||
if next == '\\' {
|
||||
// Skip over escaped backslashes
|
||||
chars.next();
|
||||
} else if normalize_hex {
|
||||
if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
|
||||
.and_then(|escape| {
|
||||
escape.normalize(&input[index + c.len_utf8() + next.len_utf8()..])
|
||||
})
|
||||
{
|
||||
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
|
||||
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
|
||||
let escape_start_offset = index + escape_start_len;
|
||||
if let Cow::Owned(normalised) = &normalised {
|
||||
output.push_str(&input[last_index..escape_start_offset]);
|
||||
output.push_str(normalised);
|
||||
last_index = escape_start_offset + normalised.len();
|
||||
};
|
||||
|
||||
// Move the `chars` iterator passed the escape sequence.
|
||||
// Simply reassigning `chars` doesn't work because the indices` would
|
||||
// then be off.
|
||||
for _ in 0..next.len_utf8() + normalised.len() {
|
||||
chars.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !quotes.triple {
|
||||
#[allow(clippy::if_same_then_else)]
|
||||
if next == opposite_quote && formatted_value_nesting == 0 {
|
||||
// Remove the escape by ending before the backslash and starting again with the quote
|
||||
chars.next();
|
||||
output.push_str(&input[last_index..index]);
|
||||
last_index = index + '\\'.len_utf8();
|
||||
} else if next == preferred_quote {
|
||||
// Quote is already escaped, skip over it.
|
||||
chars.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if !quotes.triple && c == preferred_quote && formatted_value_nesting == 0 {
|
||||
// Escape the quote
|
||||
output.push_str(&input[last_index..index]);
|
||||
output.push('\\');
|
||||
output.push(c);
|
||||
last_index = index + preferred_quote.len_utf8();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let normalized = if last_index == 0 {
|
||||
Cow::Borrowed(input)
|
||||
} else {
|
||||
output.push_str(&input[last_index..]);
|
||||
Cow::Owned(output)
|
||||
};
|
||||
|
||||
normalized
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
enum UnicodeEscape {
|
||||
/// A hex escape sequence of either 2 (`\x`), 4 (`\u`) or 8 (`\U`) hex characters.
|
||||
Hex(usize),
|
||||
|
||||
/// An escaped unicode name (`\N{name}`)
|
||||
CharacterName,
|
||||
}
|
||||
|
||||
impl UnicodeEscape {
|
||||
fn new(first: char, allow_unicode: bool) -> Option<UnicodeEscape> {
|
||||
Some(match first {
|
||||
'x' => UnicodeEscape::Hex(2),
|
||||
'u' if allow_unicode => UnicodeEscape::Hex(4),
|
||||
'U' if allow_unicode => UnicodeEscape::Hex(8),
|
||||
'N' if allow_unicode => UnicodeEscape::CharacterName,
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Normalises `\u..`, `\U..`, `\x..` and `\N{..}` escape sequences to:
|
||||
///
|
||||
/// * `\u`, `\U'` and `\x`: To use lower case for the characters `a-f`.
|
||||
/// * `\N`: To use uppercase letters
|
||||
fn normalize(self, input: &str) -> Option<Cow<str>> {
|
||||
let mut normalised = String::new();
|
||||
|
||||
let len = match self {
|
||||
UnicodeEscape::Hex(len) => {
|
||||
// It's not a valid escape sequence if the input string has fewer characters
|
||||
// left than required by the escape sequence.
|
||||
if input.len() < len {
|
||||
return None;
|
||||
}
|
||||
|
||||
for (index, c) in input.char_indices().take(len) {
|
||||
match c {
|
||||
'0'..='9' | 'a'..='f' => {
|
||||
if !normalised.is_empty() {
|
||||
normalised.push(c);
|
||||
}
|
||||
}
|
||||
'A'..='F' => {
|
||||
if normalised.is_empty() {
|
||||
normalised.reserve(len);
|
||||
normalised.push_str(&input[..index]);
|
||||
normalised.push(c.to_ascii_lowercase());
|
||||
} else {
|
||||
normalised.push(c.to_ascii_lowercase());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// not a valid escape sequence
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
len
|
||||
}
|
||||
UnicodeEscape::CharacterName => {
|
||||
let mut char_indices = input.char_indices();
|
||||
|
||||
if !matches!(char_indices.next(), Some((_, '{'))) {
|
||||
return None;
|
||||
}
|
||||
|
||||
loop {
|
||||
if let Some((index, c)) = char_indices.next() {
|
||||
match c {
|
||||
'}' => {
|
||||
if !normalised.is_empty() {
|
||||
normalised.push('}');
|
||||
}
|
||||
|
||||
// Name must be at least two characters long.
|
||||
if index < 3 {
|
||||
return None;
|
||||
}
|
||||
|
||||
break index + '}'.len_utf8();
|
||||
}
|
||||
'0'..='9' | 'A'..='Z' | ' ' | '-' => {
|
||||
if !normalised.is_empty() {
|
||||
normalised.push(c);
|
||||
}
|
||||
}
|
||||
'a'..='z' => {
|
||||
if normalised.is_empty() {
|
||||
normalised.reserve(c.len_utf8() + '}'.len_utf8());
|
||||
normalised.push_str(&input[..index]);
|
||||
normalised.push(c.to_ascii_uppercase());
|
||||
} else {
|
||||
normalised.push(c.to_ascii_uppercase());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Seems like an invalid escape sequence, don't normalise it.
|
||||
return None;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Unterminated escape sequence, don't normalise it.
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Some(if normalised.is_empty() {
|
||||
Cow::Borrowed(&input[..len])
|
||||
} else {
|
||||
Cow::Owned(normalised)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
|
||||
use crate::string::{QuoteChar, StringPrefix, StringQuotes};
|
||||
|
||||
use super::{normalize_string, UnicodeEscape};
|
||||
|
||||
#[test]
|
||||
fn normalize_32_escape() {
|
||||
let escape_sequence = UnicodeEscape::new('U', true).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Some(Cow::Owned("0001f60e".to_string())),
|
||||
escape_sequence.normalize("0001F60E")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_hex_in_byte_string() {
|
||||
let input = r"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
|
||||
|
||||
let normalized = normalize_string(
|
||||
input,
|
||||
StringQuotes {
|
||||
triple: false,
|
||||
quote_char: QuoteChar::Double,
|
||||
},
|
||||
StringPrefix::BYTE,
|
||||
true,
|
||||
);
|
||||
|
||||
assert_eq!(r"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a", &normalized);
|
||||
}
|
||||
}
|
||||
@@ -401,23 +401,22 @@ fn ensure_unchanged_ast(
|
||||
Normalizer.visit_module(&mut formatted_ast);
|
||||
let formatted_ast = ComparableMod::from(&formatted_ast);
|
||||
|
||||
// FIXME
|
||||
// if formatted_ast != unformatted_ast {
|
||||
// let diff = TextDiff::from_lines(
|
||||
// &format!("{unformatted_ast:#?}"),
|
||||
// &format!("{formatted_ast:#?}"),
|
||||
// )
|
||||
// .unified_diff()
|
||||
// .header("Unformatted", "Formatted")
|
||||
// .to_string();
|
||||
// panic!(
|
||||
// r#"Reformatting the unformatted code of {} resulted in AST changes.
|
||||
// ---
|
||||
// {diff}
|
||||
// "#,
|
||||
// input_path.display(),
|
||||
// );
|
||||
// }
|
||||
if formatted_ast != unformatted_ast {
|
||||
let diff = TextDiff::from_lines(
|
||||
&format!("{unformatted_ast:#?}"),
|
||||
&format!("{formatted_ast:#?}"),
|
||||
)
|
||||
.unified_diff()
|
||||
.header("Unformatted", "Formatted")
|
||||
.to_string();
|
||||
panic!(
|
||||
r#"Reformatting the unformatted code of {} resulted in AST changes.
|
||||
---
|
||||
{diff}
|
||||
"#,
|
||||
input_path.display(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
struct Header<'a> {
|
||||
|
||||
@@ -104,7 +104,7 @@ elif unformatted:
|
||||
- "=foo.bar.:main",
|
||||
- # fmt: on
|
||||
- ] # Includes an formatted indentation.
|
||||
+ "foo-bar=foo.bar.:main",
|
||||
+ "foo-bar" "=foo.bar.:main",
|
||||
+ # fmt: on
|
||||
+ ] # Includes an formatted indentation.
|
||||
},
|
||||
@@ -128,7 +128,7 @@ setup(
|
||||
entry_points={
|
||||
# fmt: off
|
||||
"console_scripts": [
|
||||
"foo-bar=foo.bar.:main",
|
||||
"foo-bar" "=foo.bar.:main",
|
||||
# fmt: on
|
||||
] # Includes an formatted indentation.
|
||||
},
|
||||
|
||||
@@ -320,21 +320,6 @@ long_unmergable_string_with_pragma = (
|
||||
"formatting"
|
||||
)
|
||||
|
||||
@@ -263,11 +259,11 @@
|
||||
backslashes = "This is a really long string with \"embedded\" double quotes and 'single' quotes that also handles checking for an even number of backslashes \\\\"
|
||||
backslashes = "This is a really 'long' string with \"embedded double quotes\" and 'single' quotes that also handles checking for an odd number of backslashes \\\", like this...\\\\\\"
|
||||
|
||||
-short_string = "Hi" " there."
|
||||
+short_string = "Hi there."
|
||||
|
||||
-func_call(short_string=("Hi" " there."))
|
||||
+func_call(short_string=("Hi there."))
|
||||
|
||||
-raw_strings = r"Don't" " get" r" merged" " unless they are all raw."
|
||||
+raw_strings = r"Don't get merged unless they are all raw."
|
||||
|
||||
|
||||
def foo():
|
||||
```
|
||||
|
||||
## Ruff Output
|
||||
@@ -601,11 +586,11 @@ backslashes = "This is a really long string with \"embedded\" double quotes and
|
||||
backslashes = "This is a really long string with \"embedded\" double quotes and 'single' quotes that also handles checking for an even number of backslashes \\\\"
|
||||
backslashes = "This is a really 'long' string with \"embedded double quotes\" and 'single' quotes that also handles checking for an odd number of backslashes \\\", like this...\\\\\\"
|
||||
|
||||
short_string = "Hi there."
|
||||
short_string = "Hi" " there."
|
||||
|
||||
func_call(short_string=("Hi there."))
|
||||
func_call(short_string=("Hi" " there."))
|
||||
|
||||
raw_strings = r"Don't get merged unless they are all raw."
|
||||
raw_strings = r"Don't" " get" r" merged" " unless they are all raw."
|
||||
|
||||
|
||||
def foo():
|
||||
|
||||
@@ -813,13 +813,13 @@ log.info(f"""Skipping: {'a' == 'b'} {desc['ms_name']} {money=} {dte=} {pos_share
|
||||
+backslashes = "This is a really long string with \"embedded\" double quotes and 'single' quotes that also handles checking for an even number of backslashes \\\\"
|
||||
+backslashes = "This is a really 'long' string with \"embedded double quotes\" and 'single' quotes that also handles checking for an odd number of backslashes \\\", like this...\\\\\\"
|
||||
|
||||
short_string = "Hi there."
|
||||
-short_string = "Hi there."
|
||||
+short_string = "Hi" " there."
|
||||
|
||||
-func_call(short_string="Hi there.")
|
||||
+func_call(short_string=("Hi there."))
|
||||
+func_call(short_string=("Hi" " there."))
|
||||
|
||||
-raw_strings = r"Don't" " get" r" merged" " unless they are all raw."
|
||||
+raw_strings = r"Don't get merged unless they are all raw."
|
||||
raw_strings = r"Don't" " get" r" merged" " unless they are all raw."
|
||||
|
||||
|
||||
def foo():
|
||||
@@ -1314,11 +1314,11 @@ backslashes = "This is a really long string with \"embedded\" double quotes and
|
||||
backslashes = "This is a really long string with \"embedded\" double quotes and 'single' quotes that also handles checking for an even number of backslashes \\\\"
|
||||
backslashes = "This is a really 'long' string with \"embedded double quotes\" and 'single' quotes that also handles checking for an odd number of backslashes \\\", like this...\\\\\\"
|
||||
|
||||
short_string = "Hi there."
|
||||
short_string = "Hi" " there."
|
||||
|
||||
func_call(short_string=("Hi there."))
|
||||
func_call(short_string=("Hi" " there."))
|
||||
|
||||
raw_strings = r"Don't get merged unless they are all raw."
|
||||
raw_strings = r"Don't" " get" r" merged" " unless they are all raw."
|
||||
|
||||
|
||||
def foo():
|
||||
|
||||
@@ -256,7 +256,7 @@ class IndentMeSome:
|
||||
|
||||
|
||||
class IgnoreImplicitlyConcatenatedStrings:
|
||||
""""""
|
||||
"""""" ""
|
||||
|
||||
|
||||
def docstring_that_ends_with_quote_and_a_line_break1():
|
||||
@@ -432,7 +432,7 @@ class IndentMeSome:
|
||||
|
||||
|
||||
class IgnoreImplicitlyConcatenatedStrings:
|
||||
""""""
|
||||
"""""" ""
|
||||
|
||||
|
||||
def docstring_that_ends_with_quote_and_a_line_break1():
|
||||
@@ -608,7 +608,7 @@ class IndentMeSome:
|
||||
|
||||
|
||||
class IgnoreImplicitlyConcatenatedStrings:
|
||||
""""""
|
||||
"""""" ""
|
||||
|
||||
|
||||
def docstring_that_ends_with_quote_and_a_line_break1():
|
||||
@@ -784,7 +784,7 @@ class IndentMeSome:
|
||||
|
||||
|
||||
class IgnoreImplicitlyConcatenatedStrings:
|
||||
""""""
|
||||
"""""" ""
|
||||
|
||||
|
||||
def docstring_that_ends_with_quote_and_a_line_break1():
|
||||
@@ -960,7 +960,7 @@ class IndentMeSome:
|
||||
|
||||
|
||||
class IgnoreImplicitlyConcatenatedStrings:
|
||||
""""""
|
||||
"""""" ''
|
||||
|
||||
|
||||
def docstring_that_ends_with_quote_and_a_line_break1():
|
||||
|
||||
@@ -398,11 +398,11 @@ c = (
|
||||
"dddddddddddddddddddddddddd" % aaaaaaaaaaaa + x
|
||||
)
|
||||
|
||||
"abc" + "de" + "fg" + "hij"
|
||||
"a" "b" "c" + "d" "e" + "f" "g" + "h" "i" "j"
|
||||
|
||||
|
||||
class EC2REPATH:
|
||||
f.write("Pathway name" + "\tDatabase Identifier" + "\tSource database" + "\n")
|
||||
f.write("Pathway name" + "\t" "Database Identifier" + "\t" "Source database" + "\n")
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -406,7 +406,7 @@ where
|
||||
}
|
||||
|
||||
/// Abstraction for a type checker, conservatively checks for the intended type(s).
|
||||
pub trait TypeChecker {
|
||||
trait TypeChecker {
|
||||
/// Check annotation expression to match the intended type(s).
|
||||
fn match_annotation(annotation: &Expr, semantic: &SemanticModel) -> bool;
|
||||
/// Check initializer expression to match the intended type(s).
|
||||
@@ -421,17 +421,14 @@ pub trait TypeChecker {
|
||||
fn check_type<T: TypeChecker>(binding: &Binding, semantic: &SemanticModel) -> bool {
|
||||
match binding.kind {
|
||||
BindingKind::Assignment => match binding.statement(semantic) {
|
||||
// Given:
|
||||
//
|
||||
// ```python
|
||||
// x = init_expr
|
||||
// ```
|
||||
//
|
||||
// The type checker might know how to infer the type based on `init_expr`.
|
||||
Some(Stmt::Assign(ast::StmtAssign { targets, value, .. })) => targets
|
||||
.iter()
|
||||
.find_map(|target| match_value(binding, target, value.as_ref()))
|
||||
.is_some_and(|value| T::match_initializer(value, semantic)),
|
||||
Some(Stmt::Assign(ast::StmtAssign { value, .. })) => {
|
||||
T::match_initializer(value.as_ref(), semantic)
|
||||
}
|
||||
|
||||
// ```python
|
||||
// x: annotation = some_expr
|
||||
@@ -441,40 +438,6 @@ fn check_type<T: TypeChecker>(binding: &Binding, semantic: &SemanticModel) -> bo
|
||||
Some(Stmt::AnnAssign(ast::StmtAnnAssign { annotation, .. })) => {
|
||||
T::match_annotation(annotation.as_ref(), semantic)
|
||||
}
|
||||
|
||||
_ => false,
|
||||
},
|
||||
|
||||
BindingKind::NamedExprAssignment => {
|
||||
// ```python
|
||||
// if (x := some_expr) is not None:
|
||||
// ...
|
||||
// ```
|
||||
binding.source.is_some_and(|source| {
|
||||
semantic
|
||||
.expressions(source)
|
||||
.find_map(|expr| expr.as_named_expr_expr())
|
||||
.and_then(|ast::ExprNamedExpr { target, value, .. }| {
|
||||
match_value(binding, target.as_ref(), value.as_ref())
|
||||
})
|
||||
.is_some_and(|value| T::match_initializer(value, semantic))
|
||||
})
|
||||
}
|
||||
|
||||
BindingKind::WithItemVar => match binding.statement(semantic) {
|
||||
// ```python
|
||||
// with open("file.txt") as x:
|
||||
// ...
|
||||
// ```
|
||||
Some(Stmt::With(ast::StmtWith { items, .. })) => items
|
||||
.iter()
|
||||
.find_map(|item| {
|
||||
let target = item.optional_vars.as_ref()?;
|
||||
let value = &item.context_expr;
|
||||
match_value(binding, target, value)
|
||||
})
|
||||
.is_some_and(|value| T::match_initializer(value, semantic)),
|
||||
|
||||
_ => false,
|
||||
},
|
||||
|
||||
@@ -494,7 +457,6 @@ fn check_type<T: TypeChecker>(binding: &Binding, semantic: &SemanticModel) -> bo
|
||||
};
|
||||
T::match_annotation(annotation.as_ref(), semantic)
|
||||
}
|
||||
|
||||
_ => false,
|
||||
},
|
||||
|
||||
@@ -603,125 +565,35 @@ impl BuiltinTypeChecker for TupleChecker {
|
||||
const EXPR_TYPE: PythonType = PythonType::Tuple;
|
||||
}
|
||||
|
||||
pub struct IoBaseChecker;
|
||||
|
||||
impl TypeChecker for IoBaseChecker {
|
||||
fn match_annotation(annotation: &Expr, semantic: &SemanticModel) -> bool {
|
||||
semantic
|
||||
.resolve_call_path(annotation)
|
||||
.is_some_and(|call_path| {
|
||||
if semantic.match_typing_call_path(&call_path, "IO") {
|
||||
return true;
|
||||
}
|
||||
if semantic.match_typing_call_path(&call_path, "BinaryIO") {
|
||||
return true;
|
||||
}
|
||||
if semantic.match_typing_call_path(&call_path, "TextIO") {
|
||||
return true;
|
||||
}
|
||||
matches!(
|
||||
call_path.as_slice(),
|
||||
[
|
||||
"io",
|
||||
"IOBase"
|
||||
| "RawIOBase"
|
||||
| "BufferedIOBase"
|
||||
| "TextIOBase"
|
||||
| "BytesIO"
|
||||
| "StringIO"
|
||||
| "BufferedReader"
|
||||
| "BufferedWriter"
|
||||
| "BufferedRandom"
|
||||
| "BufferedRWPair"
|
||||
| "TextIOWrapper"
|
||||
] | ["os", "Path" | "PathLike"]
|
||||
| [
|
||||
"pathlib",
|
||||
"Path" | "PurePath" | "PurePosixPath" | "PureWindowsPath"
|
||||
]
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn match_initializer(initializer: &Expr, semantic: &SemanticModel) -> bool {
|
||||
let Expr::Call(ast::ExprCall { func, .. }) = initializer else {
|
||||
return false;
|
||||
};
|
||||
|
||||
// Ex) `pathlib.Path("file.txt")`
|
||||
if let Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = func.as_ref() {
|
||||
if attr.as_str() == "open" {
|
||||
if let Expr::Call(ast::ExprCall { func, .. }) = value.as_ref() {
|
||||
return semantic.resolve_call_path(func).is_some_and(|call_path| {
|
||||
matches!(
|
||||
call_path.as_slice(),
|
||||
[
|
||||
"pathlib",
|
||||
"Path" | "PurePath" | "PurePosixPath" | "PureWindowsPath"
|
||||
]
|
||||
)
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ex) `open("file.txt")`
|
||||
semantic
|
||||
.resolve_call_path(func.as_ref())
|
||||
.is_some_and(|call_path| {
|
||||
matches!(
|
||||
call_path.as_slice(),
|
||||
["io", "open" | "open_code"] | ["os" | "", "open"]
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Test whether the given binding can be considered a list.
|
||||
///
|
||||
/// Test whether the given binding (and the given name) can be considered a list.
|
||||
/// For this, we check what value might be associated with it through it's initialization and
|
||||
/// what annotation it has (we consider `list` and `typing.List`).
|
||||
pub fn is_list(binding: &Binding, semantic: &SemanticModel) -> bool {
|
||||
check_type::<ListChecker>(binding, semantic)
|
||||
}
|
||||
|
||||
/// Test whether the given binding can be considered a dictionary.
|
||||
///
|
||||
/// Test whether the given binding (and the given name) can be considered a dictionary.
|
||||
/// For this, we check what value might be associated with it through it's initialization and
|
||||
/// what annotation it has (we consider `dict` and `typing.Dict`).
|
||||
pub fn is_dict(binding: &Binding, semantic: &SemanticModel) -> bool {
|
||||
check_type::<DictChecker>(binding, semantic)
|
||||
}
|
||||
|
||||
/// Test whether the given binding can be considered a set.
|
||||
///
|
||||
/// Test whether the given binding (and the given name) can be considered a set.
|
||||
/// For this, we check what value might be associated with it through it's initialization and
|
||||
/// what annotation it has (we consider `set` and `typing.Set`).
|
||||
pub fn is_set(binding: &Binding, semantic: &SemanticModel) -> bool {
|
||||
check_type::<SetChecker>(binding, semantic)
|
||||
}
|
||||
|
||||
/// Test whether the given binding can be considered a tuple.
|
||||
///
|
||||
/// For this, we check what value might be associated with it through
|
||||
/// Test whether the given binding (and the given name) can be considered a
|
||||
/// tuple. For this, we check what value might be associated with it through
|
||||
/// it's initialization and what annotation it has (we consider `tuple` and
|
||||
/// `typing.Tuple`).
|
||||
pub fn is_tuple(binding: &Binding, semantic: &SemanticModel) -> bool {
|
||||
check_type::<TupleChecker>(binding, semantic)
|
||||
}
|
||||
|
||||
/// Test whether the given binding can be considered a file-like object (i.e., a type that
|
||||
/// implements `io.IOBase`).
|
||||
pub fn is_io_base(binding: &Binding, semantic: &SemanticModel) -> bool {
|
||||
check_type::<IoBaseChecker>(binding, semantic)
|
||||
}
|
||||
|
||||
/// Test whether the given expression can be considered a file-like object (i.e., a type that
|
||||
/// implements `io.IOBase`).
|
||||
pub fn is_io_base_expr(expr: &Expr, semantic: &SemanticModel) -> bool {
|
||||
IoBaseChecker::match_initializer(expr, semantic)
|
||||
}
|
||||
|
||||
/// Find the [`ParameterWithDefault`] corresponding to the given [`Binding`].
|
||||
#[inline]
|
||||
fn find_parameter<'a>(
|
||||
@@ -795,7 +667,6 @@ pub fn find_assigned_value<'a>(symbol: &str, semantic: &'a SemanticModel<'a>) ->
|
||||
///
|
||||
/// This function will return a `NumberLiteral` with value `Int(42)` when called with `foo` and a
|
||||
/// `StringLiteral` with value `"str"` when called with `bla`.
|
||||
#[allow(clippy::single_match)]
|
||||
pub fn find_binding_value<'a>(binding: &Binding, semantic: &'a SemanticModel) -> Option<&'a Expr> {
|
||||
match binding.kind {
|
||||
// Ex) `x := 1`
|
||||
@@ -809,32 +680,25 @@ pub fn find_binding_value<'a>(binding: &Binding, semantic: &'a SemanticModel) ->
|
||||
}
|
||||
}
|
||||
// Ex) `x = 1`
|
||||
BindingKind::Assignment => match binding.statement(semantic) {
|
||||
Some(Stmt::Assign(ast::StmtAssign { value, targets, .. })) => {
|
||||
return targets
|
||||
.iter()
|
||||
.find_map(|target| match_value(binding, target, value.as_ref()))
|
||||
BindingKind::Assignment => {
|
||||
let parent_id = binding.source?;
|
||||
let parent = semantic.statement(parent_id);
|
||||
match parent {
|
||||
Stmt::Assign(ast::StmtAssign { value, targets, .. }) => {
|
||||
return targets
|
||||
.iter()
|
||||
.find_map(|target| match_value(binding, target, value.as_ref()))
|
||||
}
|
||||
Stmt::AnnAssign(ast::StmtAnnAssign {
|
||||
value: Some(value),
|
||||
target,
|
||||
..
|
||||
}) => {
|
||||
return match_value(binding, target, value.as_ref());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Some(Stmt::AnnAssign(ast::StmtAnnAssign {
|
||||
value: Some(value),
|
||||
target,
|
||||
..
|
||||
})) => {
|
||||
return match_value(binding, target, value.as_ref());
|
||||
}
|
||||
_ => {}
|
||||
},
|
||||
// Ex) `with open("file.txt") as f:`
|
||||
BindingKind::WithItemVar => match binding.statement(semantic) {
|
||||
Some(Stmt::With(ast::StmtWith { items, .. })) => {
|
||||
return items.iter().find_map(|item| {
|
||||
let target = item.optional_vars.as_ref()?;
|
||||
let value = &item.context_expr;
|
||||
match_value(binding, target, value)
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
},
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
None
|
||||
|
||||
@@ -640,9 +640,13 @@ impl<'a> SemanticModel<'a> {
|
||||
/// only binding to that name in its scope.
|
||||
pub fn only_binding(&self, name: &ast::ExprName) -> Option<BindingId> {
|
||||
self.resolve_name(name).filter(|id| {
|
||||
// Find the correct scope.
|
||||
let binding = self.binding(*id);
|
||||
let scope = &self.scopes[binding.scope];
|
||||
scope.shadowed_binding(*id).is_none()
|
||||
|
||||
// Ensure that the name is only bound once in the scope.
|
||||
let mut bindings = scope.get_all(&name.id);
|
||||
bindings.next().is_some_and(|binding| binding == *id) && bindings.next().is_none()
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1489,11 +1493,6 @@ impl<'a> SemanticModel<'a> {
|
||||
.intersects(SemanticModelFlags::TYPE_CHECKING_BLOCK)
|
||||
}
|
||||
|
||||
/// Return `true` if the model is in a docstring.
|
||||
pub const fn in_docstring(&self) -> bool {
|
||||
self.flags.intersects(SemanticModelFlags::DOCSTRING)
|
||||
}
|
||||
|
||||
/// Return `true` if the model has traversed past the "top-of-file" import boundary.
|
||||
pub const fn seen_import_boundary(&self) -> bool {
|
||||
self.flags.intersects(SemanticModelFlags::IMPORT_BOUNDARY)
|
||||
@@ -1858,26 +1857,6 @@ bitflags! {
|
||||
/// ```
|
||||
const COMPREHENSION_ASSIGNMENT = 1 << 19;
|
||||
|
||||
|
||||
/// The model is in a module / class / function docstring.
|
||||
///
|
||||
/// For example, the model could be visiting either the module, class,
|
||||
/// or function docstring in:
|
||||
/// ```python
|
||||
/// """Module docstring."""
|
||||
///
|
||||
///
|
||||
/// class Foo:
|
||||
/// """Class docstring."""
|
||||
/// pass
|
||||
///
|
||||
///
|
||||
/// def foo():
|
||||
/// """Function docstring."""
|
||||
/// pass
|
||||
/// ```
|
||||
const DOCSTRING = 1 << 20;
|
||||
|
||||
/// The context is in any type annotation.
|
||||
const ANNOTATION = Self::TYPING_ONLY_ANNOTATION.bits() | Self::RUNTIME_EVALUATED_ANNOTATION.bits() | Self::RUNTIME_REQUIRED_ANNOTATION.bits();
|
||||
|
||||
|
||||
@@ -268,9 +268,6 @@ Instead, apply the `# fmt: off` comment to the entire statement:
|
||||
# fmt: on
|
||||
```
|
||||
|
||||
Like Black, Ruff will _also_ recognize [YAPF](https://github.com/google/yapf)'s `# yapf: disable` and `# yapf: enable` pragma
|
||||
comments, which are treated equivalently to `# fmt: off` and `# fmt: on`, respectively.
|
||||
|
||||
`# fmt: skip` comments suppress formatting for a preceding statement, case header, decorator,
|
||||
function definition, or class definition:
|
||||
|
||||
@@ -290,30 +287,8 @@ def test(a, b, c, d, e, f) -> int: # fmt: skip
|
||||
pass
|
||||
```
|
||||
|
||||
As such, adding `# fmt: skip` comments at the end of an expressions will have no effect. In
|
||||
the following example, the list entry `'1'` will be formatted, despite the `# fmt: skip`:
|
||||
|
||||
```python
|
||||
a = call(
|
||||
[
|
||||
'1', # fmt: skip
|
||||
'2',
|
||||
],
|
||||
b
|
||||
)
|
||||
```
|
||||
|
||||
Instead, apply the `# fmt: skip` comment to the entire statement:
|
||||
|
||||
```python
|
||||
a = call(
|
||||
[
|
||||
'1',
|
||||
'2',
|
||||
],
|
||||
b
|
||||
) # fmt: skip
|
||||
```
|
||||
Like Black, Ruff will _also_ recognize [YAPF](https://github.com/google/yapf)'s `# yapf: disable` and `# yapf: enable` pragma
|
||||
comments, which are treated equivalently to `# fmt: off` and `# fmt: on`, respectively.
|
||||
|
||||
## Conflicting lint rules
|
||||
|
||||
|
||||
2
ruff.schema.json
generated
2
ruff.schema.json
generated
@@ -2988,8 +2988,6 @@
|
||||
"FURB11",
|
||||
"FURB113",
|
||||
"FURB118",
|
||||
"FURB12",
|
||||
"FURB129",
|
||||
"FURB13",
|
||||
"FURB131",
|
||||
"FURB132",
|
||||
|
||||
Reference in New Issue
Block a user