Simplify formatting of strings by using flags from the AST nodes (#10489)
This commit is contained in:
@@ -2,7 +2,7 @@ use std::borrow::Cow;
|
||||
use std::iter::FusedIterator;
|
||||
|
||||
use ruff_formatter::FormatContext;
|
||||
use ruff_python_ast::str::Quote;
|
||||
use ruff_python_ast::{str::Quote, AnyStringKind};
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
@@ -10,7 +10,7 @@ use crate::context::FStringState;
|
||||
use crate::options::PythonVersion;
|
||||
use crate::prelude::*;
|
||||
use crate::preview::is_f_string_formatting_enabled;
|
||||
use crate::string::{Quoting, StringPart, StringPrefix, StringQuotes};
|
||||
use crate::string::{Quoting, StringPart, StringQuotes};
|
||||
use crate::QuoteStyle;
|
||||
|
||||
pub(crate) struct StringNormalizer {
|
||||
@@ -44,7 +44,7 @@ impl StringNormalizer {
|
||||
self
|
||||
}
|
||||
|
||||
fn quoting(&self, string: &StringPart) -> Quoting {
|
||||
fn quoting(&self, string: StringPart) -> Quoting {
|
||||
if let FStringState::InsideExpressionElement(context) = self.f_string_state {
|
||||
// If we're inside an f-string, we need to make sure to preserve the
|
||||
// existing quotes unless we're inside a triple-quoted f-string and
|
||||
@@ -60,7 +60,7 @@ impl StringNormalizer {
|
||||
// The reason to preserve the quotes is based on the assumption that
|
||||
// the original f-string is valid in terms of quoting, and we don't
|
||||
// want to change that to make it invalid.
|
||||
if (context.quotes().is_triple() && !string.quotes().is_triple())
|
||||
if (context.kind().is_triple_quoted() && !string.kind().is_triple_quoted())
|
||||
|| self.target_version.supports_pep_701()
|
||||
{
|
||||
self.quoting
|
||||
@@ -73,18 +73,19 @@ impl StringNormalizer {
|
||||
}
|
||||
|
||||
/// Computes the strings preferred quotes.
|
||||
pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> QuoteSelection {
|
||||
pub(crate) fn choose_quotes(&self, string: StringPart, locator: &Locator) -> QuoteSelection {
|
||||
let raw_content = locator.slice(string.content_range());
|
||||
let first_quote_or_normalized_char_offset = raw_content
|
||||
.bytes()
|
||||
.position(|b| matches!(b, b'\\' | b'"' | b'\'' | b'\r' | b'{'));
|
||||
let string_kind = string.kind();
|
||||
|
||||
let quotes = match self.quoting(string) {
|
||||
Quoting::Preserve => string.quotes(),
|
||||
let new_kind = match self.quoting(string) {
|
||||
Quoting::Preserve => string_kind,
|
||||
Quoting::CanChange => {
|
||||
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
||||
// Except when using quote-style-preserve.
|
||||
let preferred_style = if string.quotes().triple {
|
||||
let preferred_style = if string_kind.is_triple_quoted() {
|
||||
// ... unless we're formatting a code snippet inside a docstring,
|
||||
// then we specifically want to invert our quote style to avoid
|
||||
// writing out invalid Python.
|
||||
@@ -145,33 +146,30 @@ impl StringNormalizer {
|
||||
if let Some(first_quote_or_normalized_char_offset) =
|
||||
first_quote_or_normalized_char_offset
|
||||
{
|
||||
if string.prefix().is_raw_string() {
|
||||
if string_kind.is_raw_string() {
|
||||
choose_quotes_for_raw_string(
|
||||
&raw_content[first_quote_or_normalized_char_offset..],
|
||||
string.quotes(),
|
||||
string_kind,
|
||||
preferred_quote,
|
||||
)
|
||||
} else {
|
||||
choose_quotes_impl(
|
||||
&raw_content[first_quote_or_normalized_char_offset..],
|
||||
string.quotes(),
|
||||
string_kind,
|
||||
preferred_quote,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
StringQuotes {
|
||||
quote_char: preferred_quote,
|
||||
triple: string.quotes().is_triple(),
|
||||
}
|
||||
string_kind.with_quote_style(preferred_quote)
|
||||
}
|
||||
} else {
|
||||
string.quotes()
|
||||
string_kind
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
QuoteSelection {
|
||||
quotes,
|
||||
kind: new_kind,
|
||||
first_quote_or_normalized_char_offset,
|
||||
}
|
||||
}
|
||||
@@ -179,11 +177,10 @@ impl StringNormalizer {
|
||||
/// Computes the strings preferred quotes and normalizes its content.
|
||||
pub(crate) fn normalize<'a>(
|
||||
&self,
|
||||
string: &StringPart,
|
||||
string: StringPart,
|
||||
locator: &'a Locator,
|
||||
) -> NormalizedString<'a> {
|
||||
let raw_content = locator.slice(string.content_range());
|
||||
|
||||
let quote_selection = self.choose_quotes(string, locator);
|
||||
|
||||
let normalized = if let Some(first_quote_or_escape_offset) =
|
||||
@@ -192,8 +189,7 @@ impl StringNormalizer {
|
||||
normalize_string(
|
||||
raw_content,
|
||||
first_quote_or_escape_offset,
|
||||
quote_selection.quotes,
|
||||
string.prefix(),
|
||||
quote_selection.kind,
|
||||
// TODO: Remove the `b'{'` in `choose_quotes` when promoting the
|
||||
// `format_fstring` preview style
|
||||
self.format_fstring,
|
||||
@@ -203,34 +199,31 @@ impl StringNormalizer {
|
||||
};
|
||||
|
||||
NormalizedString {
|
||||
prefix: string.prefix(),
|
||||
kind: quote_selection.kind,
|
||||
content_range: string.content_range(),
|
||||
text: normalized,
|
||||
quotes: quote_selection.quotes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct QuoteSelection {
|
||||
quotes: StringQuotes,
|
||||
kind: AnyStringKind,
|
||||
|
||||
/// Offset to the first quote character or character that needs special handling in [`normalize_string`].
|
||||
first_quote_or_normalized_char_offset: Option<usize>,
|
||||
}
|
||||
|
||||
impl QuoteSelection {
|
||||
pub(crate) fn quotes(&self) -> StringQuotes {
|
||||
self.quotes
|
||||
pub(crate) fn kind(&self) -> AnyStringKind {
|
||||
self.kind
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct NormalizedString<'a> {
|
||||
prefix: crate::string::StringPrefix,
|
||||
|
||||
/// The quotes of the normalized string (preferred quotes)
|
||||
quotes: StringQuotes,
|
||||
/// Holds data about the quotes and prefix of the string
|
||||
kind: AnyStringKind,
|
||||
|
||||
/// The range of the string's content in the source (minus prefix and quotes).
|
||||
content_range: TextRange,
|
||||
@@ -244,12 +237,8 @@ impl<'a> NormalizedString<'a> {
|
||||
&self.text
|
||||
}
|
||||
|
||||
pub(crate) fn quotes(&self) -> StringQuotes {
|
||||
self.quotes
|
||||
}
|
||||
|
||||
pub(crate) fn prefix(&self) -> StringPrefix {
|
||||
self.prefix
|
||||
pub(crate) fn kind(&self) -> AnyStringKind {
|
||||
self.kind
|
||||
}
|
||||
}
|
||||
|
||||
@@ -261,7 +250,8 @@ impl Ranged for NormalizedString<'_> {
|
||||
|
||||
impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
||||
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
|
||||
ruff_formatter::write!(f, [self.prefix, self.quotes])?;
|
||||
let quotes = StringQuotes::from(self.kind);
|
||||
ruff_formatter::write!(f, [self.kind.prefix(), quotes])?;
|
||||
match &self.text {
|
||||
Cow::Borrowed(_) => {
|
||||
source_text_slice(self.range()).fmt(f)?;
|
||||
@@ -270,7 +260,7 @@ impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
||||
text(normalized).fmt(f)?;
|
||||
}
|
||||
}
|
||||
self.quotes.fmt(f)
|
||||
quotes.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -281,9 +271,9 @@ impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
||||
/// style is double quotes.
|
||||
fn choose_quotes_for_raw_string(
|
||||
input: &str,
|
||||
quotes: StringQuotes,
|
||||
kind: AnyStringKind,
|
||||
preferred_quote: Quote,
|
||||
) -> StringQuotes {
|
||||
) -> AnyStringKind {
|
||||
let preferred_quote_char = preferred_quote.as_char();
|
||||
let mut chars = input.chars().peekable();
|
||||
let contains_unescaped_configured_quotes = loop {
|
||||
@@ -294,7 +284,7 @@ fn choose_quotes_for_raw_string(
|
||||
}
|
||||
// `"` or `'`
|
||||
Some(c) if c == preferred_quote_char => {
|
||||
if !quotes.triple {
|
||||
if !kind.is_triple_quoted() {
|
||||
break true;
|
||||
}
|
||||
|
||||
@@ -319,14 +309,10 @@ fn choose_quotes_for_raw_string(
|
||||
None => break false,
|
||||
}
|
||||
};
|
||||
|
||||
StringQuotes {
|
||||
triple: quotes.triple,
|
||||
quote_char: if contains_unescaped_configured_quotes {
|
||||
quotes.quote_char
|
||||
} else {
|
||||
preferred_quote
|
||||
},
|
||||
if contains_unescaped_configured_quotes {
|
||||
kind
|
||||
} else {
|
||||
kind.with_quote_style(preferred_quote)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -338,8 +324,8 @@ fn choose_quotes_for_raw_string(
|
||||
/// For triple quoted strings, the preferred quote style is always used, unless the string contains
|
||||
/// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
|
||||
/// used unless the string contains `"""`).
|
||||
fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote) -> StringQuotes {
|
||||
let quote = if quotes.triple {
|
||||
fn choose_quotes_impl(input: &str, kind: AnyStringKind, preferred_quote: Quote) -> AnyStringKind {
|
||||
let quote = if kind.is_triple_quoted() {
|
||||
// True if the string contains a triple quote sequence of the configured quote style.
|
||||
let mut uses_triple_quotes = false;
|
||||
let mut chars = input.chars().peekable();
|
||||
@@ -393,7 +379,7 @@ fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote)
|
||||
if uses_triple_quotes {
|
||||
// String contains a triple quote sequence of the configured quote style.
|
||||
// Keep the existing quote style.
|
||||
quotes.quote_char
|
||||
kind.quote_style()
|
||||
} else {
|
||||
preferred_quote
|
||||
}
|
||||
@@ -433,10 +419,7 @@ fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote)
|
||||
}
|
||||
};
|
||||
|
||||
StringQuotes {
|
||||
triple: quotes.triple,
|
||||
quote_char: quote,
|
||||
}
|
||||
kind.with_quote_style(quote)
|
||||
}
|
||||
|
||||
/// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
|
||||
@@ -446,8 +429,7 @@ fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote)
|
||||
pub(crate) fn normalize_string(
|
||||
input: &str,
|
||||
start_offset: usize,
|
||||
quotes: StringQuotes,
|
||||
prefix: StringPrefix,
|
||||
kind: AnyStringKind,
|
||||
format_fstring: bool,
|
||||
) -> Cow<str> {
|
||||
// The normalized string if `input` is not yet normalized.
|
||||
@@ -457,14 +439,14 @@ pub(crate) fn normalize_string(
|
||||
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
|
||||
let mut last_index = 0;
|
||||
|
||||
let quote = quotes.quote_char;
|
||||
let quote = kind.quote_style();
|
||||
let preferred_quote = quote.as_char();
|
||||
let opposite_quote = quote.opposite().as_char();
|
||||
|
||||
let mut chars = CharIndicesWithOffset::new(input, start_offset).peekable();
|
||||
|
||||
let is_raw = prefix.is_raw_string();
|
||||
let is_fstring = !format_fstring && prefix.is_fstring();
|
||||
let is_raw = kind.is_raw_string();
|
||||
let is_fstring = !format_fstring && kind.is_f_string();
|
||||
let mut formatted_value_nesting = 0u32;
|
||||
|
||||
while let Some((index, c)) = chars.next() {
|
||||
@@ -502,7 +484,7 @@ pub(crate) fn normalize_string(
|
||||
} else {
|
||||
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
|
||||
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
|
||||
if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
|
||||
if let Some(normalised) = UnicodeEscape::new(next, !kind.is_byte_string())
|
||||
.and_then(|escape| escape.normalize(&input[index + escape_start_len..]))
|
||||
{
|
||||
let escape_start_offset = index + escape_start_len;
|
||||
@@ -521,7 +503,7 @@ pub(crate) fn normalize_string(
|
||||
}
|
||||
}
|
||||
|
||||
if !quotes.triple {
|
||||
if !kind.is_triple_quoted() {
|
||||
#[allow(clippy::if_same_then_else)]
|
||||
if next == opposite_quote && formatted_value_nesting == 0 {
|
||||
// Remove the escape by ending before the backslash and starting again with the quote
|
||||
@@ -534,7 +516,10 @@ pub(crate) fn normalize_string(
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if !quotes.triple && c == preferred_quote && formatted_value_nesting == 0 {
|
||||
} else if !kind.is_triple_quoted()
|
||||
&& c == preferred_quote
|
||||
&& formatted_value_nesting == 0
|
||||
{
|
||||
// Escape the quote
|
||||
output.push_str(&input[last_index..index]);
|
||||
output.push('\\');
|
||||
@@ -704,9 +689,7 @@ impl UnicodeEscape {
|
||||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
|
||||
use ruff_python_ast::str::Quote;
|
||||
|
||||
use crate::string::{StringPrefix, StringQuotes};
|
||||
use ruff_python_ast::{str::Quote, AnyStringKind, AnyStringPrefix, ByteStringPrefix};
|
||||
|
||||
use super::{normalize_string, UnicodeEscape};
|
||||
|
||||
@@ -727,11 +710,11 @@ mod tests {
|
||||
let normalized = normalize_string(
|
||||
input,
|
||||
0,
|
||||
StringQuotes {
|
||||
triple: false,
|
||||
quote_char: Quote::Double,
|
||||
},
|
||||
StringPrefix::BYTE,
|
||||
AnyStringKind::new(
|
||||
AnyStringPrefix::Bytes(ByteStringPrefix::Regular),
|
||||
Quote::Double,
|
||||
false,
|
||||
),
|
||||
true,
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user