Compare commits

...

2 Commits

Author SHA1 Message Date
Micha Reiser
5d2513e5a9 POC of Black's string preview style formatting 2023-09-21 12:38:03 +02:00
Micha Reiser
272306bf5a Introduce StringContinuation data structure 2023-09-21 08:01:39 +02:00
3 changed files with 233 additions and 80 deletions

View File

@@ -1,4 +1,4 @@
use ruff_formatter::FormatRuleWithOptions; use ruff_formatter::{FormatContext, FormatRuleWithOptions};
use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::{Constant, ExprConstant}; use ruff_python_ast::{Constant, ExprConstant};
use ruff_text_size::{Ranged, TextLen, TextRange}; use ruff_text_size::{Ranged, TextLen, TextRange};
@@ -78,6 +78,8 @@ impl NeedsParentheses for ExprConstant {
OptionalParentheses::Multiline OptionalParentheses::Multiline
} else if is_multiline_string(self, context.source()) { } else if is_multiline_string(self, context.source()) {
OptionalParentheses::Never OptionalParentheses::Never
} else if context.options().preview().is_enabled() {
OptionalParentheses::Multiline
} else { } else {
OptionalParentheses::BestFit OptionalParentheses::BestFit
} }

View File

@@ -1,6 +1,7 @@
use std::borrow::Cow; use std::borrow::Cow;
use bitflags::bitflags; use bitflags::bitflags;
use smallvec::SmallVec;
use ruff_formatter::{format_args, write, FormatError}; use ruff_formatter::{format_args, write, FormatError};
use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::node::AnyNodeRef;
@@ -139,20 +140,133 @@ impl<'a> FormatString<'a> {
impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> { impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let locator = f.context().locator(); let locator = f.context().locator();
let quote_style = f.options().quote_style();
match self.layout { match self.layout {
StringLayout::Default => { StringLayout::Default => {
if self.string.is_implicit_concatenated() { if self.string.is_implicit_concatenated() {
in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f) let continuation = StringContinuation::from_string(self.string, &locator)?;
if f.options().preview().is_enabled() {
if let Some((first, rest)) = continuation.parts.split_first() {
let first_normalized =
first.normalize(Quoting::CanChange, &locator, quote_style);
let quotes = first_normalized.quotes;
let mut normalized = SmallVec::with_capacity(continuation.parts.len());
normalized.push(first_normalized);
for part in rest {
normalized.push(part.normalize_with_quotes(&locator, quotes));
}
let continuation = NormalizedStringContinuation {
string: self.string,
parts: normalized,
};
let format_flat = format_with(|f| {
quotes.fmt(f)?;
// TODO comments
for part in &continuation.parts {
match &part.text {
Cow::Borrowed(_) => {
source_text_slice(part.range()).fmt(f)?
}
Cow::Owned(content) => {
text(&content, Some(part.start())).fmt(f)?
}
}
}
quotes.fmt(f)
});
let format_multiline = format_with(|f| {
// TODO won't format comments again
group(&continuation).should_expand(true).fmt(f)
});
let format_joined = format_with(|f| {
quotes.fmt(f)?;
let mut fill = f.fill();
let separator = format_with(|f| {
group(&format_args![
if_group_breaks(&quotes),
soft_line_break_or_space(),
if_group_breaks(&format_args![quotes, space()])
])
.fmt(f)
});
for part in &continuation.parts {
let mut words = part.text.split(' ').peekable();
while let Some(word) = words.next() {
let is_last = words.peek().is_none();
let format_word =
format_with(|f| write!(f, [text(word, None)]));
fill.entry(&separator, &format_word);
}
}
fill.finish()?;
quotes.fmt(f)
});
best_fitting![format_flat, format_multiline, format_joined]
.with_mode(BestFittingMode::AllLines)
.fmt(f)?;
}
Ok(())
} else {
in_parentheses_only_group(&continuation.normalize(quote_style, &locator))
.fmt(f)
}
} else {
// Joining/splitting does not apply to triple-quoted strings or expression-statement strings.
// Splitting only applies in parenthesized contexts.
// Strings are joined in non-parenthesized contexts.
// Strings are not joined/split if they have a prefix other than `u` or `f`.
if f.options().preview().is_enabled() {
let normalized = StringPart::from_source(self.string.range(), &locator)
.normalize(self.string.quoting(&locator), &locator, quote_style);
// TODO how to optimize to avoid allocating a string for every word?
write!(f, [normalized.prefix, normalized.quotes])?;
// TODO split by words longer with a length of at least 6 characters.
let mut words = normalized.text.split(' ');
let mut fill = f.fill();
let separator = format_with(|f| {
group(&format_args![
if_group_breaks(&normalized.quotes),
soft_line_break_or_space(),
if_group_breaks(&format_args![normalized.quotes, space()])
])
.fmt(f)
});
while let Some(word) = words.next() {
let format_word = format_with(|f| write!(f, [text(word, None)]));
fill.entry(&separator, &format_word);
}
fill.finish()?;
normalized.quotes.fmt(f)
} else { } else {
StringPart::from_source(self.string.range(), &locator) StringPart::from_source(self.string.range(), &locator)
.normalize( .normalize(self.string.quoting(&locator), &locator, quote_style)
self.string.quoting(&locator),
&locator,
f.options().quote_style(),
)
.fmt(f) .fmt(f)
} }
} }
}
StringLayout::DocString => { StringLayout::DocString => {
let string_part = StringPart::from_source(self.string.range(), &locator); let string_part = StringPart::from_source(self.string.range(), &locator);
let normalized = let normalized =
@@ -160,33 +274,24 @@ impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
format_docstring(&normalized, f) format_docstring(&normalized, f)
} }
StringLayout::ImplicitConcatenatedStringInBinaryLike => { StringLayout::ImplicitConcatenatedStringInBinaryLike => {
FormatStringContinuation::new(self.string).fmt(f) StringContinuation::from_string(self.string, &locator)?
.normalize(f.options().quote_style(), &locator)
.fmt(f)
} }
} }
} }
} }
struct FormatStringContinuation<'a> { struct StringContinuation<'a> {
parts: SmallVec<[StringPart; 4]>,
string: &'a AnyString<'a>, string: &'a AnyString<'a>,
} }
impl<'a> FormatStringContinuation<'a> { impl<'a> StringContinuation<'a> {
fn new(string: &'a AnyString<'a>) -> Self { fn from_string(string: &'a AnyString<'a>, locator: &Locator) -> FormatResult<Self> {
if let AnyString::Constant(constant) = string { debug_assert!(string.is_implicit_concatenated());
debug_assert!(constant.value.is_str() || constant.value.is_bytes());
}
Self { string }
}
}
impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> { let string_range = string.range();
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let comments = f.context().comments().clone();
let locator = f.context().locator();
let quote_style = f.options().quote_style();
let mut dangling_comments = comments.dangling(self.string);
let string_range = self.string.range();
let string_content = locator.slice(string_range); let string_content = locator.slice(string_range);
// The AST parses implicit concatenation as a single string. // The AST parses implicit concatenation as a single string.
@@ -195,7 +300,7 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
// because this is a black preview style. // because this is a black preview style.
let lexer = lex_starts_at(string_content, Mode::Expression, string_range.start()); let lexer = lex_starts_at(string_content, Mode::Expression, string_range.start());
let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space()); let mut parts = SmallVec::new();
for token in lexer { for token in lexer {
let (token, token_range) = match token { let (token, token_range) = match token {
@@ -228,6 +333,55 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
match token { match token {
Tok::String { .. } => { Tok::String { .. } => {
parts.push(StringPart::from_source(token_range, locator));
}
Tok::Comment(_)
| Tok::NonLogicalNewline
| Tok::Newline
| Tok::Indent
| Tok::Dedent => continue,
token => unreachable!("Unexpected token {token:?}"),
}
}
Ok(Self { parts, string })
}
/// Normalizes every part of this continuation and returns the result as a
/// [`NormalizedStringContinuation`] that refers to the same string.
///
/// The quoting is looked up once from the string and applied to all parts;
/// `quote_style` is forwarded unchanged to each part's `normalize` call.
/// Consumes `self`.
fn normalize(
    self,
    quote_style: QuoteStyle,
    locator: &Locator<'a>,
) -> NormalizedStringContinuation<'a> {
    // Resolved once up front so that every part is normalized with the same quoting.
    let quoting = self.string.quoting(locator);

    let parts = self
        .parts
        .into_iter()
        // `locator` is already a `&Locator`, so it is passed through as-is
        // rather than re-borrowed.
        .map(|part| part.normalize(quoting, locator, quote_style))
        .collect();

    NormalizedStringContinuation {
        parts,
        string: self.string,
    }
}
}
/// The parts of an implicit concatenated string after each part has been normalized.
///
/// Built either by normalizing a `StringContinuation` or, in the preview
/// code path, by pushing the individually normalized parts.
struct NormalizedStringContinuation<'a> {
/// The normalized parts, in the order in which they were collected.
parts: SmallVec<[NormalizedString<'a>; 4]>,
/// The string the parts were taken from; the `Format` implementation uses it
/// to look up the dangling comments.
string: &'a AnyString<'a>,
}
impl Format<PyFormatContext<'_>> for NormalizedStringContinuation<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let comments = f.context().comments().clone();
let locator = f.context().locator();
let quote_style = f.options().quote_style();
let quoting = self.string.quoting(&locator);
let mut dangling_comments = comments.dangling(self.string);
let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space());
for part in &self.parts {
// ```python // ```python
// ( // (
// "a" // "a"
@@ -235,11 +389,10 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
// "the comment above" // "the comment above"
// ) // )
// ``` // ```
let leading_comments_end = dangling_comments let leading_comments_end =
.partition_point(|comment| comment.start() <= token_range.start()); dangling_comments.partition_point(|comment| comment.start() <= part.start());
let (leading_part_comments, rest) = let (leading_part_comments, rest) = dangling_comments.split_at(leading_comments_end);
dangling_comments.split_at(leading_comments_end);
// ```python // ```python
// ( // (
@@ -249,34 +402,20 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
// ``` // ```
let trailing_comments_end = rest.partition_point(|comment| { let trailing_comments_end = rest.partition_point(|comment| {
comment.line_position().is_end_of_line() comment.line_position().is_end_of_line()
&& !locator.contains_line_break(TextRange::new( && !locator.contains_line_break(TextRange::new(part.end(), comment.start()))
token_range.end(),
comment.start(),
))
}); });
let (trailing_part_comments, rest) = rest.split_at(trailing_comments_end); let (trailing_part_comments, rest) = rest.split_at(trailing_comments_end);
let part = StringPart::from_source(token_range, &locator);
let normalized =
part.normalize(self.string.quoting(&locator), &locator, quote_style);
joiner.entry(&format_args![ joiner.entry(&format_args![
line_suffix_boundary(), line_suffix_boundary(),
leading_comments(leading_part_comments), leading_comments(leading_part_comments),
normalized, part,
trailing_comments(trailing_part_comments) trailing_comments(trailing_part_comments)
]); ]);
dangling_comments = rest; dangling_comments = rest;
} }
Tok::Comment(_)
| Tok::NonLogicalNewline
| Tok::Newline
| Tok::Indent
| Tok::Dedent => continue,
token => unreachable!("Unexpected token {token:?}"),
}
}
debug_assert!(dangling_comments.is_empty()); debug_assert!(dangling_comments.is_empty());
@@ -320,10 +459,10 @@ impl StringPart {
/// Computes the strings preferred quotes and normalizes its content. /// Computes the strings preferred quotes and normalizes its content.
fn normalize<'a>( fn normalize<'a>(
self, &self,
quoting: Quoting, quoting: Quoting,
locator: &'a Locator, locator: &Locator<'a>,
quote_style: QuoteStyle, configured_quote_style: QuoteStyle,
) -> NormalizedString<'a> { ) -> NormalizedString<'a> {
let raw_content = locator.slice(self.content_range); let raw_content = locator.slice(self.content_range);
@@ -331,28 +470,40 @@ impl StringPart {
Quoting::Preserve => self.quotes, Quoting::Preserve => self.quotes,
Quoting::CanChange => { Quoting::CanChange => {
if self.prefix.is_raw_string() { if self.prefix.is_raw_string() {
preferred_quotes_raw(raw_content, self.quotes, quote_style) preferred_quotes_raw(raw_content, self.quotes, configured_quote_style)
} else { } else {
preferred_quotes(raw_content, self.quotes, quote_style) preferred_quotes(raw_content, self.quotes, configured_quote_style)
} }
} }
}; };
let normalized = normalize_string( self.normalize_with_quotes(locator, preferred_quotes)
locator.slice(self.content_range), }
preferred_quotes,
self.prefix.is_raw_string(), fn normalize_with_quotes<'a>(
); &self,
locator: &Locator<'a>,
quotes: StringQuotes,
) -> NormalizedString<'a> {
let raw_content = locator.slice(self.content_range);
let normalized = normalize_string(raw_content, quotes, self.prefix.is_raw_string());
NormalizedString { NormalizedString {
prefix: self.prefix, prefix: self.prefix,
content_range: self.content_range, content_range: self.content_range,
text: normalized, text: normalized,
quotes: preferred_quotes, quotes,
} }
} }
} }
/// Exposes the part's `content_range` as its range.
// NOTE(review): presumably `content_range` excludes the prefix and the quotes;
// confirm against `StringPart::from_source`.
impl Ranged for StringPart {
    fn range(&self) -> TextRange {
        let Self { content_range, .. } = self;
        *content_range
    }
}
#[derive(Debug)] #[derive(Debug)]
struct NormalizedString<'a> { struct NormalizedString<'a> {
prefix: StringPrefix, prefix: StringPrefix,

View File

@@ -268,9 +268,9 @@ impl FromStr for MagicTrailingComma {
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "lowercase"))] #[cfg_attr(feature = "serde", serde(rename_all = "lowercase"))]
pub enum PreviewMode { pub enum PreviewMode {
#[default]
Disabled, Disabled,
#[default]
Enabled, Enabled,
} }