POC of Black's string preview style formatting

Introduce StringContinuation data structure
2023-09-21 12:38:03 +02:00 · 2023-09-21 08:01:39 +02:00
3 changed files with 233 additions and 80 deletions
--- a/crates/ruff_python_formatter/src/expression/expr_constant.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_constant.rs
@@ -1,4 +1,4 @@
-use ruff_formatter::FormatRuleWithOptions;
+use ruff_formatter::{FormatContext, FormatRuleWithOptions};
 use ruff_python_ast::node::AnyNodeRef;
 use ruff_python_ast::{Constant, ExprConstant};
 use ruff_text_size::{Ranged, TextLen, TextRange};
@@ -78,6 +78,8 @@ impl NeedsParentheses for ExprConstant {
            OptionalParentheses::Multiline
        } else if is_multiline_string(self, context.source()) {
            OptionalParentheses::Never
+        } else if context.options().preview().is_enabled() {
+            OptionalParentheses::Multiline
        } else {
            OptionalParentheses::BestFit
        }
--- a/crates/ruff_python_formatter/src/expression/string.rs
+++ b/crates/ruff_python_formatter/src/expression/string.rs
@@ -1,6 +1,7 @@
 use std::borrow::Cow;

 use bitflags::bitflags;
+use smallvec::SmallVec;

 use ruff_formatter::{format_args, write, FormatError};
 use ruff_python_ast::node::AnyNodeRef;
@@ -139,18 +140,131 @@ impl<'a> FormatString<'a> {
 impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        let locator = f.context().locator();
+        let quote_style = f.options().quote_style();
        match self.layout {
            StringLayout::Default => {
                if self.string.is_implicit_concatenated() {
-                    in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
+                    let continuation = StringContinuation::from_string(self.string, &locator)?;
+                    if f.options().preview().is_enabled() {
+                        if let Some((first, rest)) = continuation.parts.split_first() {
+                            let first_normalized =
+                                first.normalize(Quoting::CanChange, &locator, quote_style);
+
+                            let quotes = first_normalized.quotes;
+                            let mut normalized = SmallVec::with_capacity(continuation.parts.len());
+                            normalized.push(first_normalized);
+
+                            for part in rest {
+                                normalized.push(part.normalize_with_quotes(&locator, quotes));
+                            }
+
+                            let continuation = NormalizedStringContinuation {
+                                string: self.string,
+                                parts: normalized,
+                            };
+
+                            let format_flat = format_with(|f| {
+                                quotes.fmt(f)?;
+
+                                // TODO comments
+                                for part in &continuation.parts {
+                                    match &part.text {
+                                        Cow::Borrowed(_) => {
+                                            source_text_slice(part.range()).fmt(f)?
+                                        }
+                                        Cow::Owned(content) => {
+                                            text(&content, Some(part.start())).fmt(f)?
+                                        }
+                                    }
+                                }
+
+                                quotes.fmt(f)
+                            });
+
+                            let format_multiline = format_with(|f| {
+                                // TODO won't format comments again
+                                group(&continuation).should_expand(true).fmt(f)
+                            });
+
+                            let format_joined = format_with(|f| {
+                                quotes.fmt(f)?;
+
+                                let mut fill = f.fill();
+
+                                let separator = format_with(|f| {
+                                    group(&format_args![
+                                        if_group_breaks(&quotes),
+                                        soft_line_break_or_space(),
+                                        if_group_breaks(&format_args![quotes, space()])
+                                    ])
+                                    .fmt(f)
+                                });
+
+                                for part in &continuation.parts {
+                                    let mut words = part.text.split(' ').peekable();
+
+                                    while let Some(word) = words.next() {
+                                        let is_last = words.peek().is_none();
+                                        let format_word =
+                                            format_with(|f| write!(f, [text(word, None)]));
+
+                                        fill.entry(&separator, &format_word);
+                                    }
+                                }
+
+                                fill.finish()?;
+
+                                quotes.fmt(f)
+                            });
+                            best_fitting![format_flat, format_multiline, format_joined]
+                                .with_mode(BestFittingMode::AllLines)
+                                .fmt(f)?;
+                        }
+
+                        Ok(())
+                    } else {
+                        in_parentheses_only_group(&continuation.normalize(quote_style, &locator))
+                            .fmt(f)
+                    }
                } else {
-                    StringPart::from_source(self.string.range(), &locator)
-                        .normalize(
-                            self.string.quoting(&locator),
-                            &locator,
-                            f.options().quote_style(),
-                        )
-                        .fmt(f)
+                    // Joining/splitting does not apply to triple-quoted strings or
+                    // expression-statement strings.
+                    // Splitting only applies in parenthesized contexts.
+                    // Strings are joined in non-parenthesized contexts.
+                    // Strings are not joined/split if they have a prefix other than `u` or `f`.
+                    if f.options().preview().is_enabled() {
+                        let normalized = StringPart::from_source(self.string.range(), &locator)
+                            .normalize(self.string.quoting(&locator), &locator, quote_style);
+
+                        // TODO how to optimize to avoid allocating a string for every word?
+                        write!(f, [normalized.prefix, normalized.quotes])?;
+
+                        // TODO: only split at words with a length of at least 6 characters.
+                        let mut words = normalized.text.split(' ');
+                        let mut fill = f.fill();
+
+                        let separator = format_with(|f| {
+                            group(&format_args![
+                                if_group_breaks(&normalized.quotes),
+                                soft_line_break_or_space(),
+                                if_group_breaks(&format_args![normalized.quotes, space()])
+                            ])
+                            .fmt(f)
+                        });
+
+                        while let Some(word) = words.next() {
+                            let format_word = format_with(|f| write!(f, [text(word, None)]));
+
+                            fill.entry(&separator, &format_word);
+                        }
+
+                        fill.finish()?;
+
+                        normalized.quotes.fmt(f)
+                    } else {
+                        StringPart::from_source(self.string.range(), &locator)
+                            .normalize(self.string.quoting(&locator), &locator, quote_style)
+                            .fmt(f)
+                    }
                }
            }
            StringLayout::DocString => {
@@ -160,33 +274,24 @@ impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
                format_docstring(&normalized, f)
            }
            StringLayout::ImplicitConcatenatedStringInBinaryLike => {
-                FormatStringContinuation::new(self.string).fmt(f)
+                StringContinuation::from_string(self.string, &locator)?
+                    .normalize(f.options().quote_style(), &locator)
+                    .fmt(f)
            }
        }
    }
 }

-struct FormatStringContinuation<'a> {
+struct StringContinuation<'a> {
+    parts: SmallVec<[StringPart; 4]>,
    string: &'a AnyString<'a>,
 }

-impl<'a> FormatStringContinuation<'a> {
-    fn new(string: &'a AnyString<'a>) -> Self {
-        if let AnyString::Constant(constant) = string {
-            debug_assert!(constant.value.is_str() || constant.value.is_bytes());
-        }
-        Self { string }
-    }
-}
+impl<'a> StringContinuation<'a> {
+    fn from_string(string: &'a AnyString<'a>, locator: &Locator) -> FormatResult<Self> {
+        debug_assert!(string.is_implicit_concatenated());

-impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
-    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
-        let comments = f.context().comments().clone();
-        let locator = f.context().locator();
-        let quote_style = f.options().quote_style();
-        let mut dangling_comments = comments.dangling(self.string);
-
-        let string_range = self.string.range();
+        let string_range = string.range();
        let string_content = locator.slice(string_range);

        // The AST parses implicit concatenation as a single string.
@@ -195,7 +300,7 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
        // because this is a black preview style.
        let lexer = lex_starts_at(string_content, Mode::Expression, string_range.start());

-        let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space());
+        let mut parts = SmallVec::new();

        for token in lexer {
            let (token, token_range) = match token {
@@ -228,46 +333,7 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {

            match token {
                Tok::String { .. } => {
-                    // ```python
-                    // (
-                    //      "a"
-                    //      # leading
-                    //      "the comment above"
-                    // )
-                    // ```
-                    let leading_comments_end = dangling_comments
-                        .partition_point(|comment| comment.start() <= token_range.start());
-
-                    let (leading_part_comments, rest) =
-                        dangling_comments.split_at(leading_comments_end);
-
-                    // ```python
-                    // (
-                    //      "a" # trailing comment
-                    //      "the comment above"
-                    // )
-                    // ```
-                    let trailing_comments_end = rest.partition_point(|comment| {
-                        comment.line_position().is_end_of_line()
-                            && !locator.contains_line_break(TextRange::new(
-                                token_range.end(),
-                                comment.start(),
-                            ))
-                    });
-
-                    let (trailing_part_comments, rest) = rest.split_at(trailing_comments_end);
-                    let part = StringPart::from_source(token_range, &locator);
-                    let normalized =
-                        part.normalize(self.string.quoting(&locator), &locator, quote_style);
-
-                    joiner.entry(&format_args![
-                        line_suffix_boundary(),
-                        leading_comments(leading_part_comments),
-                        normalized,
-                        trailing_comments(trailing_part_comments)
-                    ]);
-
-                    dangling_comments = rest;
+                    parts.push(StringPart::from_source(token_range, locator));
                }
                Tok::Comment(_)
                | Tok::NonLogicalNewline
@@ -278,6 +344,79 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
            }
        }

+        Ok(Self { parts, string })
+    }
+
+    /// Normalizes every part of this continuation and returns them as a
+    /// [`NormalizedStringContinuation`] that refers to the same string.
+    ///
+    /// The quoting mode is computed once from the whole string and shared by all
+    /// parts; each part is then normalized with the passed `quote_style`.
+    fn normalize(
+        self,
+        quote_style: QuoteStyle,
+        locator: &Locator<'a>,
+    ) -> NormalizedStringContinuation<'a> {
+        // Computed once up front so that every part is normalized with the same quoting mode.
+        let quoting = self.string.quoting(locator);
+        let normalized = self
+            .parts
+            .into_iter()
+            .map(|part| part.normalize(quoting, &locator, quote_style))
+            .collect();
+
+        NormalizedStringContinuation {
+            parts: normalized,
+            string: self.string,
+        }
+    }
+}
+
+/// The normalized parts of a string continuation, together with the string
+/// they were taken from.
+struct NormalizedStringContinuation<'a> {
+    // One normalized entry per part of the continuation.
+    parts: SmallVec<[NormalizedString<'a>; 4]>,
+    // The string the parts belong to; its dangling comments are looked up through it
+    // when the continuation is formatted.
+    string: &'a AnyString<'a>,
+}
+
+impl Format<PyFormatContext<'_>> for NormalizedStringContinuation<'_> {
+    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
+        let comments = f.context().comments().clone();
+        let locator = f.context().locator();
+        let quote_style = f.options().quote_style();
+        let quoting = self.string.quoting(&locator);
+
+        let mut dangling_comments = comments.dangling(self.string);
+        let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space());
+
+        for part in &self.parts {
+            // ```python
+            // (
+            //      "a"
+            //      # leading
+            //      "the comment above"
+            // )
+            // ```
+            let leading_comments_end =
+                dangling_comments.partition_point(|comment| comment.start() <= part.start());
+
+            let (leading_part_comments, rest) = dangling_comments.split_at(leading_comments_end);
+
+            // ```python
+            // (
+            //      "a" # trailing comment
+            //      "the comment above"
+            // )
+            // ```
+            let trailing_comments_end = rest.partition_point(|comment| {
+                comment.line_position().is_end_of_line()
+                    && !locator.contains_line_break(TextRange::new(part.end(), comment.start()))
+            });
+
+            let (trailing_part_comments, rest) = rest.split_at(trailing_comments_end);
+
+            joiner.entry(&format_args![
+                line_suffix_boundary(),
+                leading_comments(leading_part_comments),
+                part,
+                trailing_comments(trailing_part_comments)
+            ]);
+
+            dangling_comments = rest;
+        }
+
        debug_assert!(dangling_comments.is_empty());

        joiner.finish()
@@ -320,10 +459,10 @@ impl StringPart {

    /// Computes the strings preferred quotes and normalizes its content.
    fn normalize<'a>(
-        self,
+        &self,
        quoting: Quoting,
-        locator: &'a Locator,
-        quote_style: QuoteStyle,
+        locator: &Locator<'a>,
+        configured_quote_style: QuoteStyle,
    ) -> NormalizedString<'a> {
        let raw_content = locator.slice(self.content_range);

@@ -331,28 +470,40 @@ impl StringPart {
            Quoting::Preserve => self.quotes,
            Quoting::CanChange => {
                if self.prefix.is_raw_string() {
-                    preferred_quotes_raw(raw_content, self.quotes, quote_style)
+                    preferred_quotes_raw(raw_content, self.quotes, configured_quote_style)
                } else {
-                    preferred_quotes(raw_content, self.quotes, quote_style)
+                    preferred_quotes(raw_content, self.quotes, configured_quote_style)
                }
            }
        };

-        let normalized = normalize_string(
-            locator.slice(self.content_range),
-            preferred_quotes,
-            self.prefix.is_raw_string(),
-        );
+        self.normalize_with_quotes(locator, preferred_quotes)
+    }
+
+    /// Normalizes the content of this part for the given `quotes`.
+    ///
+    /// Unlike `normalize`, this does not compute preferred quotes: the passed
+    /// `quotes` are used as-is, both for normalizing the content and as the quotes
+    /// stored on the returned [`NormalizedString`].
+    fn normalize_with_quotes<'a>(
+        &self,
+        locator: &Locator<'a>,
+        quotes: StringQuotes,
+    ) -> NormalizedString<'a> {
+        let raw_content = locator.slice(self.content_range);
+
+        let normalized = normalize_string(raw_content, quotes, self.prefix.is_raw_string());

         NormalizedString {
             prefix: self.prefix,
             content_range: self.content_range,
             text: normalized,
-            quotes: preferred_quotes,
+            quotes,
         }
     }
 }

+/// The range reported for a [`StringPart`] is its `content_range`.
+///
+/// NOTE(review): presumably this excludes the prefix and the quotes — confirm
+/// against `StringPart::from_source` before relying on it for source slicing.
+impl Ranged for StringPart {
+    fn range(&self) -> TextRange {
+        self.content_range
+    }
+}
+
 #[derive(Debug)]
 struct NormalizedString<'a> {
    prefix: StringPrefix,
--- a/crates/ruff_python_formatter/src/options.rs
+++ b/crates/ruff_python_formatter/src/options.rs
@@ -268,9 +268,9 @@ impl FromStr for MagicTrailingComma {
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[cfg_attr(feature = "serde", serde(rename_all = "lowercase"))]
 pub enum PreviewMode {
-    #[default]
     Disabled,

+    // NOTE(review): this commit moves `#[default]` from `Disabled` to `Enabled`, so
+    // `PreviewMode::default()` now turns preview formatting on. Presumably a temporary
+    // switch for the proof of concept — confirm before merging.
+    #[default]
     Enabled,
 }
Author	SHA1	Message	Date
Micha Reiser	5d2513e5a9	POC of Black's string preview style formatting	2023-09-21 12:38:03 +02:00
Micha Reiser	272306bf5a	Introduce `StringContinuation` data structure	2023-09-21 08:01:39 +02:00