Fluent formatting of method chains (#21369)
This PR implements a modification (in preview) to fluent formatting for
method chains: We break _at_ the first call instead of _after_.
For example, we have the following diff between `main` and this PR (with
`line-length=8` so I don't have to stretch out the text):
```diff
x = (
- df.merge()
+ df
+ .merge()
.groupby()
.agg()
.filter()
)
```
## Explanation of current implementation
Recall that we traverse the AST to apply formatting. A method chain,
while read left-to-right, is stored in the AST "in reverse". So if we
start with something like
```python
a.b.c.d().e.f()
```
then the first syntax node we meet is essentially `.f()`. So we have to
peek ahead. And we actually _already_ do this in our current fluent
formatting logic: we peek ahead to count how many calls we have in the
chain to see whether we should be using fluent formatting or now.
In this implementation, we actually _record_ this number inside the enum
for `CallChainLayout`. That is, we make the variant `Fluent` hold an
`AttributeState`. This state can either be:
- The number of call-like attributes preceding the current attribute
- The state `FirstCallOrSubscript` which means we are at the first
call-like attribute in the chain (reading from left to right)
- The state `BeforeFirstCallOrSubscript` which means we are in the
"first group" of attributes, preceding that first call.
In our example, here's what it looks like at each attribute:
```
a.b.c.d().e.f @ Fluent(CallsOrSubscriptsPreceding(1))
a.b.c.d().e @ Fluent(CallsOrSubscriptsPreceding(1))
a.b.c.d @ Fluent(FirstCallOrSubscript)
a.b.c @ Fluent(BeforeFirstCallOrSubscript)
a.b @ Fluent(BeforeFirstCallOrSubscript)
```
Now, as we descend down from the parent expression, we pass along this
little piece of state and modify it as we go to track where we are. This
state doesn't do anything except when we are in `FirstCallOrSubscript`,
in which case we add a soft line break.
Closes #8598
---------
Co-authored-by: Brent Westbrook <36778786+ntBre@users.noreply.github.com>
This commit is contained in:
@@ -10,6 +10,7 @@ use crate::expression::parentheses::{
|
||||
NeedsParentheses, OptionalParentheses, Parentheses, is_expression_parenthesized,
|
||||
};
|
||||
use crate::prelude::*;
|
||||
use crate::preview::is_fluent_layout_split_first_call_enabled;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct FormatExprAttribute {
|
||||
@@ -47,20 +48,26 @@ impl FormatNodeRule<ExprAttribute> for FormatExprAttribute {
|
||||
)
|
||||
};
|
||||
|
||||
if call_chain_layout == CallChainLayout::Fluent {
|
||||
if call_chain_layout.is_fluent() {
|
||||
if parenthesize_value {
|
||||
// Don't propagate the call chain layout.
|
||||
value.format().with_options(Parentheses::Always).fmt(f)?;
|
||||
} else {
|
||||
match value.as_ref() {
|
||||
Expr::Attribute(expr) => {
|
||||
expr.format().with_options(call_chain_layout).fmt(f)?;
|
||||
expr.format()
|
||||
.with_options(call_chain_layout.transition_after_attribute())
|
||||
.fmt(f)?;
|
||||
}
|
||||
Expr::Call(expr) => {
|
||||
expr.format().with_options(call_chain_layout).fmt(f)?;
|
||||
expr.format()
|
||||
.with_options(call_chain_layout.transition_after_attribute())
|
||||
.fmt(f)?;
|
||||
}
|
||||
Expr::Subscript(expr) => {
|
||||
expr.format().with_options(call_chain_layout).fmt(f)?;
|
||||
expr.format()
|
||||
.with_options(call_chain_layout.transition_after_attribute())
|
||||
.fmt(f)?;
|
||||
}
|
||||
_ => {
|
||||
value.format().with_options(Parentheses::Never).fmt(f)?;
|
||||
@@ -105,8 +112,30 @@ impl FormatNodeRule<ExprAttribute> for FormatExprAttribute {
|
||||
// Allow the `.` on its own line if this is a fluent call chain
|
||||
// and the value either requires parenthesizing or is a call or subscript expression
|
||||
// (it's a fluent chain but not the first element).
|
||||
else if call_chain_layout == CallChainLayout::Fluent {
|
||||
if parenthesize_value || value.is_call_expr() || value.is_subscript_expr() {
|
||||
//
|
||||
// In preview we also break _at_ the first call in the chain.
|
||||
// For example:
|
||||
//
|
||||
// ```diff
|
||||
// # stable formatting vs. preview
|
||||
// x = (
|
||||
// - df.merge()
|
||||
// + df
|
||||
// + .merge()
|
||||
// .groupby()
|
||||
// .agg()
|
||||
// .filter()
|
||||
// )
|
||||
// ```
|
||||
else if call_chain_layout.is_fluent() {
|
||||
if parenthesize_value
|
||||
|| value.is_call_expr()
|
||||
|| value.is_subscript_expr()
|
||||
// Remember to update the doc-comment above when
|
||||
// stabilizing this behavior.
|
||||
|| (is_fluent_layout_split_first_call_enabled(f.context())
|
||||
&& call_chain_layout.is_first_call_like())
|
||||
{
|
||||
soft_line_break().fmt(f)?;
|
||||
}
|
||||
}
|
||||
@@ -148,8 +177,8 @@ impl FormatNodeRule<ExprAttribute> for FormatExprAttribute {
|
||||
)
|
||||
});
|
||||
|
||||
let is_call_chain_root = self.call_chain_layout == CallChainLayout::Default
|
||||
&& call_chain_layout == CallChainLayout::Fluent;
|
||||
let is_call_chain_root =
|
||||
self.call_chain_layout == CallChainLayout::Default && call_chain_layout.is_fluent();
|
||||
if is_call_chain_root {
|
||||
write!(f, [group(&format_inner)])
|
||||
} else {
|
||||
@@ -169,7 +198,8 @@ impl NeedsParentheses for ExprAttribute {
|
||||
self.into(),
|
||||
context.comments().ranges(),
|
||||
context.source(),
|
||||
) == CallChainLayout::Fluent
|
||||
)
|
||||
.is_fluent()
|
||||
{
|
||||
OptionalParentheses::Multiline
|
||||
} else if context.comments().has_dangling(self) {
|
||||
|
||||
@@ -47,7 +47,10 @@ impl FormatNodeRule<ExprCall> for FormatExprCall {
|
||||
func.format().with_options(Parentheses::Always).fmt(f)
|
||||
} else {
|
||||
match func.as_ref() {
|
||||
Expr::Attribute(expr) => expr.format().with_options(call_chain_layout).fmt(f),
|
||||
Expr::Attribute(expr) => expr
|
||||
.format()
|
||||
.with_options(call_chain_layout.decrement_call_like_count())
|
||||
.fmt(f),
|
||||
Expr::Call(expr) => expr.format().with_options(call_chain_layout).fmt(f),
|
||||
Expr::Subscript(expr) => expr.format().with_options(call_chain_layout).fmt(f),
|
||||
_ => func.format().with_options(Parentheses::Never).fmt(f),
|
||||
@@ -67,9 +70,7 @@ impl FormatNodeRule<ExprCall> for FormatExprCall {
|
||||
// queryset.distinct().order_by(field.name).values_list(field_name_flat_long_long=True)
|
||||
// )
|
||||
// ```
|
||||
if call_chain_layout == CallChainLayout::Fluent
|
||||
&& self.call_chain_layout == CallChainLayout::Default
|
||||
{
|
||||
if call_chain_layout.is_fluent() && self.call_chain_layout == CallChainLayout::Default {
|
||||
group(&fmt_func).fmt(f)
|
||||
} else {
|
||||
fmt_func.fmt(f)
|
||||
@@ -87,7 +88,8 @@ impl NeedsParentheses for ExprCall {
|
||||
self.into(),
|
||||
context.comments().ranges(),
|
||||
context.source(),
|
||||
) == CallChainLayout::Fluent
|
||||
)
|
||||
.is_fluent()
|
||||
{
|
||||
OptionalParentheses::Multiline
|
||||
} else if context.comments().has_dangling(self) {
|
||||
|
||||
@@ -397,7 +397,8 @@ impl Format<PyFormatContext<'_>> for FormatBody<'_> {
|
||||
body.into(),
|
||||
comments.ranges(),
|
||||
f.context().source(),
|
||||
) == CallChainLayout::Fluent
|
||||
)
|
||||
.is_fluent()
|
||||
{
|
||||
parenthesize_if_expands(&unparenthesized).fmt(f)
|
||||
} else {
|
||||
|
||||
@@ -51,7 +51,10 @@ impl FormatNodeRule<ExprSubscript> for FormatExprSubscript {
|
||||
value.format().with_options(Parentheses::Always).fmt(f)
|
||||
} else {
|
||||
match value.as_ref() {
|
||||
Expr::Attribute(expr) => expr.format().with_options(call_chain_layout).fmt(f),
|
||||
Expr::Attribute(expr) => expr
|
||||
.format()
|
||||
.with_options(call_chain_layout.decrement_call_like_count())
|
||||
.fmt(f),
|
||||
Expr::Call(expr) => expr.format().with_options(call_chain_layout).fmt(f),
|
||||
Expr::Subscript(expr) => expr.format().with_options(call_chain_layout).fmt(f),
|
||||
_ => value.format().with_options(Parentheses::Never).fmt(f),
|
||||
@@ -71,8 +74,8 @@ impl FormatNodeRule<ExprSubscript> for FormatExprSubscript {
|
||||
.fmt(f)
|
||||
});
|
||||
|
||||
let is_call_chain_root = self.call_chain_layout == CallChainLayout::Default
|
||||
&& call_chain_layout == CallChainLayout::Fluent;
|
||||
let is_call_chain_root =
|
||||
self.call_chain_layout == CallChainLayout::Default && call_chain_layout.is_fluent();
|
||||
if is_call_chain_root {
|
||||
write!(f, [group(&format_inner)])
|
||||
} else {
|
||||
@@ -92,7 +95,8 @@ impl NeedsParentheses for ExprSubscript {
|
||||
self.into(),
|
||||
context.comments().ranges(),
|
||||
context.source(),
|
||||
) == CallChainLayout::Fluent
|
||||
)
|
||||
.is_fluent()
|
||||
{
|
||||
OptionalParentheses::Multiline
|
||||
} else if is_expression_parenthesized(
|
||||
|
||||
@@ -876,6 +876,22 @@ impl<'a> First<'a> {
|
||||
/// )
|
||||
/// ).all()
|
||||
/// ```
|
||||
///
|
||||
/// In [`preview`](crate::preview::is_fluent_layout_split_first_call_enabled), we also track the position of the leftmost call or
|
||||
/// subscript on an attribute in the chain and break just before the dot.
|
||||
///
|
||||
/// So, for example, the right-hand summand in the above expression
|
||||
/// would get formatted as:
|
||||
/// ```python
|
||||
/// Blog.objects
|
||||
/// .filter(
|
||||
/// entry__headline__contains="McCartney",
|
||||
/// )
|
||||
/// .limit_results[:10]
|
||||
/// .filter(
|
||||
/// entry__pub_date__year=2010,
|
||||
/// )
|
||||
/// ```
|
||||
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub enum CallChainLayout {
|
||||
/// The root of a call chain
|
||||
@@ -883,19 +899,149 @@ pub enum CallChainLayout {
|
||||
Default,
|
||||
|
||||
/// A nested call chain element that uses fluent style.
|
||||
Fluent,
|
||||
Fluent(AttributeState),
|
||||
|
||||
/// A nested call chain element not using fluent style.
|
||||
NonFluent,
|
||||
}
|
||||
|
||||
/// Records information about the current position within
|
||||
/// a call chain.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum AttributeState {
|
||||
/// Stores the number of calls or subscripts
|
||||
/// to the left of the current position in a chain.
|
||||
///
|
||||
/// Consecutive calls/subscripts on a single
|
||||
/// object only count once. For example, if we are at
|
||||
/// `c` in `a.b()[0]()().c()` then this number would be 1.
|
||||
///
|
||||
/// Caveat: If the root of the chain is parenthesized,
|
||||
/// it contributes +1 to this count, even if it is not
|
||||
/// a call or subscript. But the name
|
||||
/// `CallLikeOrParenthesizedRootPreceding`
|
||||
/// is a tad unwieldy, and this also rarely occurs.
|
||||
CallLikePreceding(u32),
|
||||
/// Indicates that we are at the first called or
|
||||
/// subscripted object in the chain
|
||||
///
|
||||
/// For example, if we are at `b` in `a.b()[0]()().c()`
|
||||
FirstCallLike,
|
||||
/// Indicates that we are to the left of the first
|
||||
/// called or subscripted object in the chain, and therefore
|
||||
/// need not break.
|
||||
///
|
||||
/// For example, if we are at `a` in `a.b()[0]()().c()`
|
||||
BeforeFirstCallLike,
|
||||
}
|
||||
|
||||
impl CallChainLayout {
|
||||
/// Returns new state decreasing count of remaining calls/subscripts
|
||||
/// to traverse, or the state `FirstCallOrSubscript`, as appropriate.
|
||||
#[must_use]
|
||||
pub(crate) fn decrement_call_like_count(self) -> Self {
|
||||
match self {
|
||||
Self::Fluent(AttributeState::CallLikePreceding(x)) => {
|
||||
if x > 1 {
|
||||
// Recall that we traverse call chains from right to
|
||||
// left. So after moving from a call/subscript into
|
||||
// an attribute, we _decrease_ the count of
|
||||
// _remaining_ calls or subscripts to the left of our
|
||||
// current position.
|
||||
Self::Fluent(AttributeState::CallLikePreceding(x - 1))
|
||||
} else {
|
||||
Self::Fluent(AttributeState::FirstCallLike)
|
||||
}
|
||||
}
|
||||
_ => self,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns with state change
|
||||
/// `FirstCallOrSubscript` -> `BeforeFirstCallOrSubscript`
|
||||
/// and otherwise returns unchanged.
|
||||
#[must_use]
|
||||
pub(crate) fn transition_after_attribute(self) -> Self {
|
||||
match self {
|
||||
Self::Fluent(AttributeState::FirstCallLike) => {
|
||||
Self::Fluent(AttributeState::BeforeFirstCallLike)
|
||||
}
|
||||
_ => self,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_first_call_like(self) -> bool {
|
||||
matches!(self, Self::Fluent(AttributeState::FirstCallLike))
|
||||
}
|
||||
|
||||
/// Returns either `Fluent` or `NonFluent` depending on a
|
||||
/// heuristic computed for the whole chain.
|
||||
///
|
||||
/// Explicitly, the criterion to return `Fluent` is
|
||||
/// as follows:
|
||||
///
|
||||
/// 1. Beginning from the right (i.e. the `expr` itself),
|
||||
/// traverse inwards past calls, subscripts, and attribute
|
||||
/// expressions until we meet the first expression that is
|
||||
/// either none of these or else is parenthesized. This will
|
||||
/// be the _root_ of the call chain.
|
||||
/// 2. Count the number of _attribute values_ that are _called
|
||||
/// or subscripted_ in the chain (note that this includes the
|
||||
/// root but excludes the rightmost attribute in the chain since
|
||||
/// it is not the _value_ of some attribute).
|
||||
/// 3. If the root is parenthesized, add 1 to that value.
|
||||
/// 4. If the total is at least 2, return `Fluent`. Otherwise
|
||||
/// return `NonFluent`
|
||||
pub(crate) fn from_expression(
|
||||
mut expr: ExprRef,
|
||||
comment_ranges: &CommentRanges,
|
||||
source: &str,
|
||||
) -> Self {
|
||||
let mut attributes_after_parentheses = 0;
|
||||
// TODO(dylan): Once the fluent layout preview style is
|
||||
// stabilized, see if it is possible to simplify some of
|
||||
// the logic around parenthesized roots. (While supporting
|
||||
// both styles it is more difficult to do this.)
|
||||
|
||||
// Count of attribute _values_ which are called or
|
||||
// subscripted, after the leftmost parenthesized
|
||||
// value.
|
||||
//
|
||||
// Examples:
|
||||
// ```
|
||||
// # Count of 3 - notice that .d()
|
||||
// # does not contribute
|
||||
// a().b().c[0]()().d()
|
||||
// # Count of 2 - notice that a()
|
||||
// # does not contribute
|
||||
// (a()).b().c[0].d
|
||||
// ```
|
||||
let mut computed_attribute_values_after_parentheses = 0;
|
||||
|
||||
// Similar to the above, but instead looks at all calls
|
||||
// and subscripts rather than looking only at those on
|
||||
// _attribute values_. So this count can differ from the
|
||||
// above.
|
||||
//
|
||||
// Examples of `computed_attribute_values_after_parentheses` vs
|
||||
// `call_like_count`:
|
||||
//
|
||||
// a().b ---> 1 vs 1
|
||||
// a.b().c --> 1 vs 1
|
||||
// a.b() ---> 0 vs 1
|
||||
let mut call_like_count = 0;
|
||||
|
||||
// Going from right to left, we traverse calls, subscripts,
|
||||
// and attributes until we get to an expression of a different
|
||||
// kind _or_ to a parenthesized expression. This records
|
||||
// the case where we end the traversal at a parenthesized expression.
|
||||
//
|
||||
// In these cases, the inferred semantics of the chain are different.
|
||||
// We interpret this as the user indicating:
|
||||
// "this parenthesized value is the object of interest and we are
|
||||
// doing transformations on it". This increases our confidence that
|
||||
// this should be fluently formatted, and also means we should make
|
||||
// our first break after this value.
|
||||
let mut root_value_parenthesized = false;
|
||||
loop {
|
||||
match expr {
|
||||
ExprRef::Attribute(ast::ExprAttribute { value, .. }) => {
|
||||
@@ -907,10 +1053,10 @@ impl CallChainLayout {
|
||||
// ```
|
||||
if is_expression_parenthesized(value.into(), comment_ranges, source) {
|
||||
// `(a).b`. We preserve these parentheses so don't recurse
|
||||
attributes_after_parentheses += 1;
|
||||
root_value_parenthesized = true;
|
||||
break;
|
||||
} else if matches!(value.as_ref(), Expr::Call(_) | Expr::Subscript(_)) {
|
||||
attributes_after_parentheses += 1;
|
||||
computed_attribute_values_after_parentheses += 1;
|
||||
}
|
||||
|
||||
expr = ExprRef::from(value.as_ref());
|
||||
@@ -925,31 +1071,68 @@ impl CallChainLayout {
|
||||
// ```
|
||||
ExprRef::Call(ast::ExprCall { func: inner, .. })
|
||||
| ExprRef::Subscript(ast::ExprSubscript { value: inner, .. }) => {
|
||||
// We preserve these parentheses so don't recurse
|
||||
// e.g. (a)[0].x().y().z()
|
||||
// ^stop here
|
||||
if is_expression_parenthesized(inner.into(), comment_ranges, source) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Accumulate the `call_like_count`, but we only
|
||||
// want to count things like `a()[0]()()` once.
|
||||
if !inner.is_call_expr() && !inner.is_subscript_expr() {
|
||||
call_like_count += 1;
|
||||
}
|
||||
|
||||
expr = ExprRef::from(inner.as_ref());
|
||||
}
|
||||
_ => {
|
||||
// We to format the following in fluent style:
|
||||
// ```
|
||||
// f2 = (a).w().t(1,)
|
||||
// ^ expr
|
||||
// ```
|
||||
if is_expression_parenthesized(expr, comment_ranges, source) {
|
||||
attributes_after_parentheses += 1;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// We preserve these parentheses so don't recurse
|
||||
if is_expression_parenthesized(expr, comment_ranges, source) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if attributes_after_parentheses < 2 {
|
||||
|
||||
if computed_attribute_values_after_parentheses + u32::from(root_value_parenthesized) < 2 {
|
||||
CallChainLayout::NonFluent
|
||||
} else {
|
||||
CallChainLayout::Fluent
|
||||
CallChainLayout::Fluent(AttributeState::CallLikePreceding(
|
||||
// We count a parenthesized root value as an extra
|
||||
// call for the purposes of tracking state.
|
||||
//
|
||||
// The reason is that, in this case, we want the first
|
||||
// "special" break to happen right after the root, as
|
||||
// opposed to right after the first called/subscripted
|
||||
// attribute.
|
||||
//
|
||||
// For example:
|
||||
//
|
||||
// ```
|
||||
// (object_of_interest)
|
||||
// .data.filter()
|
||||
// .agg()
|
||||
// .etc()
|
||||
// ```
|
||||
//
|
||||
// instead of (in preview):
|
||||
//
|
||||
// ```
|
||||
// (object_of_interest)
|
||||
// .data
|
||||
// .filter()
|
||||
// .etc()
|
||||
// ```
|
||||
//
|
||||
// For comparison, if we didn't have parentheses around
|
||||
// the root, we want (and get, in preview):
|
||||
//
|
||||
// ```
|
||||
// object_of_interest.data
|
||||
// .filter()
|
||||
// .agg()
|
||||
// .etc()
|
||||
// ```
|
||||
call_like_count + u32::from(root_value_parenthesized),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -972,9 +1155,13 @@ impl CallChainLayout {
|
||||
CallChainLayout::NonFluent
|
||||
}
|
||||
}
|
||||
layout @ (CallChainLayout::Fluent | CallChainLayout::NonFluent) => layout,
|
||||
layout @ (CallChainLayout::Fluent(_) | CallChainLayout::NonFluent) => layout,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_fluent(self) -> bool {
|
||||
matches!(self, CallChainLayout::Fluent(_))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
|
||||
@@ -59,3 +59,10 @@ pub(crate) const fn is_avoid_parens_for_long_as_captures_enabled(
|
||||
pub(crate) const fn is_parenthesize_lambda_bodies_enabled(context: &PyFormatContext) -> bool {
|
||||
context.is_preview()
|
||||
}
|
||||
|
||||
/// Returns `true` if the
|
||||
/// [`fluent_layout_split_first_call`](https://github.com/astral-sh/ruff/pull/21369) preview
|
||||
/// style is enabled.
|
||||
pub(crate) const fn is_fluent_layout_split_first_call_enabled(context: &PyFormatContext) -> bool {
|
||||
context.is_preview()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user