From e07670ad97a9b5952b7d48b8ee2807b40045f28e Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 19 Sep 2023 15:17:21 -0400 Subject: [PATCH] Add dangling comment handling to dictionary key-value pairs (#7495) ## Summary This PR fixes a formatting instability by changing the comment handling around the `:` in a dictionary to mirror that of the `:` in a lambda: we treat comments around the `:` as dangling, then format them after the `:`. Closes https://github.com/astral-sh/ruff/issues/7458. ## Test Plan `cargo test` No change in similarity. Before: | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1631 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99956 | 2587 | 404 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99983 | 3496 | 18 | | warehouse | 0.99929 | 648 | 16 | | zulip | 0.99969 | 1437 | 21 | After: | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1631 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99956 | 2587 | 404 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99983 | 3496 | 18 | | warehouse | 0.99929 | 648 | 16 | | zulip | 0.99969 | 1437 | 21 | --- .../test/fixtures/ruff/expression/dict.py | 73 ++++++++ .../fixtures/ruff/expression/dict_comp.py | 68 ++++++++ .../src/comments/placement.rs | 158 ++++++++++++------ .../src/expression/expr_dict.rs | 63 +++++-- .../src/expression/expr_dict_comp.rs | 48 ++++-- .../snapshots/format@expression__dict.py.snap | 122 +++++++++++++- .../format@expression__dict_comp.py.snap | 112 +++++++++++++ 7 files changed, 567 insertions(+), 77 deletions(-) diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/dict.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/dict.py index a9a527ec4e..f1fcf56c54 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/dict.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/dict.py @@ -59,3 +59,76 @@ a = { x={ # dangling end of line comment } + +# Comments between keys and values. +query = { + "must": + # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ], +} + +query = { + "must": # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ], +} + +query = { + "must": ( + # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + ) +} + +query = { + "must": ( # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + ) +} + +query = { + "must": ( + # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + ) +} + +query = { + ( + "must" # queries => map(pluck("fragment")) => flatten() + ) : [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] +} + +query = { + ( + "must" + # queries => map(pluck("fragment")) => flatten() + ) : [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] +} diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/dict_comp.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/dict_comp.py index 2f6f9a3c48..d7d631b138 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/dict_comp.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/dict_comp.py @@ -109,3 +109,71 @@ a = { aaaaaaaaaaaaaaaaaaaaa = { k: o for o in self.registry.values if o.__class__ is not ModelAdmin } + +# Comments between keys and values. +query = { + key: + # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + for key, queries in self._filters.items() +} + +query = { + key: # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + for key, queries in self._filters.items() +} + +query = { + key: ( + # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + ) + for key, queries in self._filters.items() +} + +query = { + key: ( # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + ) + for key, queries in self._filters.items() +} + +query = { + ( + key + # queries => map(pluck("fragment")) => flatten() + ): [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + for key, queries in self._filters.items() +} + +query = { + ( + key # queries => map(pluck("fragment")) => flatten() + ): [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + for key, queries in self._filters.items() +} diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs index dad69e7779..af762ce921 100644 --- a/crates/ruff_python_formatter/src/comments/placement.rs +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -121,51 +121,67 @@ fn handle_parenthesized_comment<'a>( // // For now, we _can_ assert, but to do so, we stop lexing when we hit a token that precedes an // identifier. - if comment.line_position().is_end_of_line() { - let range = TextRange::new(preceding.end(), comment.start()); - let tokenizer = SimpleTokenizer::new(locator.contents(), range); - if tokenizer - .skip_trivia() - .take_while(|token| { - !matches!( - token.kind, - SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class - ) - }) - .any(|token| { - debug_assert!( - !matches!(token.kind, SimpleTokenKind::Bogus), - "Unexpected token between nodes: `{:?}`", - locator.slice(range) - ); - token.kind() == SimpleTokenKind::LParen - }) - { - return CommentPlacement::leading(following, comment); - } - } else { - let range = TextRange::new(comment.end(), following.start()); - let tokenizer = SimpleTokenizer::new(locator.contents(), range); - if tokenizer - .skip_trivia() - .take_while(|token| { - !matches!( - token.kind, - SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class - ) - }) - .any(|token| { - debug_assert!( - !matches!(token.kind, SimpleTokenKind::Bogus), - "Unexpected token between nodes: `{:?}`", - locator.slice(range) - ); - token.kind() == SimpleTokenKind::RParen - }) - { - return CommentPlacement::trailing(preceding, comment); - } + // Search for comments that to the right of a parenthesized node, e.g.: + // ```python + // [ + // x # comment, + // ( + // y, + // ), + // ] + // ``` + let range = TextRange::new(preceding.end(), comment.start()); + let tokenizer = SimpleTokenizer::new(locator.contents(), range); + if tokenizer + .skip_trivia() + .take_while(|token| { + !matches!( + token.kind, + SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class + ) + }) + .any(|token| { + debug_assert!( + !matches!(token.kind, SimpleTokenKind::Bogus), + "Unexpected token between nodes: `{:?}`", + locator.slice(range) + ); + token.kind() == SimpleTokenKind::LParen + }) + { + return CommentPlacement::leading(following, comment); + } + + // Search for comments that to the right of a parenthesized node, e.g.: + // ```python + // [ + // ( + // x # comment, + // ), + // y + // ] + // ``` + let range = TextRange::new(comment.end(), following.start()); + let tokenizer = SimpleTokenizer::new(locator.contents(), range); + if tokenizer + .skip_trivia() + .take_while(|token| { + !matches!( + token.kind, + SimpleTokenKind::As | SimpleTokenKind::Def | SimpleTokenKind::Class + ) + }) + .any(|token| { + debug_assert!( + !matches!(token.kind, SimpleTokenKind::Bogus), + "Unexpected token between nodes: `{:?}`", + locator.slice(range) + ); + token.kind() == SimpleTokenKind::RParen + }) + { + return CommentPlacement::trailing(preceding, comment); } CommentPlacement::Default(comment) @@ -214,6 +230,9 @@ fn handle_enclosed_comment<'a>( AnyNodeRef::ExprUnaryOp(unary_op) => handle_unary_op_comment(comment, unary_op, locator), AnyNodeRef::ExprNamedExpr(_) => handle_named_expr_comment(comment, locator), AnyNodeRef::ExprDict(_) => handle_dict_unpacking_comment(comment, locator) + .or_else(|comment| handle_bracketed_end_of_line_comment(comment, locator)) + .or_else(|comment| handle_key_value_comment(comment, locator)), + AnyNodeRef::ExprDictComp(_) => handle_key_value_comment(comment, locator) .or_else(|comment| handle_bracketed_end_of_line_comment(comment, locator)), AnyNodeRef::ExprIfExp(expr_if) => handle_expr_if_comment(comment, expr_if, locator), AnyNodeRef::ExprSlice(expr_slice) => { @@ -272,8 +291,7 @@ fn handle_enclosed_comment<'a>( | AnyNodeRef::ExprSet(_) | AnyNodeRef::ExprGeneratorExp(_) | AnyNodeRef::ExprListComp(_) - | AnyNodeRef::ExprSetComp(_) - | AnyNodeRef::ExprDictComp(_) => handle_bracketed_end_of_line_comment(comment, locator), + | AnyNodeRef::ExprSetComp(_) => handle_bracketed_end_of_line_comment(comment, locator), AnyNodeRef::ExprTuple(tuple) if is_tuple_parenthesized(tuple, locator.contents()) => { handle_bracketed_end_of_line_comment(comment, locator) } @@ -1207,7 +1225,7 @@ fn handle_dict_unpacking_comment<'a>( .skip_while(|token| token.kind == SimpleTokenKind::RParen); // if the remaining tokens from the previous node are exactly `**`, - // re-assign the comment to the one that follows the stars + // re-assign the comment to the one that follows the stars. if tokens.any(|token| token.kind == SimpleTokenKind::DoubleStar) { CommentPlacement::leading(following, comment) } else { @@ -1215,6 +1233,52 @@ fn handle_dict_unpacking_comment<'a>( } } +/// Handles comments around the `:` in a key-value pair: +/// +/// ```python +/// { +/// key # dangling +/// : # dangling +/// # dangling +/// value +/// } +/// ``` +fn handle_key_value_comment<'a>( + comment: DecoratedComment<'a>, + locator: &Locator, +) -> CommentPlacement<'a> { + debug_assert!(matches!( + comment.enclosing_node(), + AnyNodeRef::ExprDict(_) | AnyNodeRef::ExprDictComp(_) + )); + + let (Some(following), Some(preceding)) = (comment.following_node(), comment.preceding_node()) + else { + return CommentPlacement::Default(comment); + }; + + // Ensure that the comment is between the key and the value by detecting the colon: + // ```python + // { + // key # comment + // : value + // } + // ``` + // This prevents against detecting comments on starred expressions as key-value comments. + let tokens = SimpleTokenizer::new( + locator.contents(), + TextRange::new(preceding.end(), following.start()), + ); + if tokens + .skip_trivia() + .any(|token| token.kind == SimpleTokenKind::Colon) + { + CommentPlacement::dangling(comment.enclosing_node(), comment) + } else { + CommentPlacement::Default(comment) + } +} + /// Handle comments between a function call and its arguments. For example, attach the following as /// dangling on the call: /// ```python diff --git a/crates/ruff_python_formatter/src/expression/expr_dict.rs b/crates/ruff_python_formatter/src/expression/expr_dict.rs index 1f2878c9b1..351505fe3a 100644 --- a/crates/ruff_python_formatter/src/expression/expr_dict.rs +++ b/crates/ruff_python_formatter/src/expression/expr_dict.rs @@ -3,7 +3,7 @@ use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::{Expr, ExprDict}; use ruff_text_size::{Ranged, TextRange}; -use crate::comments::{leading_comments, SourceComment}; +use crate::comments::{dangling_comments, leading_comments, SourceComment}; use crate::expression::parentheses::{ empty_parenthesized, parenthesized, NeedsParentheses, OptionalParentheses, }; @@ -25,15 +25,35 @@ impl FormatNodeRule for FormatExprDict { let comments = f.context().comments().clone(); let dangling = comments.dangling(item); - if values.is_empty() { + let (Some(key), Some(value)) = (keys.first(), values.first()) else { return empty_parenthesized("{", dangling, "}").fmt(f); - } + }; + + // Dangling comments can either appear after the open bracket, or around the key-value + // pairs: + // ```python + // { # open_parenthesis_comments + // x: # key_value_comments + // y + // } + // ``` + let (open_parenthesis_comments, key_value_comments) = dangling.split_at( + dangling + .partition_point(|comment| comment.end() < KeyValuePair::new(key, value).start()), + ); let format_pairs = format_with(|f| { let mut joiner = f.join_comma_separated(item.end()); + let mut key_value_comments = key_value_comments; for (key, value) in keys.iter().zip(values) { - let key_value_pair = KeyValuePair { key, value }; + let mut key_value_pair = KeyValuePair::new(key, value); + + let partition = key_value_comments + .partition_point(|comment| comment.start() < key_value_pair.end()); + key_value_pair = key_value_pair.with_comments(&key_value_comments[..partition]); + key_value_comments = &key_value_comments[partition..]; + joiner.entry(&key_value_pair, &key_value_pair); } @@ -41,7 +61,7 @@ impl FormatNodeRule for FormatExprDict { }); parenthesized("{", &format_pairs, "}") - .with_dangling_comments(dangling) + .with_dangling_comments(open_parenthesis_comments) .fmt(f) } @@ -69,6 +89,21 @@ impl NeedsParentheses for ExprDict { struct KeyValuePair<'a> { key: &'a Option, value: &'a Expr, + comments: &'a [SourceComment], +} + +impl<'a> KeyValuePair<'a> { + fn new(key: &'a Option, value: &'a Expr) -> Self { + Self { + key, + value, + comments: &[], + } + } + + fn with_comments(self, comments: &'a [SourceComment]) -> Self { + Self { comments, ..self } + } } impl Ranged for KeyValuePair<'_> { @@ -86,12 +121,18 @@ impl Format> for KeyValuePair<'_> { if let Some(key) = self.key { write!( f, - [group(&format_args![ - key.format(), - token(":"), - space(), - self.value.format() - ])] + [group(&format_with(|f| { + key.format().fmt(f)?; + token(":").fmt(f)?; + + if self.comments.is_empty() { + space().fmt(f)?; + } else { + dangling_comments(self.comments).fmt(f)?; + } + + self.value.format().fmt(f) + }))] ) } else { // TODO(charlie): Make these dangling comments on the `ExprDict`, and identify them diff --git a/crates/ruff_python_formatter/src/expression/expr_dict_comp.rs b/crates/ruff_python_formatter/src/expression/expr_dict_comp.rs index 4fcbbe35e9..33c3450af8 100644 --- a/crates/ruff_python_formatter/src/expression/expr_dict_comp.rs +++ b/crates/ruff_python_formatter/src/expression/expr_dict_comp.rs @@ -1,10 +1,9 @@ -use ruff_formatter::prelude::{ - format_args, format_with, group, soft_line_break_or_space, space, token, -}; use ruff_formatter::write; use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::ExprDictComp; +use ruff_text_size::Ranged; +use crate::comments::dangling_comments; use crate::expression::parentheses::{parenthesized, NeedsParentheses, OptionalParentheses}; use crate::prelude::*; @@ -20,30 +19,43 @@ impl FormatNodeRule for FormatExprDictComp { generators, } = item; - let joined = format_with(|f| { - f.join_with(soft_line_break_or_space()) - .entries(generators.iter().formatted()) - .finish() - }); - let comments = f.context().comments().clone(); let dangling = comments.dangling(item); + // Dangling comments can either appear after the open bracket, or around the key-value + // pairs: + // ```python + // { # open_parenthesis_comments + // x: # key_value_comments + // y + // for (x, y) in z + // } + // ``` + let (open_parenthesis_comments, key_value_comments) = + dangling.split_at(dangling.partition_point(|comment| comment.end() < key.start())); + write!( f, [parenthesized( "{", - &group(&format_args!( - group(&key.format()), - token(":"), - space(), - value.format(), - soft_line_break_or_space(), - joined - )), + &group(&format_with(|f| { + write!(f, [group(&key.format()), token(":")])?; + + if key_value_comments.is_empty() { + space().fmt(f)?; + } else { + dangling_comments(key_value_comments).fmt(f)?; + } + + write!(f, [value.format(), soft_line_break_or_space()])?; + + f.join_with(soft_line_break_or_space()) + .entries(generators.iter().formatted()) + .finish() + })), "}" ) - .with_dangling_comments(dangling)] + .with_dangling_comments(open_parenthesis_comments)] ) } } diff --git a/crates/ruff_python_formatter/tests/snapshots/format@expression__dict.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@expression__dict.py.snap index a1e535b1c2..3745b5bdce 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@expression__dict.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__dict.py.snap @@ -65,13 +65,88 @@ a = { x={ # dangling end of line comment } + +# Comments between keys and values. +query = { + "must": + # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ], +} + +query = { + "must": # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ], +} + +query = { + "must": ( + # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + ) +} + +query = { + "must": ( # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + ) +} + +query = { + "must": ( + # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + ) +} + +query = { + ( + "must" # queries => map(pluck("fragment")) => flatten() + ) : [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] +} + +query = { + ( + "must" + # queries => map(pluck("fragment")) => flatten() + ) : [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] +} ``` ## Output ```py # before { # open - key: value # key # colon # value + key: # key + # colon + value # value } # close # after @@ -135,6 +210,51 @@ a = { x = { # dangling end of line comment } + +# Comments between keys and values. +query = { + "must": + # queries => map(pluck("fragment")) => flatten() + [clause for kf_pair in queries for clause in kf_pair["fragment"]], +} + +query = { + "must": # queries => map(pluck("fragment")) => flatten() + [clause for kf_pair in queries for clause in kf_pair["fragment"]], +} + +query = { + "must": ( + # queries => map(pluck("fragment")) => flatten() + [clause for kf_pair in queries for clause in kf_pair["fragment"]] + ) +} + +query = { + "must": ( # queries => map(pluck("fragment")) => flatten() + [clause for kf_pair in queries for clause in kf_pair["fragment"]] + ) +} + +query = { + "must": ( + # queries => map(pluck("fragment")) => flatten() + [clause for kf_pair in queries for clause in kf_pair["fragment"]] + ) +} + +query = { + ( + "must" # queries => map(pluck("fragment")) => flatten() + ): [clause for kf_pair in queries for clause in kf_pair["fragment"]] +} + +query = { + ( + "must" + # queries => map(pluck("fragment")) => flatten() + ): [clause for kf_pair in queries for clause in kf_pair["fragment"]] +} ``` diff --git a/crates/ruff_python_formatter/tests/snapshots/format@expression__dict_comp.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@expression__dict_comp.py.snap index 4d725fc27c..df00b778e8 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@expression__dict_comp.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__dict_comp.py.snap @@ -115,6 +115,74 @@ a = { aaaaaaaaaaaaaaaaaaaaa = { k: o for o in self.registry.values if o.__class__ is not ModelAdmin } + +# Comments between keys and values. +query = { + key: + # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + for key, queries in self._filters.items() +} + +query = { + key: # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + for key, queries in self._filters.items() +} + +query = { + key: ( + # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + ) + for key, queries in self._filters.items() +} + +query = { + key: ( # queries => map(pluck("fragment")) => flatten() + [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + ) + for key, queries in self._filters.items() +} + +query = { + ( + key + # queries => map(pluck("fragment")) => flatten() + ): [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + for key, queries in self._filters.items() +} + +query = { + ( + key # queries => map(pluck("fragment")) => flatten() + ): [ + clause + for kf_pair in queries + for clause in kf_pair["fragment"] + ] + for key, queries in self._filters.items() +} ``` ## Output @@ -290,6 +358,50 @@ a = { aaaaaaaaaaaaaaaaaaaaa = { k: o for o in self.registry.values if o.__class__ is not ModelAdmin } + +# Comments between keys and values. +query = { + key: + # queries => map(pluck("fragment")) => flatten() + [clause for kf_pair in queries for clause in kf_pair["fragment"]] + for key, queries in self._filters.items() +} + +query = { + key: # queries => map(pluck("fragment")) => flatten() + [clause for kf_pair in queries for clause in kf_pair["fragment"]] + for key, queries in self._filters.items() +} + +query = { + key: ( + # queries => map(pluck("fragment")) => flatten() + [clause for kf_pair in queries for clause in kf_pair["fragment"]] + ) + for key, queries in self._filters.items() +} + +query = { + key: ( # queries => map(pluck("fragment")) => flatten() + [clause for kf_pair in queries for clause in kf_pair["fragment"]] + ) + for key, queries in self._filters.items() +} + +query = { + ( + key + # queries => map(pluck("fragment")) => flatten() + ): [clause for kf_pair in queries for clause in kf_pair["fragment"]] + for key, queries in self._filters.items() +} + +query = { + ( + key # queries => map(pluck("fragment")) => flatten() + ): [clause for kf_pair in queries for clause in kf_pair["fragment"]] + for key, queries in self._filters.items() +} ```