Files
ruff/crates/ruff_python_formatter/src/context.rs
Brent Westbrook 0115fd3757 Avoid reusing nested, interpolated quotes before Python 3.12 (#20930)
## Summary

Fixes #20774 by tracking whether an `InterpolatedStringState` element is
nested inside of another interpolated element. This feels like kind of a
naive fix, so I'm open to other ideas. But it resolves the problem in
the issue and clears up the syntax error in the black compatibility
test, without affecting many other cases.

The other affected case is actually interesting too because the
[input](96b156303b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.py (L707))
is invalid, but the previous quote selection fixed the invalid syntax:

```pycon
Python 3.11.13 (main, Sep  2 2025, 14:20:25) [Clang 20.1.4 ] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> f'{1: abcd "{'aa'}" }'  # input
  File "<stdin>", line 1
    f'{1: abcd "{'aa'}" }'
                  ^^
SyntaxError: f-string: expecting '}'
>>> f'{1: abcd "{"aa"}" }'  # old output
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: Invalid format specifier ' abcd "aa" ' for object of type 'int'
>>> f'{1: abcd "{'aa'}" }'  # new output
  File "<stdin>", line 1
    f'{1: abcd "{'aa'}" }'
                  ^^
SyntaxError: f-string: expecting '}'
```

We now preserve the invalid syntax in the input.

Unfortunately, this also seems to be another edge case I didn't consider
in https://github.com/astral-sh/ruff/pull/20867 because we don't flag
this as a syntax error after 0.14.1:

<details><summary>Shell output</summary>
<p>

```
> uvx ruff@0.14.0 check --ignore ALL --target-version py311 - <<EOF
f'{1: abcd "{'aa'}" }'
EOF
invalid-syntax: Cannot reuse outer quote character in f-strings on Python 3.11 (syntax was added in Python 3.12)
 --> -:1:14
  |
1 | f'{1: abcd "{'aa'}" }'
  |              ^
  |

Found 1 error.
> uvx ruff@0.14.1 check --ignore ALL --target-version py311 - <<EOF
f'{1: abcd "{'aa'}" }'
EOF
All checks passed!
> uvx python@3.11 -m ast <<EOF
f'{1: abcd "{'aa'}" }'
EOF
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/home/brent/.local/share/uv/python/cpython-3.11.13-linux-x86_64-gnu/lib/python3.11/ast.py", line 1752, in <module>
    main()
  File "/home/brent/.local/share/uv/python/cpython-3.11.13-linux-x86_64-gnu/lib/python3.11/ast.py", line 1748, in main
    tree = parse(source, args.infile.name, args.mode, type_comments=args.no_type_comments)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/brent/.local/share/uv/python/cpython-3.11.13-linux-x86_64-gnu/lib/python3.11/ast.py", line 50, in parse
    return compile(source, filename, mode, flags,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<stdin>", line 1
    f'{1: abcd "{'aa'}" }'
                  ^^
SyntaxError: f-string: expecting '}'
```

</p>
</details> 


I assumed that was the same `ParseError` as the one caused by
`f"{1:""}"`, but this is a nested interpolation inside of the format
spec.

## Test Plan

New test copied from the black compatibility test. I guess this is a
duplicate now: I started working on this branch before the new black
tests were imported, so I could delete the separate test in our fixtures
if that's preferable.
2025-10-17 08:49:16 -04:00

454 lines
13 KiB
Rust

use std::fmt::{Debug, Formatter};
use std::ops::{Deref, DerefMut};
use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode};
use ruff_python_ast::str::Quote;
use ruff_python_parser::Tokens;
use crate::PyFormatOptions;
use crate::comments::Comments;
use crate::other::interpolated_string::InterpolatedStringContext;
/// The context carried through a run of the Python formatter.
///
/// It bundles the formatter options and the source text (with its comments
/// and tokens) together with the positional state that is read and updated
/// through the accessors on this type: the node level, the indent level, the
/// docstring quote, and the interpolated-string state.
pub struct PyFormatContext<'a> {
/// The formatter options; returned by `FormatContext::options`.
options: PyFormatOptions,
/// The source text; returned by `source()` and wrapped by `source_code()`.
contents: &'a str,
/// The comments passed to the constructor; returned by `comments()`.
comments: Comments<'a>,
/// The tokens passed to the constructor; returned by `tokens()`.
tokens: &'a Tokens,
/// The current [`NodeLevel`]. Starts at the top level.
node_level: NodeLevel,
/// The current [`IndentLevel`]. Starts at level 0.
indent_level: IndentLevel,
/// Set to a non-None value when the formatter is running on a code
/// snippet within a docstring. The value should be the quote character of the
/// docstring containing the code snippet.
///
/// Various parts of the formatter may inspect this state to change how it
/// works. For example, multi-line strings will always be written with a
/// quote style that is inverted from the one here in order to ensure that
/// the formatted Python code will be valid.
docstring: Option<Quote>,
/// The state of the formatter with respect to f-strings and t-strings.
interpolated_string_state: InterpolatedStringState,
}
impl<'a> PyFormatContext<'a> {
    /// Creates a fresh context for formatting `contents`.
    ///
    /// The new context starts at the top level (not the last statement), at
    /// indent level 0, outside of any docstring and outside of any
    /// interpolated string.
    pub(crate) fn new(
        options: PyFormatOptions,
        contents: &'a str,
        comments: Comments<'a>,
        tokens: &'a Tokens,
    ) -> Self {
        let node_level = NodeLevel::TopLevel(TopLevelStatementPosition::Other);
        let indent_level = IndentLevel::new(0);

        Self {
            options,
            contents,
            comments,
            tokens,
            node_level,
            indent_level,
            docstring: None,
            interpolated_string_state: InterpolatedStringState::Outside,
        }
    }

    /// Returns the source text this context was created with.
    pub(crate) fn source(&self) -> &'a str {
        self.contents
    }

    /// Replaces the current node level with `level`.
    pub(crate) fn set_node_level(&mut self, level: NodeLevel) {
        self.node_level = level;
    }

    /// Returns the current node level.
    pub(crate) fn node_level(&self) -> NodeLevel {
        self.node_level
    }

    /// Replaces the current indent level with `level`.
    pub(crate) fn set_indent_level(&mut self, level: IndentLevel) {
        self.indent_level = level;
    }

    /// Returns the current indent level.
    pub(crate) fn indent_level(&self) -> IndentLevel {
        self.indent_level
    }

    /// Returns the comments this context was created with.
    pub(crate) fn comments(&self) -> &Comments<'a> {
        &self.comments
    }

    /// Returns the tokens this context was created with.
    pub(crate) fn tokens(&self) -> &'a Tokens {
        self.tokens
    }

    /// Returns the quote character of the enclosing docstring, if any.
    ///
    /// The value is `Some` only while the formatter is running on a code
    /// snippet within a docstring; the quote is the one used by the docstring
    /// that contains the snippet currently being formatted.
    pub(crate) fn docstring(&self) -> Option<Quote> {
        self.docstring
    }

    /// Consumes this context and returns one suitable for formatting code
    /// snippets within a docstring.
    ///
    /// `quote` should be the quote character used by the docstring that
    /// contains the code snippets. All other state is carried over unchanged.
    pub(crate) fn in_docstring(mut self, quote: Quote) -> PyFormatContext<'a> {
        self.docstring = Some(quote);
        self
    }

    /// Returns the current state with respect to f-strings and t-strings.
    pub(crate) fn interpolated_string_state(&self) -> InterpolatedStringState {
        self.interpolated_string_state
    }

    /// Replaces the current f-string/t-string state.
    pub(crate) fn set_interpolated_string_state(
        &mut self,
        interpolated_string_state: InterpolatedStringState,
    ) {
        self.interpolated_string_state = interpolated_string_state;
    }

    /// Returns `true` if preview mode is enabled in the formatter options.
    pub(crate) const fn is_preview(&self) -> bool {
        self.options.preview().is_enabled()
    }
}
impl FormatContext for PyFormatContext<'_> {
    type Options = PyFormatOptions;

    /// Borrows the formatter options stored in this context.
    fn options(&self) -> &PyFormatOptions {
        &self.options
    }

    /// Wraps the source text of this context in a [`SourceCode`].
    fn source_code(&self) -> SourceCode<'_> {
        let text = self.contents;
        SourceCode::new(text)
    }
}
impl Debug for PyFormatContext<'_> {
    /// Writes the options, comments, node level and source text.
    ///
    /// The remaining fields (tokens, indent level, docstring quote and
    /// interpolated-string state) are not part of the debug output.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("PyFormatContext");
        out.field("options", &self.options);
        // Comments are rendered through their own debug view, which needs the
        // source code to resolve them.
        out.field("comments", &self.comments.debug(self.source_code()));
        out.field("node_level", &self.node_level);
        out.field("source", &self.contents);
        out.finish()
    }
}
/// The state of the formatter with respect to f-strings and t-strings.
///
/// Defaults to [`InterpolatedStringState::Outside`].
#[derive(Clone, Copy, Debug, Default)]
pub(crate) enum InterpolatedStringState {
/// The formatter is inside an f-string expression element i.e., between the
/// curly braces in `f"foo {x}"`.
///
/// The contained `InterpolatedStringContext` is the surrounding f-string context.
InsideInterpolatedElement(InterpolatedStringContext),
/// The formatter is inside more than one nested f-string, such as in `nested` in:
///
/// ```py
/// f"{f'''{'nested'} inner'''} outer"
/// ```
///
/// As for `InsideInterpolatedElement`, the variant carries an
/// `InterpolatedStringContext`.
NestedInterpolatedElement(InterpolatedStringContext),
/// The formatter is outside an f-string.
#[default]
Outside,
}
impl InterpolatedStringState {
pub(crate) fn can_contain_line_breaks(self) -> Option<bool> {
match self {
InterpolatedStringState::InsideInterpolatedElement(context)
| InterpolatedStringState::NestedInterpolatedElement(context) => {
Some(context.is_multiline())
}
InterpolatedStringState::Outside => None,
}
}
/// Returns `true` if the interpolated string state is [`NestedInterpolatedElement`].
pub(crate) fn is_nested(self) -> bool {
matches!(self, Self::NestedInterpolatedElement(..))
}
}
/// The position of a top-level statement in the module.
///
/// Carried by [`NodeLevel::TopLevel`]. Defaults to
/// [`TopLevelStatementPosition::Other`].
#[derive(Copy, Clone, Debug, Eq, PartialEq, Default)]
pub(crate) enum TopLevelStatementPosition {
/// This is the last top-level statement in the module.
Last,
/// Any other top-level statement.
#[default]
Other,
}
/// The enclosing level of the node that is currently being formatted.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum NodeLevel {
/// Formatting statements on the module level.
///
/// The payload records whether the statement is the last one in the module.
TopLevel(TopLevelStatementPosition),
/// Formatting the body statements of a [compound statement](https://docs.python.org/3/reference/compound_stmts.html#compound-statements)
/// (`if`, `while`, `match`, etc.).
CompoundStatement,
/// The root or any sub-expression.
///
/// `is_parenthesized` reports `true` for this variant only when the group id
/// is `Some`.
// NOTE(review): presumably the id of the enclosing parenthesized group — confirm against callers.
Expression(Option<GroupId>),
/// Formatting nodes that are enclosed by a parenthesized (any `[]`, `{}` or `()`) expression.
ParenthesizedExpression,
}
impl Default for NodeLevel {
fn default() -> Self {
Self::TopLevel(TopLevelStatementPosition::Other)
}
}
impl NodeLevel {
    /// Returns `true` if the expression is in a parenthesized context, i.e.
    /// the level is either an expression with a group id or a parenthesized
    /// expression.
    pub(crate) const fn is_parenthesized(self) -> bool {
        match self {
            Self::Expression(Some(_)) | Self::ParenthesizedExpression => true,
            Self::Expression(None) | Self::TopLevel(_) | Self::CompoundStatement => false,
        }
    }

    /// Returns `true` if this is the last top-level statement in the module.
    pub(crate) const fn is_last_top_level_statement(self) -> bool {
        match self {
            Self::TopLevel(TopLevelStatementPosition::Last) => true,
            Self::TopLevel(TopLevelStatementPosition::Other)
            | Self::CompoundStatement
            | Self::Expression(_)
            | Self::ParenthesizedExpression => false,
        }
    }
}
/// Change the [`NodeLevel`] of the formatter for the lifetime of this struct.
///
/// Creating the value installs the new level on the context of the wrapped
/// buffer and remembers the level that was active before; dropping it puts
/// the remembered level back. The value dereferences to the wrapped buffer.
pub(crate) struct WithNodeLevel<'ast, 'buf, B>
where
B: Buffer<Context = PyFormatContext<'ast>>,
{
/// The wrapped buffer whose context holds the node level.
buffer: &'buf mut B,
/// The node level that was active when this value was created.
saved_level: NodeLevel,
}
impl<'ast, 'buf, B> WithNodeLevel<'ast, 'buf, B>
where
    B: Buffer<Context = PyFormatContext<'ast>>,
{
    /// Sets the node level of `buffer`'s context to `level`, remembering the
    /// level that was active before so it can be restored on drop.
    pub(crate) fn new(level: NodeLevel, buffer: &'buf mut B) -> Self {
        let saved_level = {
            let ctx = buffer.state_mut().context_mut();
            let previous = ctx.node_level();
            ctx.set_node_level(level);
            previous
        };

        Self {
            buffer,
            saved_level,
        }
    }
}
impl<'ast, B> Deref for WithNodeLevel<'ast, '_, B>
where
    B: Buffer<Context = PyFormatContext<'ast>>,
{
    type Target = B;

    /// Gives shared access to the wrapped buffer.
    fn deref(&self) -> &B {
        &*self.buffer
    }
}
impl<'ast, B> DerefMut for WithNodeLevel<'ast, '_, B>
where
    B: Buffer<Context = PyFormatContext<'ast>>,
{
    /// Gives exclusive access to the wrapped buffer.
    fn deref_mut(&mut self) -> &mut B {
        &mut *self.buffer
    }
}
impl<'ast, B> Drop for WithNodeLevel<'ast, '_, B>
where
    B: Buffer<Context = PyFormatContext<'ast>>,
{
    /// Restores the node level that was active before this value was created.
    fn drop(&mut self) {
        let previous = self.saved_level;
        let ctx = self.buffer.state_mut().context_mut();
        ctx.set_node_level(previous);
    }
}
/// The current indent level of the formatter.
///
/// One can determine the width of the indent itself (in number of ASCII
/// space characters) with `to_ascii_spaces`, which multiplies the configured
/// indent width by the indent level minus one (see the note on `level` for
/// why one is subtracted).
///
/// This is specifically used inside the docstring code formatter for
/// implementing its "dynamic" line width mode. Namely, in the nested call to
/// the formatter, when "dynamic" mode is enabled, the line width is set to
/// `max(1, line_width - indent_level * indent_width)`, where `line_width` in
/// this context is the global line width setting.
// NOTE(review): this previously read `min(1, ...)`, which would cap the line
// width at 1; a floor of 1 (`max`) is presumably what is meant — confirm
// against the docstring formatter.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) struct IndentLevel {
/// The numeric level. It is incremented for every whole indent in Python
/// source code.
///
/// Note that the first indentation level is actually 1, since this starts
/// at 0 and is incremented when the first top-level statement is seen. So
/// even though the first top-level statement in Python source will have no
/// indentation, its indentation level is 1.
level: u16,
}
impl IndentLevel {
/// Returns a new indent level for the given value.
pub(crate) fn new(level: u16) -> IndentLevel {
IndentLevel { level }
}
/// Returns the next indent level.
pub(crate) fn increment(self) -> IndentLevel {
IndentLevel {
level: self.level.saturating_add(1),
}
}
/// Convert this indent level into a specific number of ASCII whitespace
/// characters based on the given indent width.
pub(crate) fn to_ascii_spaces(self, width: IndentWidth) -> u16 {
let width = u16::try_from(width.value()).unwrap_or(u16::MAX);
// Why the subtraction? IndentLevel starts at 0 and asks for the "next"
// indent level before seeing the first top-level statement. So it's
// always 1 more than what we expect it to be.
let level = self.level.saturating_sub(1);
width.saturating_mul(level)
}
}
/// Change the [`IndentLevel`] of the formatter for the lifetime of this
/// struct.
///
/// Creating the value installs the new level on the context of the wrapped
/// buffer and remembers the level that was active before; dropping it puts
/// the remembered level back. The value dereferences to the wrapped buffer.
pub(crate) struct WithIndentLevel<'a, B, D>
where
D: DerefMut<Target = B>,
B: Buffer<Context = PyFormatContext<'a>>,
{
/// A handle that dereferences mutably to the wrapped buffer.
buffer: D,
/// The indent level that was active when this value was created.
saved_level: IndentLevel,
}
impl<'a, B, D> WithIndentLevel<'a, B, D>
where
    D: DerefMut<Target = B>,
    B: Buffer<Context = PyFormatContext<'a>>,
{
    /// Sets the indent level of `buffer`'s context to `level`, remembering
    /// the level that was active before so it can be restored on drop.
    pub(crate) fn new(level: IndentLevel, mut buffer: D) -> Self {
        let saved_level = {
            let ctx = buffer.state_mut().context_mut();
            let previous = ctx.indent_level();
            ctx.set_indent_level(level);
            previous
        };

        Self {
            buffer,
            saved_level,
        }
    }
}
impl<'a, B, D> Deref for WithIndentLevel<'a, B, D>
where
    D: DerefMut<Target = B>,
    B: Buffer<Context = PyFormatContext<'a>>,
{
    type Target = B;

    /// Gives shared access to the wrapped buffer.
    fn deref(&self) -> &B {
        &*self.buffer
    }
}
impl<'a, B, D> DerefMut for WithIndentLevel<'a, B, D>
where
    D: DerefMut<Target = B>,
    B: Buffer<Context = PyFormatContext<'a>>,
{
    /// Gives exclusive access to the wrapped buffer.
    fn deref_mut(&mut self) -> &mut B {
        &mut *self.buffer
    }
}
impl<'a, B, D> Drop for WithIndentLevel<'a, B, D>
where
    D: DerefMut<Target = B>,
    B: Buffer<Context = PyFormatContext<'a>>,
{
    /// Restores the indent level that was active before this value was
    /// created.
    fn drop(&mut self) {
        let previous = self.saved_level;
        let ctx = self.buffer.state_mut().context_mut();
        ctx.set_indent_level(previous);
    }
}
/// Change the [`InterpolatedStringState`] of the formatter for the lifetime
/// of this struct.
///
/// Creating the value installs the new state on the context of the wrapped
/// buffer and remembers the state that was active before; dropping it puts
/// the remembered state back. The value dereferences to the wrapped buffer.
pub(crate) struct WithInterpolatedStringState<'a, B, D>
where
D: DerefMut<Target = B>,
B: Buffer<Context = PyFormatContext<'a>>,
{
/// A handle that dereferences mutably to the wrapped buffer.
buffer: D,
/// The interpolated-string state that was active when this value was
/// created.
saved_location: InterpolatedStringState,
}
impl<'a, B, D> WithInterpolatedStringState<'a, B, D>
where
    D: DerefMut<Target = B>,
    B: Buffer<Context = PyFormatContext<'a>>,
{
    /// Sets the interpolated-string state of `buffer`'s context to
    /// `expr_location`, remembering the state that was active before so it
    /// can be restored on drop.
    pub(crate) fn new(expr_location: InterpolatedStringState, mut buffer: D) -> Self {
        let saved_location = {
            let ctx = buffer.state_mut().context_mut();
            let previous = ctx.interpolated_string_state();
            ctx.set_interpolated_string_state(expr_location);
            previous
        };

        Self {
            buffer,
            saved_location,
        }
    }
}
impl<'a, B, D> Deref for WithInterpolatedStringState<'a, B, D>
where
    D: DerefMut<Target = B>,
    B: Buffer<Context = PyFormatContext<'a>>,
{
    type Target = B;

    /// Gives shared access to the wrapped buffer.
    fn deref(&self) -> &B {
        &*self.buffer
    }
}
impl<'a, B, D> DerefMut for WithInterpolatedStringState<'a, B, D>
where
    D: DerefMut<Target = B>,
    B: Buffer<Context = PyFormatContext<'a>>,
{
    /// Gives exclusive access to the wrapped buffer.
    fn deref_mut(&mut self) -> &mut B {
        &mut *self.buffer
    }
}
impl<'a, B, D> Drop for WithInterpolatedStringState<'a, B, D>
where
    D: DerefMut<Target = B>,
    B: Buffer<Context = PyFormatContext<'a>>,
{
    /// Restores the interpolated-string state that was active before this
    /// value was created.
    fn drop(&mut self) {
        let previous = self.saved_location;
        let ctx = self.buffer.state_mut().context_mut();
        ctx.set_interpolated_string_state(previous);
    }
}