Add test cases

.
Basic nested range formatting
2023-09-22 15:50:34 +02:00 · 2023-09-21 16:58:54 +02:00 · 2023-09-21 15:23:17 +02:00 · 2023-09-21 13:38:05 +02:00 · 2023-09-21 13:37:52 +02:00 · 2023-09-21 13:28:59 +02:00
21 changed files with 703 additions and 152 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2322,6 +2322,7 @@ dependencies = [
 "bitflags 2.4.0",
 "clap",
 "countme",
+ "indoc",
 "insta",
 "itertools 0.11.0",
 "memchr",
--- a/crates/ruff_benchmark/benches/formatter.rs
+++ b/crates/ruff_benchmark/benches/formatter.rs
@@ -4,7 +4,7 @@ use ruff_benchmark::criterion::{
    criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
 };
 use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError};
-use ruff_python_formatter::{format_node, PyFormatOptions};
+use ruff_python_formatter::{format_module_ast, PyFormatOptions};
 use ruff_python_index::CommentRangesBuilder;
 use ruff_python_parser::lexer::lex;
 use ruff_python_parser::{parse_tokens, Mode};
@@ -65,13 +65,14 @@ fn benchmark_formatter(criterion: &mut Criterion) {
                let comment_ranges = comment_ranges.finish();

                // Parse the AST.
-                let python_ast = parse_tokens(tokens, Mode::Module, "<filename>")
+                let module = parse_tokens(tokens, Mode::Module, "<filename>")
                    .expect("Input to be a valid python program");

                b.iter(|| {
                    let options = PyFormatOptions::from_extension(Path::new(case.name()));
-                    let formatted = format_node(&python_ast, &comment_ranges, case.code(), options)
-                        .expect("Formatting to succeed");
+                    let formatted =
+                        format_module_ast(&module, &comment_ranges, case.code(), options)
+                            .expect("Formatting to succeed");

                    formatted.print().expect("Printing to succeed")
                });
--- a/crates/ruff_cli/src/args.rs
+++ b/crates/ruff_cli/src/args.rs
@@ -11,6 +11,7 @@ use ruff_linter::settings::types::{
    FilePattern, PatternPrefixPair, PerFileIgnore, PreviewMode, PythonVersion, SerializationFormat,
 };
 use ruff_linter::{RuleParser, RuleSelector, RuleSelectorParser};
+use ruff_python_formatter::LspRowColumn;
 use ruff_workspace::configuration::{Configuration, RuleSelection};
 use ruff_workspace::resolver::ConfigurationTransformer;

@@ -395,6 +396,14 @@ pub struct FormatCommand {
    preview: bool,
    #[clap(long, overrides_with("preview"), hide = true)]
    no_preview: bool,
+    /// Range formatting start: Zero-indexed row and zero-indexed char-based column separated by
+    /// colon, e.g. `1:2`
+    #[clap(long)]
+    pub start: Option<LspRowColumn>,
+    /// Range formatting end: Zero-indexed row and zero-indexed char-based column separated by
+    /// colon, e.g. `3:4`
+    #[clap(long)]
+    pub end: Option<LspRowColumn>,
 }

 #[derive(Debug, Clone, Copy, clap::ValueEnum)]
@@ -516,6 +525,8 @@ impl FormatCommand {
                files: self.files,
                isolated: self.isolated,
                stdin_filename: self.stdin_filename,
+                start: self.start,
+                end: self.end,
            },
            CliOverrides {
                line_length: self.line_length,
@@ -572,6 +583,8 @@ pub struct FormatArguments {
    pub files: Vec<PathBuf>,
    pub isolated: bool,
    pub stdin_filename: Option<PathBuf>,
+    pub start: Option<LspRowColumn>,
+    pub end: Option<LspRowColumn>,
 }

 /// CLI settings that function as configuration overrides.
--- a/crates/ruff_cli/src/commands/format.rs
+++ b/crates/ruff_cli/src/commands/format.rs
@@ -15,7 +15,7 @@ use ruff_linter::fs;
 use ruff_linter::logging::LogLevel;
 use ruff_linter::warn_user_once;
 use ruff_python_ast::{PySourceType, SourceType};
-use ruff_python_formatter::{format_module, FormatModuleError, PyFormatOptions};
+use ruff_python_formatter::{format_module_source, FormatModuleError, PyFormatOptions};
 use ruff_source_file::{find_newline, LineEnding};
 use ruff_workspace::resolver::python_files_in_path;

@@ -153,7 +153,7 @@ fn format_path(

    let options = options.with_line_ending(line_ending);

-    let formatted = format_module(&unformatted, options)
+    let formatted = format_module_source(&unformatted, options)
        .map_err(|err| FormatCommandError::FormatModule(Some(path.to_path_buf()), err))?;

    let formatted = formatted.as_code();
--- a/crates/ruff_cli/src/commands/format_stdin.rs
+++ b/crates/ruff_cli/src/commands/format_stdin.rs
@@ -5,7 +5,9 @@ use anyhow::Result;
 use log::warn;

 use ruff_python_ast::PySourceType;
-use ruff_python_formatter::{format_module, PyFormatOptions};
+use ruff_python_formatter::{
+    format_module_source, format_module_source_range, LspRowColumn, PyFormatOptions,
+};
 use ruff_workspace::resolver::python_file_at_path;

 use crate::args::{CliOverrides, FormatArguments};
@@ -42,7 +44,7 @@ pub(crate) fn format_stdin(cli: &FormatArguments, overrides: &CliOverrides) -> R
        .formatter
        .to_format_options(path.map(PySourceType::from).unwrap_or_default());

-    match format_source(path, options, mode) {
+    match format_source(path, options, mode, cli.start, cli.end) {
        Ok(result) => match mode {
            FormatMode::Write => Ok(ExitStatus::Success),
            FormatMode::Check => {
@@ -65,12 +67,21 @@ fn format_source(
    path: Option<&Path>,
    options: PyFormatOptions,
    mode: FormatMode,
+    start: Option<LspRowColumn>,
+    end: Option<LspRowColumn>,
 ) -> Result<FormatCommandResult, FormatCommandError> {
    let unformatted = read_from_stdin()
        .map_err(|err| FormatCommandError::Read(path.map(Path::to_path_buf), err))?;
-    let formatted = format_module(&unformatted, options)
-        .map_err(|err| FormatCommandError::FormatModule(path.map(Path::to_path_buf), err))?;
-    let formatted = formatted.as_code();
+    let formatted = if start.is_some() || end.is_some() {
+        let formatted = format_module_source_range(&unformatted, options, start, end)
+            .map_err(|err| FormatCommandError::FormatModule(path.map(Path::to_path_buf), err))?;
+        formatted
+    } else {
+        let formatted = format_module_source(&unformatted, options)
+            .map_err(|err| FormatCommandError::FormatModule(path.map(Path::to_path_buf), err))?;
+        let formatted = formatted.as_code();
+        formatted.to_string()
+    };
    if formatted.len() == unformatted.len() && formatted == unformatted {
        Ok(FormatCommandResult::Unchanged)
    } else {
--- a/crates/ruff_dev/src/format_dev.rs
+++ b/crates/ruff_dev/src/format_dev.rs
@@ -34,7 +34,7 @@ use ruff_formatter::{FormatError, LineWidth, PrintError};
 use ruff_linter::logging::LogLevel;
 use ruff_linter::settings::types::{FilePattern, FilePatternSet};
 use ruff_python_formatter::{
-    format_module, FormatModuleError, MagicTrailingComma, PyFormatOptions,
+    format_module_source, FormatModuleError, MagicTrailingComma, PyFormatOptions,
 };
 use ruff_workspace::resolver::{python_files_in_path, PyprojectConfig, Resolver};

@@ -800,7 +800,7 @@ fn format_dev_file(
    let content = fs::read_to_string(input_path)?;
    #[cfg(not(debug_assertions))]
    let start = Instant::now();
-    let printed = match format_module(&content, options.clone()) {
+    let printed = match format_module_source(&content, options.clone()) {
        Ok(printed) => printed,
        Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => {
            return Err(CheckFileError::SyntaxErrorInInput(err));
@@ -827,7 +827,7 @@ fn format_dev_file(
    }

    if stability_check {
-        let reformatted = match format_module(formatted, options) {
+        let reformatted = match format_module_source(formatted, options) {
            Ok(reformatted) => reformatted,
            Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => {
                return Err(CheckFileError::SyntaxErrorInOutput {
--- a/crates/ruff_formatter/src/diagnostics.rs
+++ b/crates/ruff_formatter/src/diagnostics.rs
@@ -1,6 +1,5 @@
 use crate::prelude::TagKind;
 use crate::GroupId;
-use ruff_text_size::TextRange;
 use std::error::Error;

 #[derive(Debug, PartialEq, Eq, Copy, Clone)]
@@ -12,7 +11,7 @@ pub enum FormatError {
    SyntaxError { message: &'static str },
    /// In case range formatting failed because the provided range was larger
    /// than the formatted syntax tree
-    RangeError { input: TextRange, tree: TextRange },
+    RangeError { row: usize, col: usize },

    /// In case printing the document failed because it has an invalid structure.
    InvalidDocument(InvalidDocumentError),
@@ -32,9 +31,9 @@ impl std::fmt::Display for FormatError {
            FormatError::SyntaxError {message} => {
                std::write!(fmt, "syntax error: {message}")
            },
-            FormatError::RangeError { input, tree } => std::write!(
+            FormatError::RangeError { row, col } => std::write!(
                fmt,
-                "formatting range {input:?} is larger than syntax tree {tree:?}"
+                "formatting range {row}:{col} is not a valid index"
            ),
            FormatError::InvalidDocument(error) => std::write!(fmt, "Invalid document: {error}\n\n This is an internal Rome error. Please report if necessary."),
            FormatError::PoorLayout => {
--- a/crates/ruff_formatter/src/macros.rs
+++ b/crates/ruff_formatter/src/macros.rs
@@ -334,7 +334,7 @@ macro_rules! best_fitting {
            $crate::BestFitting::from_arguments_unchecked($crate::format_args!($least_expanded, $($tail),+))
        }
    }}
-}
+    }

 #[cfg(test)]
 mod tests {
--- a/crates/ruff_python_formatter/Cargo.toml
+++ b/crates/ruff_python_formatter/Cargo.toml
@@ -43,6 +43,7 @@ insta = { workspace = true, features = ["glob"] }
 serde = { workspace = true }
 serde_json = { workspace = true }
 similar = { workspace = true }
+indoc = "2.0.4"

 [[test]]
 name = "ruff_python_formatter_fixtures"
--- a/crates/ruff_python_formatter/src/cli.rs
+++ b/crates/ruff_python_formatter/src/cli.rs
@@ -2,17 +2,17 @@

 use std::path::{Path, PathBuf};

-use anyhow::{bail, Context, Result};
+use anyhow::{format_err, Context, Result};
 use clap::{command, Parser, ValueEnum};

 use ruff_formatter::SourceCode;
-use ruff_python_index::CommentRangesBuilder;
-use ruff_python_parser::lexer::lex;
-use ruff_python_parser::{parse_tokens, Mode};
-use ruff_text_size::Ranged;
+use ruff_python_index::tokens_and_ranges;
+use ruff_python_parser::{parse_ok_tokens, Mode};
+use ruff_source_file::Locator;
+use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

 use crate::comments::collect_comments;
-use crate::{format_node, PyFormatOptions};
+use crate::{format_module_ast, format_module_range, PyFormatOptions};

 #[derive(ValueEnum, Clone, Debug)]
 pub enum Emit {
@@ -37,38 +37,46 @@ pub struct Cli {
    pub print_ir: bool,
    #[clap(long)]
    pub print_comments: bool,
+    /// Byte offset where range formatting starts (defaults to the start of the source)
+    #[clap(long)]
+    pub start: Option<u32>,
+    /// Byte offset where range formatting ends (defaults to the end of the source)
+    #[clap(long)]
+    pub end: Option<u32>,
 }

-pub fn format_and_debug_print(input: &str, cli: &Cli, source_type: &Path) -> Result<String> {
-    let mut tokens = Vec::new();
-    let mut comment_ranges = CommentRangesBuilder::default();
+pub fn format_and_debug_print(source: &str, cli: &Cli, source_type: &Path) -> Result<String> {
+    let (tokens, comment_ranges) = tokens_and_ranges(source)
+        .map_err(|err| format_err!("Source contains syntax errors {err:?}"))?;
+    let module =
+        parse_ok_tokens(tokens, Mode::Module, "<filename>").context("Syntax error in input")?;
+    let options = PyFormatOptions::from_extension(source_type);
+    let source_code = SourceCode::new(source);
+    let locator = Locator::new(source);

-    for result in lex(input, Mode::Module) {
-        let (token, range) = match result {
-            Ok((token, range)) => (token, range),
-            Err(err) => bail!("Source contains syntax errors {err:?}"),
-        };
-
-        comment_ranges.visit_token(&token, range);
-        tokens.push(Ok((token, range)));
+    if cli.start.is_some() || cli.end.is_some() {
+        let range = TextRange::new(
+            cli.start.map(TextSize::new).unwrap_or_default(),
+            cli.end.map(TextSize::new).unwrap_or(source.text_len()),
+        );
+        return Ok(format_module_range(
+            &module,
+            &comment_ranges,
+            source,
+            options,
+            &locator,
+            range,
+        )?);
    }

-    let comment_ranges = comment_ranges.finish();
-
-    // Parse the AST.
-    let python_ast =
-        parse_tokens(tokens, Mode::Module, "<filename>").context("Syntax error in input")?;
-
-    let options = PyFormatOptions::from_extension(source_type);
-    let formatted = format_node(&python_ast, &comment_ranges, input, options)
+    let formatted = format_module_ast(&module, &comment_ranges, source, options)
        .context("Failed to format node")?;
    if cli.print_ir {
-        println!("{}", formatted.document().display(SourceCode::new(input)));
+        println!("{}", formatted.document().display(source_code));
    }
    if cli.print_comments {
        // Print preceding, following and enclosing nodes
-        let source_code = SourceCode::new(input);
-        let decorated_comments = collect_comments(&python_ast, source_code, &comment_ranges);
+        let decorated_comments = collect_comments(&module, source_code, &comment_ranges);
        if !decorated_comments.is_empty() {
            println!("# Comment decoration: Range, Preceding, Following, Enclosing, Comment");
        }
@@ -86,13 +94,10 @@ pub fn format_and_debug_print(input: &str, cli: &Cli, source_type: &Path) -> Res
                    comment.enclosing_node().kind(),
                    comment.enclosing_node().range()
                ),
-                comment.slice().text(SourceCode::new(input)),
+                comment.slice().text(source_code),
            );
        }
-        println!(
-            "{:#?}",
-            formatted.context().comments().debug(SourceCode::new(input))
-        );
+        println!("{:#?}", formatted.context().comments().debug(source_code));
    }
    Ok(formatted
        .print()
--- a/crates/ruff_python_formatter/src/comments/mod.rs
+++ b/crates/ruff_python_formatter/src/comments/mod.rs
@@ -549,9 +549,9 @@ mod tests {

    use ruff_formatter::SourceCode;
    use ruff_python_ast::Mod;
-    use ruff_python_index::CommentRangesBuilder;
-    use ruff_python_parser::lexer::lex;
-    use ruff_python_parser::{parse_tokens, Mode};
+    use ruff_python_index::tokens_and_ranges;
+
+    use ruff_python_parser::{parse_ok_tokens, Mode};
    use ruff_python_trivia::CommentRanges;

    use crate::comments::Comments;
@@ -563,19 +563,11 @@ mod tests {
    }

    impl<'a> CommentsTestCase<'a> {
-        fn from_code(code: &'a str) -> Self {
-            let source_code = SourceCode::new(code);
-            let tokens: Vec<_> = lex(code, Mode::Module).collect();
-
-            let mut comment_ranges = CommentRangesBuilder::default();
-
-            for (token, range) in tokens.iter().flatten() {
-                comment_ranges.visit_token(token, *range);
-            }
-
-            let comment_ranges = comment_ranges.finish();
-
-            let parsed = parse_tokens(tokens, Mode::Module, "test.py")
+        fn from_code(source: &'a str) -> Self {
+            let source_code = SourceCode::new(source);
+            let (tokens, comment_ranges) =
+                tokens_and_ranges(source).expect("Expect source to be valid Python");
+            let parsed = parse_ok_tokens(tokens, Mode::Module, "test.py")
                .expect("Expect source to be valid Python");

            CommentsTestCase {
--- a/crates/ruff_python_formatter/src/lib.rs
+++ b/crates/ruff_python_formatter/src/lib.rs
@@ -1,21 +1,27 @@
+use std::iter;
+use std::str::FromStr;
 use thiserror::Error;
-use tracing::Level;
+use tracing::{warn, Level};

 use ruff_formatter::prelude::*;
 use ruff_formatter::{format, FormatError, Formatted, PrintError, Printed, SourceCode};
 use ruff_python_ast::node::AstNode;
-use ruff_python_ast::Mod;
-use ruff_python_index::CommentRangesBuilder;
-use ruff_python_parser::lexer::{lex, LexicalError};
-use ruff_python_parser::{parse_tokens, Mode, ParseError};
-use ruff_python_trivia::CommentRanges;
+use ruff_python_ast::{
+    Mod, Stmt, StmtClassDef, StmtFor, StmtFunctionDef, StmtIf, StmtWhile, StmtWith,
+};
+use ruff_python_index::tokens_and_ranges;
+use ruff_python_parser::lexer::LexicalError;
+use ruff_python_parser::{parse_ok_tokens, Mode, ParseError};
+use ruff_python_trivia::{is_python_whitespace, CommentRanges};
 use ruff_source_file::Locator;
+use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

 use crate::comments::{
    dangling_comments, leading_comments, trailing_comments, Comments, SourceComment,
 };
 pub use crate::context::PyFormatContext;
 pub use crate::options::{MagicTrailingComma, PreviewMode, PyFormatOptions, QuoteStyle};
+use crate::statement::suite::SuiteKind;
 use crate::verbatim::suppressed_node;
 pub use settings::FormatterSettings;

@@ -30,6 +36,7 @@ mod options;
 pub(crate) mod other;
 pub(crate) mod pattern;
 mod prelude;
+mod range_formatting;
 mod settings;
 pub(crate) mod statement;
 pub(crate) mod type_param;
@@ -123,61 +130,260 @@ impl From<ParseError> for FormatModuleError {
    }
 }

-#[tracing::instrument(level = Level::TRACE, skip_all)]
-pub fn format_module(
-    contents: &str,
+#[tracing::instrument(name = "format", level = Level::TRACE, skip_all)]
+pub fn format_module_source(
+    source: &str,
    options: PyFormatOptions,
 ) -> Result<Printed, FormatModuleError> {
-    // Tokenize once
-    let mut tokens = Vec::new();
-    let mut comment_ranges = CommentRangesBuilder::default();
-
-    for result in lex(contents, Mode::Module) {
-        let (token, range) = result?;
-
-        comment_ranges.visit_token(&token, range);
-        tokens.push(Ok((token, range)));
-    }
-
-    let comment_ranges = comment_ranges.finish();
-
-    // Parse the AST.
-    let python_ast = parse_tokens(tokens, Mode::Module, "<filename>")?;
-
-    let formatted = format_node(&python_ast, &comment_ranges, contents, options)?;
-
+    let (tokens, comment_ranges) = tokens_and_ranges(source)?;
+    let module = parse_ok_tokens(tokens, Mode::Module, "<filename>")?;
+    let formatted = format_module_ast(&module, &comment_ranges, source, options)?;
    Ok(formatted.print()?)
 }

-pub fn format_node<'a>(
-    root: &'a Mod,
+/// Range formatting coordinate: Zero-indexed row and zero-indexed char-based column separated by
+/// colon, e.g. `1:2`.
+///
+/// See [`Locator::convert_row_and_column`] for details on the semantics.
+#[derive(Copy, Clone, Debug, Default)]
+pub struct LspRowColumn {
+    row: usize,
+    col: usize,
+}
+
+impl FromStr for LspRowColumn {
+    type Err = &'static str;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let Some((row, col)) = s.split_once(':') else {
+            return Err("Coordinate is missing a colon, the format is `<row>:<column>`");
+        };
+
+        Ok(LspRowColumn {
+            row: row.parse().map_err(|_| "row must be a number")?,
+            col: col.parse().map_err(|_| "col must be a number")?,
+        })
+    }
+}
+#[tracing::instrument(name = "format", level = Level::TRACE, skip_all)]
+pub fn format_module_source_range(
+    source: &str,
+    options: PyFormatOptions,
+    start: Option<LspRowColumn>,
+    end: Option<LspRowColumn>,
+) -> Result<String, FormatModuleError> {
+    let (tokens, comment_ranges) = tokens_and_ranges(source)?;
+    let module = parse_ok_tokens(tokens, Mode::Module, "<filename>")?;
+    let locator = Locator::new(source);
+
+    let start = if let Some(start) = start {
+        locator
+            .convert_row_and_column(start.row, start.col)
+            .ok_or(FormatError::RangeError {
+                row: start.row,
+                col: start.col,
+            })?
+    } else {
+        TextSize::default()
+    };
+    let end = if let Some(end) = end {
+        locator
+            .convert_row_and_column(end.row, end.col)
+            .ok_or(FormatError::RangeError {
+                row: end.row,
+                col: end.col,
+            })?
+    } else {
+        source.text_len()
+    };
+
+    let formatted = format_module_range(
+        &module,
+        &comment_ranges,
+        source,
+        options,
+        &locator,
+        TextRange::new(start, end),
+    )?;
+    Ok(formatted)
+}
+
+pub fn format_module_ast<'a>(
+    module: &'a Mod,
    comment_ranges: &'a CommentRanges,
    source: &'a str,
    options: PyFormatOptions,
 ) -> FormatResult<Formatted<PyFormatContext<'a>>> {
-    let comments = Comments::from_ast(root, SourceCode::new(source), comment_ranges);
-
+    let source_code = SourceCode::new(source);
+    let comments = Comments::from_ast(module, source_code, comment_ranges);
    let locator = Locator::new(source);

    let formatted = format!(
        PyFormatContext::new(options, locator.contents(), comments),
-        [root.format()]
+        [module.format()]
    )?;
    formatted
        .context()
        .comments()
-        .assert_all_formatted(SourceCode::new(source));
+        .assert_all_formatted(source_code);
    Ok(formatted)
 }

-/// Public function for generating a printable string of the debug comments.
-pub fn pretty_comments(root: &Mod, comment_ranges: &CommentRanges, source: &str) -> String {
-    let comments = Comments::from_ast(root, SourceCode::new(source), comment_ranges);
+/// Is range inside the body of a node, if we consider the whitespace surrounding the suite as part
+/// of the body?
+///
+/// TODO: Handle leading comments on the first statement
+fn range_in_body(suite: &[Stmt], range: TextRange, source: &str) -> bool {
+    let suite_start = suite.first().unwrap().start();
+    let suite_end = suite.last().unwrap().end();

-    std::format!(
-        "{comments:#?}",
-        comments = comments.debug(SourceCode::new(source))
-    )
+    if range.start() < suite_start
+        // Extend the range to include all whitespace prior to the first statement
+        && !source[TextRange::new(range.start(), suite_start)]
+            .chars()
+            .all(|c| is_python_whitespace(c))
+    {
+        return false;
+    }
+    if range.end() > suite_end
+        // Extend the range to include all whitespace after the last statement
+        && !source[TextRange::new(suite_end,range.end())]
+            .chars()
+            .all(|c| is_python_whitespace(c))
+    {
+        return false;
+    }
+
+    true
+}
+
+pub fn format_module_range<'a>(
+    module: &'a Mod,
+    comment_ranges: &'a CommentRanges,
+    source: &'a str,
+    options: PyFormatOptions,
+    locator: &Locator<'a>,
+    range: TextRange,
+) -> FormatResult<String> {
+    let comments = Comments::from_ast(&module, SourceCode::new(source), &comment_ranges);
+
+    let Mod::Module(module_inner) = &module else {
+        panic!("That's not a module");
+    };
+
+    // TODO: Move this to LspRowColumn? We first count chars only to discard the column anyway.
+    // Suppose someone wanted to format `print(i); print(j)`. That wouldn't work indent-wise,
+    // so we always format whole lines instead, which lets us count indentation normally.
+    // ```python
+    // if True:
+    //     for i in range(10): j=i+1; print(i); print(j)
+    // ```
+    let range = TextRange::new(
+        locator.line_start(range.start()),
+        locator.line_end(range.end()),
+    );
+
+    // ```
+    // a = 1; b = 2; c = 3; d = 4; e = 5
+    //             ^ b end  ^ d start
+    //          ^^^^^^^^^^^^^^^ range
+    //          ^ range start ^ range end
+    // ```
+    // TODO: If the range extends past the end of the last stmt or starts before the first one,
+    // do we need to format the parent?
+    let mut parent_body: &[Stmt] = module_inner.body.as_slice();
+    let mut in_range;
+
+    // TODO: Allow partial inclusions, e.g.
+    // ```python
+    // not_formatted = 0
+    // start = 1
+    // if cond_formatted:
+    //     last_formatted = 2
+    //     not_formatted_anymore = 3
+    // ```
+    // prob a slice and an optional trailing arg
+    let in_range = loop {
+        let start = parent_body.partition_point(|child| child.end() < range.start());
+        let end = parent_body.partition_point(|child| child.start() < range.end());
+        in_range = &parent_body[start..end];
+
+        let [single_stmt] = in_range else {
+            break in_range;
+        };
+
+        match single_stmt {
+            Stmt::For(StmtFor { body, .. })
+            | Stmt::While(StmtWhile { body, .. })
+            | Stmt::With(StmtWith { body, .. })
+            | Stmt::FunctionDef(StmtFunctionDef { body, .. })
+            | Stmt::ClassDef(StmtClassDef { body, .. }) => {
+                // We need to format the header or a trailing comment
+                // TODO: ignore trivia
+                if range_in_body(body, range, source) {
+                    break in_range;
+                } else {
+                    parent_body = &body;
+                }
+            }
+            Stmt::If(StmtIf {
+                body,
+                elif_else_clauses,
+                ..
+            }) => {
+                let if_all_end = TextRange::new(
+                    range.start(),
+                    elif_else_clauses
+                        .last()
+                        .map(|clause| clause.body.last().unwrap().end())
+                        .unwrap_or(body.last().unwrap().end()),
+                );
+                if !range_in_body(body, if_all_end, source) {
+                    break in_range;
+                } else if let Some(body) = iter::once(body)
+                    .chain(elif_else_clauses.iter().map(|clause| &clause.body))
+                    .find(|body| range_in_body(body, range, source))
+                {
+                    parent_body = &body;
+                } else {
+                    break in_range;
+                }
+            }
+            // | Stmt::StmtTry(ast::StmtTry { body, .. })
+            // | Stmt::ExceptHandlerExceptHandler(ast::ExceptHandlerExceptHandler { body, .. })
+            // | Stmt::ElifElseClause(ast::ElifElseClause { body, .. }) => &body,
+            // match
+            _ => break in_range,
+        }
+    };
+
+    let (Some(first), Some(last)) = (in_range.first(), in_range.last()) else {
+        // TODO: Use tracing again https://github.com/tokio-rs/tracing/issues/2721
+        // TODO: Forward this to something proper
+        eprintln!("The formatting range contains no statements");
+        return Ok(source.to_string());
+    };
+
+    let mut buffer = source[TextRange::up_to(first.start())].to_string();
+
+    let formatted: Formatted<PyFormatContext> = format!(
+        PyFormatContext::new(options.clone(), locator.contents(), comments),
+        // TODO: Make suite formatting accept slices
+        [in_range.to_vec().format().with_options(SuiteKind::TopLevel)]
+    )?;
+    //println!("{}", formatted.document().display(SourceCode::new(source)));
+    // TODO: Make the printer use the buffer instead
+    buffer += formatted.print_with_indent(1)?.as_code();
+    buffer += &source[TextRange::new(last.end(), source.text_len())];
+    return Ok(buffer.to_string());
+}
+
+/// Public function for generating a printable string of the debug comments.
+pub fn pretty_comments(module: &Mod, comment_ranges: &CommentRanges, source: &str) -> String {
+    let source_code = SourceCode::new(source);
+    let comments = Comments::from_ast(module, source_code, comment_ranges);
+
+    std::format!("{comments:#?}", comments = comments.debug(source_code))
 }

 #[cfg(test)]
@@ -187,11 +393,11 @@ mod tests {
    use anyhow::Result;
    use insta::assert_snapshot;

-    use ruff_python_index::CommentRangesBuilder;
-    use ruff_python_parser::lexer::lex;
-    use ruff_python_parser::{parse_tokens, Mode};
+    use ruff_python_index::tokens_and_ranges;

-    use crate::{format_module, format_node, PyFormatOptions};
+    use ruff_python_parser::{parse_ok_tokens, Mode};
+
+    use crate::{format_module_ast, format_module_source, PyFormatOptions};

    /// Very basic test intentionally kept very similar to the CLI
    #[test]
@@ -207,7 +413,7 @@ if True:
    pass
 # trailing
 "#;
-        let actual = format_module(input, PyFormatOptions::default())?
+        let actual = format_module_source(input, PyFormatOptions::default())?
            .as_code()
            .to_string();
        assert_eq!(expected, actual);
@@ -218,7 +424,7 @@ if True:
    #[ignore]
    #[test]
    fn quick_test() {
-        let src = r#"
+        let source = r#"
 def main() -> None:
    if True:
        some_very_long_variable_name_abcdefghijk = Foo()
@@ -228,23 +434,13 @@ def main() -> None:
        ]

 "#;
-        // Tokenize once
-        let mut tokens = Vec::new();
-        let mut comment_ranges = CommentRangesBuilder::default();
-
-        for result in lex(src, Mode::Module) {
-            let (token, range) = result.unwrap();
-            comment_ranges.visit_token(&token, range);
-            tokens.push(Ok((token, range)));
-        }
-
-        let comment_ranges = comment_ranges.finish();
+        let (tokens, comment_ranges) = tokens_and_ranges(source).unwrap();

        // Parse the AST.
        let source_path = "code_inline.py";
-        let python_ast = parse_tokens(tokens, Mode::Module, source_path).unwrap();
+        let module = parse_ok_tokens(tokens, Mode::Module, source_path).unwrap();
        let options = PyFormatOptions::from_extension(Path::new(source_path));
-        let formatted = format_node(&python_ast, &comment_ranges, src, options).unwrap();
+        let formatted = format_module_ast(&module, &comment_ranges, source, options).unwrap();

        // Uncomment the `dbg` to print the IR.
        // Use `dbg_write!(f, []) instead of `write!(f, [])` in your formatting code to print some IR
--- a/crates/ruff_python_formatter/src/main.rs
+++ b/crates/ruff_python_formatter/src/main.rs
@@ -25,11 +25,11 @@ fn main() -> Result<()> {
                cli.emit
            );
        }
-        let input = read_from_stdin()?;
+        let source = read_from_stdin()?;
        // It seems reasonable to give this a dummy name
-        let formatted = format_and_debug_print(&input, &cli, Path::new("stdin.py"))?;
+        let formatted = format_and_debug_print(&source, &cli, Path::new("stdin.py"))?;
        if cli.check {
-            if formatted == input {
+            if formatted == source {
                return Ok(());
            }
            bail!("Content not correctly formatted")
@@ -37,9 +37,9 @@ fn main() -> Result<()> {
        stdout().lock().write_all(formatted.as_bytes())?;
    } else {
        for file in &cli.files {
-            let input = fs::read_to_string(file)
+            let source = fs::read_to_string(file)
                .with_context(|| format!("Could not read {}: ", file.display()))?;
-            let formatted = format_and_debug_print(&input, &cli, file)?;
+            let formatted = format_and_debug_print(&source, &cli, file)?;
            match cli.emit {
                Some(Emit::Stdout) => stdout().lock().write_all(formatted.as_bytes())?,
                None | Some(Emit::Files) => {
--- a/crates/ruff_python_formatter/src/range_formatting.rs
+++ b/crates/ruff_python_formatter/src/range_formatting.rs
@@ -0,0 +1,226 @@
+#[cfg(test)]
+mod tests {
+    use crate::{format_module_source_range, LspRowColumn, PyFormatOptions};
+    use indoc::indoc;
+    use insta::assert_snapshot;
+
+    fn format(source: &str, start: (usize, usize), end: (usize, usize)) -> String {
+        format_module_source_range(
+            source,
+            PyFormatOptions::default(),
+            Some(LspRowColumn {
+                row: start.0,
+                col: start.1,
+            }),
+            Some(LspRowColumn {
+                row: end.0,
+                col: end.1,
+            }),
+        )
+        .unwrap()
+    }
+
+    #[test]
+    fn test_top_level() {
+        assert_snapshot!(format(indoc! {r#"
+        a = [1,]
+        b = [1,]
+        c = [1,]
+        d = [1,]
+        "#}, (1, 3), (2, 5)), @r###"
+        a = [1,]
+        b = [
+                1,
+            ]
+            c = [
+                1,
+            ]
+        d = [1,]
+        "###);
+    }
+
+    #[test]
+    fn test_easy_nested() {
+        assert_snapshot!(format(indoc! {r#"
+        a = [1,]
+        for i in range( 1 ):
+            b = [1,]
+            c = [1,]
+            d = [1,]
+        e = [1,]
+        "#}, (3, 3), (3, 5)), @r###"
+        a = [1,]
+        for i in range(1):
+                b = [
+                    1,
+                ]
+                c = [
+                    1,
+                ]
+                d = [
+                    1,
+                ]
+
+        e = [1,]
+        "###);
+    }
+
+    #[test]
+    fn test_if() {
+        let source = indoc! {r#"
+        import     random
+        if random.random()    <    0.5:
+            a = [1,]
+            b = [1,]
+        elif random.random()    <    0.75:
+            c = [1,]
+            d = [1,]
+        else:
+            e = [1,]
+            f = [1,]
+        g = [1,]
+        "#};
+
+        assert_snapshot!(format(source, (3, 0), (3, 10)), @r###"
+        import     random
+        if random.random() < 0.5:
+                a = [
+                    1,
+                ]
+                b = [
+                    1,
+                ]
+            elif random.random() < 0.75:
+                c = [
+                    1,
+                ]
+                d = [
+                    1,
+                ]
+            else:
+                e = [
+                    1,
+                ]
+                f = [
+                    1,
+                ]
+
+        g = [1,]
+        "###);
+        assert_snapshot!(format(source, (6, 0), (6, 10)), @r###"
+        import     random
+        if random.random() < 0.5:
+                a = [
+                    1,
+                ]
+                b = [
+                    1,
+                ]
+            elif random.random() < 0.75:
+                c = [
+                    1,
+                ]
+                d = [
+                    1,
+                ]
+            else:
+                e = [
+                    1,
+                ]
+                f = [
+                    1,
+                ]
+
+        g = [1,]
+        "###);
+        assert_snapshot!(format(source, (9, 0), (9, 10)), @r###"
+        import     random
+        if random.random() < 0.5:
+                a = [
+                    1,
+                ]
+                b = [
+                    1,
+                ]
+            elif random.random() < 0.75:
+                c = [
+                    1,
+                ]
+                d = [
+                    1,
+                ]
+            else:
+                e = [
+                    1,
+                ]
+                f = [
+                    1,
+                ]
+
+        g = [1,]
+        "###);
+        assert_snapshot!(format(source, (3, 0), (6, 10)), @r###"
+        import     random
+        if random.random() < 0.5:
+                a = [
+                    1,
+                ]
+                b = [
+                    1,
+                ]
+            elif random.random() < 0.75:
+                c = [
+                    1,
+                ]
+                d = [
+                    1,
+                ]
+            else:
+                e = [
+                    1,
+                ]
+                f = [
+                    1,
+                ]
+
+        g = [1,]
+        "###);
+    }
+
+    // TODO
+    #[test]
+    fn test_trailing_comment() {
+        assert_snapshot!(format(indoc! {r#"
+        if True:
+            a = [1,]
+            # trailing comment
+        "#}, (1, 3), (2, 5)), @r###"
+        if True:
+                a = [
+                    1,
+                ]
+
+            # trailing comment
+        "###);
+    }
+
+    // TODO
+    #[test]
+    fn test_alternative_indent() {
+        assert_snapshot!(format(indoc! {r#"
+        if True:
+          a = [1,]
+          b = [1,]
+          c = [1,]
+        "#}, (1, 3), (2, 5)), @r###"
+        if True:
+          a = [
+                1,
+            ]
+            b = [
+                1,
+            ]
+          c = [1,]
+        "###);
+    }
+}
--- a/crates/ruff_python_formatter/tests/fixtures.rs
+++ b/crates/ruff_python_formatter/tests/fixtures.rs
@@ -1,5 +1,5 @@
 use ruff_formatter::FormatOptions;
-use ruff_python_formatter::{format_module, PyFormatOptions};
+use ruff_python_formatter::{format_module_source, PyFormatOptions};
 use similar::TextDiff;
 use std::fmt::{Formatter, Write};
 use std::io::BufReader;
@@ -20,7 +20,7 @@ fn black_compatibility() {
            PyFormatOptions::from_extension(input_path)
        };

-        let printed = format_module(&content, options.clone()).unwrap_or_else(|err| {
+        let printed = format_module_source(&content, options.clone()).unwrap_or_else(|err| {
            panic!(
                "Formatting of {} to succeed but encountered error {err}",
                input_path.display()
@@ -107,7 +107,8 @@ fn format() {
        let content = fs::read_to_string(input_path).unwrap();

        let options = PyFormatOptions::from_extension(input_path);
-        let printed = format_module(&content, options.clone()).expect("Formatting to succeed");
+        let printed =
+            format_module_source(&content, options.clone()).expect("Formatting to succeed");
        let formatted_code = printed.as_code();

        ensure_stability_when_formatting_twice(formatted_code, options.clone(), input_path);
@@ -124,7 +125,7 @@ fn format() {

            for (i, options) in options.into_iter().enumerate() {
                let printed =
-                    format_module(&content, options.clone()).expect("Formatting to succeed");
+                    format_module_source(&content, options.clone()).expect("Formatting to succeed");
                let formatted_code = printed.as_code();

                ensure_stability_when_formatting_twice(formatted_code, options.clone(), input_path);
@@ -139,7 +140,8 @@ fn format() {
                .unwrap();
            }
        } else {
-            let printed = format_module(&content, options.clone()).expect("Formatting to succeed");
+            let printed =
+                format_module_source(&content, options.clone()).expect("Formatting to succeed");
            let formatted_code = printed.as_code();

            ensure_stability_when_formatting_twice(formatted_code, options, input_path);
@@ -174,7 +176,7 @@ fn ensure_stability_when_formatting_twice(
    options: PyFormatOptions,
    input_path: &Path,
 ) {
-    let reformatted = match format_module(formatted_code, options) {
+    let reformatted = match format_module_source(formatted_code, options) {
        Ok(reformatted) => reformatted,
        Err(err) => {
            panic!(
--- a/crates/ruff_python_index/src/comment_ranges.rs
+++ b/crates/ruff_python_index/src/comment_ranges.rs
@@ -1,6 +1,7 @@
 use std::fmt::Debug;

-use ruff_python_parser::Tok;
+use ruff_python_parser::lexer::{lex, LexicalError};
+use ruff_python_parser::{Mode, Tok};
 use ruff_python_trivia::CommentRanges;
 use ruff_text_size::TextRange;

@@ -20,3 +21,21 @@ impl CommentRangesBuilder {
        CommentRanges::new(self.ranges)
    }
 }
+
+/// Lex `source` in module mode, returning every token together with the collected comment ranges; fails on the first lexical error.
+pub fn tokens_and_ranges(
+    source: &str,
+) -> Result<(Vec<(Tok, TextRange)>, CommentRanges), LexicalError> {
+    let mut tokens = Vec::new();
+    let mut comment_ranges = CommentRangesBuilder::default();
+
+    for result in lex(source, Mode::Module) {
+        let (token, range) = result?;
+
+        comment_ranges.visit_token(&token, range);
+        tokens.push((token, range));
+    }
+
+    let comment_ranges = comment_ranges.finish();
+    Ok((tokens, comment_ranges))
+}
--- a/crates/ruff_python_index/src/lib.rs
+++ b/crates/ruff_python_index/src/lib.rs
@@ -1,5 +1,5 @@
 mod comment_ranges;
 mod indexer;

-pub use comment_ranges::CommentRangesBuilder;
+pub use comment_ranges::{tokens_and_ranges, CommentRangesBuilder};
 pub use indexer::Indexer;
--- a/crates/ruff_python_parser/src/lib.rs
+++ b/crates/ruff_python_parser/src/lib.rs
@@ -110,8 +110,8 @@
 //! [lexer]: crate::lexer

 pub use parser::{
-    parse, parse_expression, parse_expression_starts_at, parse_program, parse_starts_at,
-    parse_suite, parse_tokens, ParseError, ParseErrorType,
+    parse, parse_expression, parse_expression_starts_at, parse_ok_tokens, parse_program,
+    parse_starts_at, parse_suite, parse_tokens, ParseError, ParseErrorType,
 };
 use ruff_python_ast::{CmpOp, Expr, Mod, PySourceType, Suite};
 use ruff_text_size::{Ranged, TextRange, TextSize};
--- a/crates/ruff_python_parser/src/parser.rs
+++ b/crates/ruff_python_parser/src/parser.rs
@@ -18,7 +18,7 @@ use itertools::Itertools;
 pub(super) use lalrpop_util::ParseError as LalrpopError;
 use ruff_text_size::{TextRange, TextSize};

-use crate::lexer::{lex, lex_starts_at};
+use crate::lexer::{lex, lex_starts_at, Spanned};
 use crate::{
    lexer::{self, LexResult, LexicalError, LexicalErrorType},
    python,
@@ -159,7 +159,7 @@ pub fn parse_expression_starts_at(
 /// let program = parse(source, Mode::Ipython, "<embedded>");
 /// assert!(program.is_ok());
 /// ```
-pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, ParseError> {
+pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<Mod, ParseError> {
    parse_starts_at(source, mode, source_path, TextSize::default())
 }

@@ -191,7 +191,7 @@ pub fn parse_starts_at(
    mode: Mode,
    source_path: &str,
    offset: TextSize,
-) -> Result<ast::Mod, ParseError> {
+) -> Result<Mod, ParseError> {
    let lxr = lexer::lex_starts_at(source, mode, offset);
    parse_tokens(lxr, mode, source_path)
 }
@@ -215,7 +215,7 @@ pub fn parse_tokens(
    lxr: impl IntoIterator<Item = LexResult>,
    mode: Mode,
    source_path: &str,
-) -> Result<ast::Mod, ParseError> {
+) -> Result<Mod, ParseError> {
    let lxr = lxr.into_iter();

    parse_filtered_tokens(
@@ -225,19 +225,35 @@ pub fn parse_tokens(
    )
 }

+/// Parse tokens into an AST like [`parse_tokens`], but we already know all tokens are valid.
+pub fn parse_ok_tokens(
+    lxr: impl IntoIterator<Item = Spanned>,
+    mode: Mode,
+    source_path: &str,
+) -> Result<Mod, ParseError> {
+    let lxr = lxr
+        .into_iter()
+        .filter(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
+    let marker_token = (Tok::start_marker(mode), TextRange::default());
+    let lexer = iter::once(marker_token)
+        .chain(lxr)
+        .map(|(t, range)| (range.start(), t, range.end()));
+    python::TopParser::new()
+        .parse(mode, lexer)
+        .map_err(|e| parse_error_from_lalrpop(e, source_path))
+}
+
 fn parse_filtered_tokens(
    lxr: impl IntoIterator<Item = LexResult>,
    mode: Mode,
    source_path: &str,
-) -> Result<ast::Mod, ParseError> {
+) -> Result<Mod, ParseError> {
    let marker_token = (Tok::start_marker(mode), TextRange::default());
    let lexer = iter::once(Ok(marker_token)).chain(lxr);
    python::TopParser::new()
        .parse(
            mode,
-            lexer
-                .into_iter()
-                .map_ok(|(t, range)| (range.start(), t, range.end())),
+            lexer.map_ok(|(t, range)| (range.start(), t, range.end())),
        )
        .map_err(|e| parse_error_from_lalrpop(e, source_path))
 }
--- a/crates/ruff_source_file/src/locator.rs
+++ b/crates/ruff_source_file/src/locator.rs
@@ -441,6 +441,75 @@ impl<'a> Locator<'a> {
        }
    }

+    /// Compute the byte offset from zero-indexed row and column indices.
+    ///
+    /// We get row and column from the LSP. E.g.
+    /// ```text
+    /// a=(1,2,)
+    /// b=(3,4,)
+    ///   ^
+    /// c=(5,6,)
+    /// ```
+    /// has coordinates `1:2`. Note that indices are computed in chars, e.g.
+    /// ```text
+    /// a=(1,2,)
+    /// "안녕"
+    ///    ^
+    /// ```
+    /// where the first syllable is a single character (three bytes in UTF-8), we get `1:2`, while for
+    /// ```text
+    /// a=(1,2,)
+    /// "감기"
+    ///    ^
+    /// ```
+    /// where the first syllable is three characters (three bytes each in UTF-8), we get `1:4`.
+    ///
+    /// ```rust
+    /// # use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
+    /// # use ruff_source_file::Locator;
+    ///
+    /// let source = "a=(1,2,)\nb=(3,4,)";
+    /// let locator = Locator::new(source);
+    /// let offset = locator.convert_row_and_column(1, 2).unwrap();
+    /// assert_eq!(&source[TextRange::new(offset, source.text_len())], "(3,4,)");
+    ///
+    /// let source = "a=(1,2,)\n'안녕'";
+    /// let locator = Locator::new(source);
+    /// let offset = locator.convert_row_and_column(1, 2).unwrap();
+    /// assert_eq!(&source[TextRange::new(offset, source.text_len())], "녕'");
+    ///
+    /// let source = "a=(1,2,)\n'감기'";
+    /// let locator = Locator::new(source);
+    /// let offset = locator.convert_row_and_column(1, 4).unwrap();
+    /// assert_eq!(&source[TextRange::new(offset, source.text_len())], "기'");
+    /// ```
+    pub fn convert_row_and_column(&self, row: usize, column: usize) -> Option<TextSize> {
+        let line_start = *self.to_index().line_starts().get(row)?;
+        let next_line_start = self
+            .to_index()
+            .line_starts()
+            .get(row + 1)
+            .copied()
+            .unwrap_or(self.contents.text_len());
+        let line_contents = &self.contents[TextRange::from(line_start..next_line_start)];
+        debug_assert!(
+            line_contents
+                .chars()
+                // Since the range goes to the next line start, `line_contents` contains the line
+                // break
+                .take_while(|c| *c != '\n' && *c != '\r')
+                .count()
+                >= column,
+            "The column is not in the line"
+        );
+        let len_in_line: TextSize = line_contents
+            .chars()
+            .take(column)
+            .map(TextLen::text_len)
+            .sum();
+        Some(line_start + len_in_line)
+    }
+
    /// Take the source code between the given [`TextRange`].
    #[inline]
    pub fn slice<T: Ranged>(&self, ranged: T) -> &'a str {
--- a/crates/ruff_wasm/src/lib.rs
+++ b/crates/ruff_wasm/src/lib.rs
@@ -14,7 +14,7 @@ use ruff_linter::settings::{flags, DUMMY_VARIABLE_RGX, PREFIXES};
 use ruff_linter::source_kind::SourceKind;
 use ruff_python_ast::{Mod, PySourceType};
 use ruff_python_codegen::Stylist;
-use ruff_python_formatter::{format_node, pretty_comments, PyFormatContext};
+use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext};
 use ruff_python_index::{CommentRangesBuilder, Indexer};
 use ruff_python_parser::lexer::LexResult;
 use ruff_python_parser::{parse_tokens, AsMode, Mode};
@@ -305,7 +305,7 @@ impl<'a> ParsedModule<'a> {
            .formatter
            .to_format_options(PySourceType::default());

-        format_node(
+        format_module_ast(
            &self.module,
            &self.comment_ranges,
            self.source_code,
Author	SHA1	Message	Date
konstin	de239ace74	Add test cases	2023-09-22 15:50:34 +02:00
konstin	53b5121f30	.	2023-09-21 16:58:54 +02:00
konstin	be93983e8e	Basic nested range formatting	2023-09-21 15:23:17 +02:00
konstin	a1239b8f2d	Make basic range formatting work in vs code	2023-09-21 13:38:05 +02:00
konstin	d1b12acb3c	Add byte offset indexing	2023-09-21 13:37:52 +02:00
konstin	1aabf59f77	Formatter and parser refactoring I got confused and refactored a bit	2023-09-21 13:28:59 +02:00