Compare commits

...

6 Commits

Author SHA1 Message Date
konstin
de239ace74 Add test cases 2023-09-22 15:50:34 +02:00
konstin
53b5121f30 . 2023-09-21 16:58:54 +02:00
konstin
be93983e8e Basic nested range formatting 2023-09-21 15:23:17 +02:00
konstin
a1239b8f2d Make basic range formatting work in vs code 2023-09-21 13:38:05 +02:00
konstin
d1b12acb3c Add byte offset indexing 2023-09-21 13:37:52 +02:00
konstin
1aabf59f77 Formatter and parser refactoring
I got confused and refactored a bit
2023-09-21 13:28:59 +02:00
21 changed files with 703 additions and 152 deletions

1
Cargo.lock generated
View File

@@ -2322,6 +2322,7 @@ dependencies = [
"bitflags 2.4.0",
"clap",
"countme",
"indoc",
"insta",
"itertools 0.11.0",
"memchr",

View File

@@ -4,7 +4,7 @@ use ruff_benchmark::criterion::{
criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError};
use ruff_python_formatter::{format_node, PyFormatOptions};
use ruff_python_formatter::{format_module_ast, PyFormatOptions};
use ruff_python_index::CommentRangesBuilder;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode};
@@ -65,13 +65,14 @@ fn benchmark_formatter(criterion: &mut Criterion) {
let comment_ranges = comment_ranges.finish();
// Parse the AST.
let python_ast = parse_tokens(tokens, Mode::Module, "<filename>")
let module = parse_tokens(tokens, Mode::Module, "<filename>")
.expect("Input to be a valid python program");
b.iter(|| {
let options = PyFormatOptions::from_extension(Path::new(case.name()));
let formatted = format_node(&python_ast, &comment_ranges, case.code(), options)
.expect("Formatting to succeed");
let formatted =
format_module_ast(&module, &comment_ranges, case.code(), options)
.expect("Formatting to succeed");
formatted.print().expect("Printing to succeed")
});

View File

@@ -11,6 +11,7 @@ use ruff_linter::settings::types::{
FilePattern, PatternPrefixPair, PerFileIgnore, PreviewMode, PythonVersion, SerializationFormat,
};
use ruff_linter::{RuleParser, RuleSelector, RuleSelectorParser};
use ruff_python_formatter::LspRowColumn;
use ruff_workspace::configuration::{Configuration, RuleSelection};
use ruff_workspace::resolver::ConfigurationTransformer;
@@ -395,6 +396,14 @@ pub struct FormatCommand {
preview: bool,
#[clap(long, overrides_with("preview"), hide = true)]
no_preview: bool,
/// Range formatting start: Zero-indexed row and zero-indexed char-based column separated by
/// colon, e.g. `1:2`
#[clap(long)]
pub start: Option<LspRowColumn>,
/// Range formatting end: Zero-indexed row and zero-indexed char-based column separated by
/// colon, e.g. `3:4`
#[clap(long)]
pub end: Option<LspRowColumn>,
}
#[derive(Debug, Clone, Copy, clap::ValueEnum)]
@@ -516,6 +525,8 @@ impl FormatCommand {
files: self.files,
isolated: self.isolated,
stdin_filename: self.stdin_filename,
start: self.start,
end: self.end,
},
CliOverrides {
line_length: self.line_length,
@@ -572,6 +583,8 @@ pub struct FormatArguments {
pub files: Vec<PathBuf>,
pub isolated: bool,
pub stdin_filename: Option<PathBuf>,
pub start: Option<LspRowColumn>,
pub end: Option<LspRowColumn>,
}
/// CLI settings that function as configuration overrides.

View File

@@ -15,7 +15,7 @@ use ruff_linter::fs;
use ruff_linter::logging::LogLevel;
use ruff_linter::warn_user_once;
use ruff_python_ast::{PySourceType, SourceType};
use ruff_python_formatter::{format_module, FormatModuleError, PyFormatOptions};
use ruff_python_formatter::{format_module_source, FormatModuleError, PyFormatOptions};
use ruff_source_file::{find_newline, LineEnding};
use ruff_workspace::resolver::python_files_in_path;
@@ -153,7 +153,7 @@ fn format_path(
let options = options.with_line_ending(line_ending);
let formatted = format_module(&unformatted, options)
let formatted = format_module_source(&unformatted, options)
.map_err(|err| FormatCommandError::FormatModule(Some(path.to_path_buf()), err))?;
let formatted = formatted.as_code();

View File

@@ -5,7 +5,9 @@ use anyhow::Result;
use log::warn;
use ruff_python_ast::PySourceType;
use ruff_python_formatter::{format_module, PyFormatOptions};
use ruff_python_formatter::{
format_module_source, format_module_source_range, LspRowColumn, PyFormatOptions,
};
use ruff_workspace::resolver::python_file_at_path;
use crate::args::{CliOverrides, FormatArguments};
@@ -42,7 +44,7 @@ pub(crate) fn format_stdin(cli: &FormatArguments, overrides: &CliOverrides) -> R
.formatter
.to_format_options(path.map(PySourceType::from).unwrap_or_default());
match format_source(path, options, mode) {
match format_source(path, options, mode, cli.start, cli.end) {
Ok(result) => match mode {
FormatMode::Write => Ok(ExitStatus::Success),
FormatMode::Check => {
@@ -65,12 +67,21 @@ fn format_source(
path: Option<&Path>,
options: PyFormatOptions,
mode: FormatMode,
start: Option<LspRowColumn>,
end: Option<LspRowColumn>,
) -> Result<FormatCommandResult, FormatCommandError> {
let unformatted = read_from_stdin()
.map_err(|err| FormatCommandError::Read(path.map(Path::to_path_buf), err))?;
let formatted = format_module(&unformatted, options)
.map_err(|err| FormatCommandError::FormatModule(path.map(Path::to_path_buf), err))?;
let formatted = formatted.as_code();
let formatted = if start.is_some() || end.is_some() {
let formatted = format_module_source_range(&unformatted, options, start, end)
.map_err(|err| FormatCommandError::FormatModule(path.map(Path::to_path_buf), err))?;
formatted
} else {
let formatted = format_module_source(&unformatted, options)
.map_err(|err| FormatCommandError::FormatModule(path.map(Path::to_path_buf), err))?;
let formatted = formatted.as_code();
formatted.to_string()
};
if formatted.len() == unformatted.len() && formatted == unformatted {
Ok(FormatCommandResult::Unchanged)
} else {

View File

@@ -34,7 +34,7 @@ use ruff_formatter::{FormatError, LineWidth, PrintError};
use ruff_linter::logging::LogLevel;
use ruff_linter::settings::types::{FilePattern, FilePatternSet};
use ruff_python_formatter::{
format_module, FormatModuleError, MagicTrailingComma, PyFormatOptions,
format_module_source, FormatModuleError, MagicTrailingComma, PyFormatOptions,
};
use ruff_workspace::resolver::{python_files_in_path, PyprojectConfig, Resolver};
@@ -800,7 +800,7 @@ fn format_dev_file(
let content = fs::read_to_string(input_path)?;
#[cfg(not(debug_assertions))]
let start = Instant::now();
let printed = match format_module(&content, options.clone()) {
let printed = match format_module_source(&content, options.clone()) {
Ok(printed) => printed,
Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => {
return Err(CheckFileError::SyntaxErrorInInput(err));
@@ -827,7 +827,7 @@ fn format_dev_file(
}
if stability_check {
let reformatted = match format_module(formatted, options) {
let reformatted = match format_module_source(formatted, options) {
Ok(reformatted) => reformatted,
Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => {
return Err(CheckFileError::SyntaxErrorInOutput {

View File

@@ -1,6 +1,5 @@
use crate::prelude::TagKind;
use crate::GroupId;
use ruff_text_size::TextRange;
use std::error::Error;
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
@@ -12,7 +11,7 @@ pub enum FormatError {
SyntaxError { message: &'static str },
/// In case range formatting failed because the provided range was larger
/// than the formatted syntax tree
RangeError { input: TextRange, tree: TextRange },
RangeError { row: usize, col: usize },
/// In case printing the document failed because it has an invalid structure.
InvalidDocument(InvalidDocumentError),
@@ -32,9 +31,9 @@ impl std::fmt::Display for FormatError {
FormatError::SyntaxError {message} => {
std::write!(fmt, "syntax error: {message}")
},
FormatError::RangeError { input, tree } => std::write!(
FormatError::RangeError { row, col } => std::write!(
fmt,
"formatting range {input:?} is larger than syntax tree {tree:?}"
"formatting range {row}:{col} is not a valid index"
),
FormatError::InvalidDocument(error) => std::write!(fmt, "Invalid document: {error}\n\n This is an internal Rome error. Please report if necessary."),
FormatError::PoorLayout => {

View File

@@ -334,7 +334,7 @@ macro_rules! best_fitting {
$crate::BestFitting::from_arguments_unchecked($crate::format_args!($least_expanded, $($tail),+))
}
}}
}
}
#[cfg(test)]
mod tests {

View File

@@ -43,6 +43,7 @@ insta = { workspace = true, features = ["glob"] }
serde = { workspace = true }
serde_json = { workspace = true }
similar = { workspace = true }
indoc = "2.0.4"
[[test]]
name = "ruff_python_formatter_fixtures"

View File

@@ -2,17 +2,17 @@
use std::path::{Path, PathBuf};
use anyhow::{bail, Context, Result};
use anyhow::{format_err, Context, Result};
use clap::{command, Parser, ValueEnum};
use ruff_formatter::SourceCode;
use ruff_python_index::CommentRangesBuilder;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode};
use ruff_text_size::Ranged;
use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::{parse_ok_tokens, Mode};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use crate::comments::collect_comments;
use crate::{format_node, PyFormatOptions};
use crate::{format_module_ast, format_module_range, PyFormatOptions};
#[derive(ValueEnum, Clone, Debug)]
pub enum Emit {
@@ -37,38 +37,46 @@ pub struct Cli {
pub print_ir: bool,
#[clap(long)]
pub print_comments: bool,
/// byte offset for range formatting
#[clap(long)]
pub start: Option<u32>,
/// byte offset for range formatting
#[clap(long)]
pub end: Option<u32>,
}
pub fn format_and_debug_print(input: &str, cli: &Cli, source_type: &Path) -> Result<String> {
let mut tokens = Vec::new();
let mut comment_ranges = CommentRangesBuilder::default();
pub fn format_and_debug_print(source: &str, cli: &Cli, source_type: &Path) -> Result<String> {
let (tokens, comment_ranges) = tokens_and_ranges(source)
.map_err(|err| format_err!("Source contains syntax errors {err:?}"))?;
let module =
parse_ok_tokens(tokens, Mode::Module, "<filename>").context("Syntax error in input")?;
let options = PyFormatOptions::from_extension(source_type);
let source_code = SourceCode::new(source);
let locator = Locator::new(source);
for result in lex(input, Mode::Module) {
let (token, range) = match result {
Ok((token, range)) => (token, range),
Err(err) => bail!("Source contains syntax errors {err:?}"),
};
comment_ranges.visit_token(&token, range);
tokens.push(Ok((token, range)));
if cli.start.is_some() || cli.end.is_some() {
let range = TextRange::new(
cli.start.map(TextSize::new).unwrap_or_default(),
cli.end.map(TextSize::new).unwrap_or(source.text_len()),
);
return Ok(format_module_range(
&module,
&comment_ranges,
source,
options,
&locator,
range,
)?);
}
let comment_ranges = comment_ranges.finish();
// Parse the AST.
let python_ast =
parse_tokens(tokens, Mode::Module, "<filename>").context("Syntax error in input")?;
let options = PyFormatOptions::from_extension(source_type);
let formatted = format_node(&python_ast, &comment_ranges, input, options)
let formatted = format_module_ast(&module, &comment_ranges, source, options)
.context("Failed to format node")?;
if cli.print_ir {
println!("{}", formatted.document().display(SourceCode::new(input)));
println!("{}", formatted.document().display(source_code));
}
if cli.print_comments {
// Print preceding, following and enclosing nodes
let source_code = SourceCode::new(input);
let decorated_comments = collect_comments(&python_ast, source_code, &comment_ranges);
let decorated_comments = collect_comments(&module, source_code, &comment_ranges);
if !decorated_comments.is_empty() {
println!("# Comment decoration: Range, Preceding, Following, Enclosing, Comment");
}
@@ -86,13 +94,10 @@ pub fn format_and_debug_print(input: &str, cli: &Cli, source_type: &Path) -> Res
comment.enclosing_node().kind(),
comment.enclosing_node().range()
),
comment.slice().text(SourceCode::new(input)),
comment.slice().text(source_code),
);
}
println!(
"{:#?}",
formatted.context().comments().debug(SourceCode::new(input))
);
println!("{:#?}", formatted.context().comments().debug(source_code));
}
Ok(formatted
.print()

View File

@@ -549,9 +549,9 @@ mod tests {
use ruff_formatter::SourceCode;
use ruff_python_ast::Mod;
use ruff_python_index::CommentRangesBuilder;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode};
use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::{parse_ok_tokens, Mode};
use ruff_python_trivia::CommentRanges;
use crate::comments::Comments;
@@ -563,19 +563,11 @@ mod tests {
}
impl<'a> CommentsTestCase<'a> {
fn from_code(code: &'a str) -> Self {
let source_code = SourceCode::new(code);
let tokens: Vec<_> = lex(code, Mode::Module).collect();
let mut comment_ranges = CommentRangesBuilder::default();
for (token, range) in tokens.iter().flatten() {
comment_ranges.visit_token(token, *range);
}
let comment_ranges = comment_ranges.finish();
let parsed = parse_tokens(tokens, Mode::Module, "test.py")
fn from_code(source: &'a str) -> Self {
let source_code = SourceCode::new(source);
let (tokens, comment_ranges) =
tokens_and_ranges(source).expect("Expect source to be valid Python");
let parsed = parse_ok_tokens(tokens, Mode::Module, "test.py")
.expect("Expect source to be valid Python");
CommentsTestCase {

View File

@@ -1,21 +1,27 @@
use std::iter;
use std::str::FromStr;
use thiserror::Error;
use tracing::Level;
use tracing::{warn, Level};
use ruff_formatter::prelude::*;
use ruff_formatter::{format, FormatError, Formatted, PrintError, Printed, SourceCode};
use ruff_python_ast::node::AstNode;
use ruff_python_ast::Mod;
use ruff_python_index::CommentRangesBuilder;
use ruff_python_parser::lexer::{lex, LexicalError};
use ruff_python_parser::{parse_tokens, Mode, ParseError};
use ruff_python_trivia::CommentRanges;
use ruff_python_ast::{
Mod, Stmt, StmtClassDef, StmtFor, StmtFunctionDef, StmtIf, StmtWhile, StmtWith,
};
use ruff_python_index::tokens_and_ranges;
use ruff_python_parser::lexer::LexicalError;
use ruff_python_parser::{parse_ok_tokens, Mode, ParseError};
use ruff_python_trivia::{is_python_whitespace, CommentRanges};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use crate::comments::{
dangling_comments, leading_comments, trailing_comments, Comments, SourceComment,
};
pub use crate::context::PyFormatContext;
pub use crate::options::{MagicTrailingComma, PreviewMode, PyFormatOptions, QuoteStyle};
use crate::statement::suite::SuiteKind;
use crate::verbatim::suppressed_node;
pub use settings::FormatterSettings;
@@ -30,6 +36,7 @@ mod options;
pub(crate) mod other;
pub(crate) mod pattern;
mod prelude;
mod range_formatting;
mod settings;
pub(crate) mod statement;
pub(crate) mod type_param;
@@ -123,61 +130,260 @@ impl From<ParseError> for FormatModuleError {
}
}
#[tracing::instrument(level = Level::TRACE, skip_all)]
pub fn format_module(
contents: &str,
#[tracing::instrument(name = "format", level = Level::TRACE, skip_all)]
pub fn format_module_source(
source: &str,
options: PyFormatOptions,
) -> Result<Printed, FormatModuleError> {
// Tokenize once
let mut tokens = Vec::new();
let mut comment_ranges = CommentRangesBuilder::default();
for result in lex(contents, Mode::Module) {
let (token, range) = result?;
comment_ranges.visit_token(&token, range);
tokens.push(Ok((token, range)));
}
let comment_ranges = comment_ranges.finish();
// Parse the AST.
let python_ast = parse_tokens(tokens, Mode::Module, "<filename>")?;
let formatted = format_node(&python_ast, &comment_ranges, contents, options)?;
let (tokens, comment_ranges) = tokens_and_ranges(source)?;
let module = parse_ok_tokens(tokens, Mode::Module, "<filename>")?;
let formatted = format_module_ast(&module, &comment_ranges, source, options)?;
Ok(formatted.print()?)
}
pub fn format_node<'a>(
root: &'a Mod,
/// A position used to delimit a formatting range, written as `<row>:<column>`.
///
/// Both components are zero-indexed and the column counts chars, e.g. `1:2`.
///
/// See [`Locator::convert_row_and_column`] for details on the semantics.
#[derive(Copy, Clone, Debug, Default)]
pub struct LspRowColumn {
    // Zero-indexed row.
    row: usize,
    // Zero-indexed, char-based column within the row.
    col: usize,
}

impl FromStr for LspRowColumn {
    type Err = &'static str;

    /// Parses a `<row>:<column>` pair.
    ///
    /// The input is split at the first colon; the row is validated before the
    /// column, so a string with two bad halves reports the row error.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (row_text, col_text) = s
            .split_once(':')
            .ok_or("Coordinate is missing a colon, the format is `<row>:<column>`")?;

        let row = row_text
            .parse::<usize>()
            .map_err(|_| "row must be a number")?;
        let col = col_text
            .parse::<usize>()
            .map_err(|_| "col must be a number")?;

        Ok(Self { row, col })
    }
}
#[tracing::instrument(name = "format", level = Level::TRACE, skip_all)]
pub fn format_module_source_range(
source: &str,
options: PyFormatOptions,
start: Option<LspRowColumn>,
end: Option<LspRowColumn>,
) -> Result<String, FormatModuleError> {
let (tokens, comment_ranges) = tokens_and_ranges(source)?;
let module = parse_ok_tokens(tokens, Mode::Module, "<filename>")?;
let locator = Locator::new(source);
let start = if let Some(start) = start {
locator
.convert_row_and_column(start.row, start.col)
.ok_or(FormatError::RangeError {
row: start.row,
col: start.col,
})?
} else {
TextSize::default()
};
let end = if let Some(end) = end {
locator
.convert_row_and_column(end.row, end.col)
.ok_or(FormatError::RangeError {
row: end.row,
col: end.col,
})?
} else {
source.text_len()
};
let formatted = format_module_range(
&module,
&comment_ranges,
source,
options,
&locator,
TextRange::new(start, end),
)?;
Ok(formatted)
}
pub fn format_module_ast<'a>(
module: &'a Mod,
comment_ranges: &'a CommentRanges,
source: &'a str,
options: PyFormatOptions,
) -> FormatResult<Formatted<PyFormatContext<'a>>> {
let comments = Comments::from_ast(root, SourceCode::new(source), comment_ranges);
let source_code = SourceCode::new(source);
let comments = Comments::from_ast(module, source_code, comment_ranges);
let locator = Locator::new(source);
let formatted = format!(
PyFormatContext::new(options, locator.contents(), comments),
[root.format()]
[module.format()]
)?;
formatted
.context()
.comments()
.assert_all_formatted(SourceCode::new(source));
.assert_all_formatted(source_code);
Ok(formatted)
}
/// Public function for generating a printable string of the debug comments.
pub fn pretty_comments(root: &Mod, comment_ranges: &CommentRanges, source: &str) -> String {
let comments = Comments::from_ast(root, SourceCode::new(source), comment_ranges);
/// Is range inside the body of a node, if we consider the whitespace surrounding the suite as part
/// of the body?
///
/// TODO: Handle leading comments on the first statement
fn range_in_body(suite: &[Stmt], range: TextRange, source: &str) -> bool {
let suite_start = suite.first().unwrap().start();
let suite_end = suite.last().unwrap().end();
std::format!(
"{comments:#?}",
comments = comments.debug(SourceCode::new(source))
)
if range.start() < suite_start
// Extend the range to include all whitespace prior to the first statement
&& !source[TextRange::new(range.start(), suite_start)]
.chars()
.all(|c| is_python_whitespace(c))
{
return false;
}
if range.end() > suite_end
// Extend the range to include all whitespace after the last statement
&& !source[TextRange::new(suite_end,range.end())]
.chars()
.all(|c| is_python_whitespace(c))
{
return false;
}
true
}
/// Formats only the statements of `module` selected by `range` and returns the whole source
/// text with that part replaced.
///
/// The range is first widened to whole lines. Starting from the module body, the statements
/// overlapping the widened range are collected; while exactly one compound statement is
/// selected, the search may continue inside one of its bodies (see the loop below). The result
/// is assembled as: source text before the first selected statement, the formatted statements,
/// and the source text after the last selected statement.
///
/// If no statement overlaps the range, a message is written to stderr and `source` is returned
/// unchanged.
///
/// # Panics
///
/// Panics if `module` is not a `Mod::Module`. The `unwrap` calls on `body.last()` and
/// `clause.body.last()` additionally assume that statement bodies are never empty.
///
/// # Errors
///
/// Propagates errors from formatting and from printing the formatted document.
pub fn format_module_range<'a>(
module: &'a Mod,
comment_ranges: &'a CommentRanges,
source: &'a str,
options: PyFormatOptions,
locator: &Locator<'a>,
range: TextRange,
) -> FormatResult<String> {
let comments = Comments::from_ast(&module, SourceCode::new(source), &comment_ranges);
let Mod::Module(module_inner) = &module else {
panic!("That's not a module");
};
// TODO: Move this to LspRowColumn? we first count chars to then discard that anyway
// Consider someone wanted to format `print(i); print(j)`. This wouldn't work indent-wise, so
// we always do whole lines instead which means we can count indentation normally
// ```python
// if True:
// for i in range(10): j=i+1; print(i); print(j)
// ```
// From here on `range` always starts at a line start and ends at a line end.
let range = TextRange::new(
locator.line_start(range.start()),
locator.line_end(range.end()),
);
// ```
// a = 1; b = 2; c = 3; d = 4; e = 5
// ^ b end ^ d start
// ^^^^^^^^^^^^^^^ range
// ^ range start ^ range end
// ```
// TODO: If it goes beyond the end of the last stmt or before start, do we need to format
// the parent?
// The suite currently being searched; starts at the module body and is replaced by a nested
// body each time the loop descends.
let mut parent_body: &[Stmt] = module_inner.body.as_slice();
// Scratch slot assigned inside the loop; the `let in_range = loop { .. }` below shadows it
// with the final selection.
let mut in_range;
// TODO: Allow partial inclusions, e.g.
// ```python
// not_formatted = 0
// start = 1
// if cond_formatted:
// last_formatted = 2
// not_formatted_anymore = 3
// ```
// prob a slice and an optional trailing arg
let in_range = loop {
// `start` is the index of the first child that does not end before the range starts, `end`
// is one past the last child that starts before the range ends. This relies on the
// children being ordered by position.
let start = parent_body.partition_point(|child| child.end() < range.start());
let end = parent_body.partition_point(|child| child.start() < range.end());
in_range = &parent_body[start..end];
// Zero or several statements selected: nothing to descend into, format them as they are.
let [single_stmt] = in_range else {
break in_range;
};
match single_stmt {
Stmt::For(StmtFor { body, .. })
| Stmt::While(StmtWhile { body, .. })
| Stmt::With(StmtWith { body, .. })
| Stmt::FunctionDef(StmtFunctionDef { body, .. })
| Stmt::ClassDef(StmtClassDef { body, .. }) => {
// We need to format the header or a trailing comment
// TODO: ignore trivia
// NOTE(review): this branch looks inverted compared to the `Stmt::If` arm below. Here a
// range that lies inside the body stops the search (the whole compound statement gets
// formatted), while a range outside the body descends into it. The snapshot in
// `test_easy_nested` matches the current behaviour - confirm which one is intended.
// NOTE(review): only `body` is considered; `else` bodies of `for`/`while` are not
// visited.
if range_in_body(body, range, source) {
break in_range;
} else {
parent_body = &body;
}
}
Stmt::If(StmtIf {
body,
elif_else_clauses,
..
}) => {
// Range from the start of the selection to the end of the last statement of the last
// clause (or of the `if` body when there are no `elif`/`else` clauses).
let if_all_end = TextRange::new(
range.start(),
elif_else_clauses
.last()
.map(|clause| clause.body.last().unwrap().end())
.unwrap_or(body.last().unwrap().end()),
);
// NOTE(review): `if_all_end` is checked against the first body only, so with any
// `elif`/`else` clause containing non-whitespace text this check presumably fails and
// the whole `if` statement is formatted - verify against `test_if`.
if !range_in_body(body, if_all_end, source) {
break in_range;
} else if let Some(body) = iter::once(body)
.chain(elif_else_clauses.iter().map(|clause| &clause.body))
.find(|body| range_in_body(body, range, source))
{
// The range sits inside exactly one branch body: continue the search there.
parent_body = &body;
} else {
break in_range;
}
}
// | Stmt::StmtTry(ast::StmtTry { body, .. })
// | Stmt::ExceptHandlerExceptHandler(ast::ExceptHandlerExceptHandler { body, .. })
// | Stmt::ElifElseClause(ast::ElifElseClause { body, .. }) => &body,
// match
// Any other statement kind (including `try` and `match`, not handled yet) is formatted
// as a whole.
_ => break in_range,
}
};
let (Some(first), Some(last)) = (in_range.first(), in_range.last()) else {
// TODO: Use tracing again https://github.com/tokio-rs/tracing/issues/2721
// TODO: Forward this to something proper
eprintln!("The formatting range contains no statements");
return Ok(source.to_string());
};
// Keep everything before the first selected statement verbatim.
let mut buffer = source[TextRange::up_to(first.start())].to_string();
let formatted: Formatted<PyFormatContext> = format!(
PyFormatContext::new(options.clone(), locator.contents(), comments),
// TODO: Make suite formatting accept slices
// NOTE(review): the selection is always formatted as `SuiteKind::TopLevel`, even when it
// was taken from a nested body - confirm this is intended.
[in_range.to_vec().format().with_options(SuiteKind::TopLevel)]
)?;
//println!("{}", formatted.document().display(SourceCode::new(source)));
// TODO: Make the printer use the buffer instead
// NOTE(review): the indent level passed here is fixed at 1 regardless of how deeply the
// selected statements are nested - presumably a placeholder; verify.
buffer += formatted.print_with_indent(1)?.as_code();
// Keep everything after the last selected statement verbatim.
buffer += &source[TextRange::new(last.end(), source.text_len())];
return Ok(buffer.to_string());
}
/// Renders the comments extracted from `module` as a pretty-printed debug string.
///
/// The comments are collected from the AST together with `comment_ranges` and
/// formatted with the alternate (`{:#?}`) debug representation.
pub fn pretty_comments(module: &Mod, comment_ranges: &CommentRanges, source: &str) -> String {
    let code = SourceCode::new(source);
    let extracted = Comments::from_ast(module, code, comment_ranges);
    let debug_view = extracted.debug(code);
    std::format!("{debug_view:#?}")
}
#[cfg(test)]
@@ -187,11 +393,11 @@ mod tests {
use anyhow::Result;
use insta::assert_snapshot;
use ruff_python_index::CommentRangesBuilder;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::{parse_tokens, Mode};
use ruff_python_index::tokens_and_ranges;
use crate::{format_module, format_node, PyFormatOptions};
use ruff_python_parser::{parse_ok_tokens, Mode};
use crate::{format_module_ast, format_module_source, PyFormatOptions};
/// Very basic test intentionally kept very similar to the CLI
#[test]
@@ -207,7 +413,7 @@ if True:
pass
# trailing
"#;
let actual = format_module(input, PyFormatOptions::default())?
let actual = format_module_source(input, PyFormatOptions::default())?
.as_code()
.to_string();
assert_eq!(expected, actual);
@@ -218,7 +424,7 @@ if True:
#[ignore]
#[test]
fn quick_test() {
let src = r#"
let source = r#"
def main() -> None:
if True:
some_very_long_variable_name_abcdefghijk = Foo()
@@ -228,23 +434,13 @@ def main() -> None:
]
"#;
// Tokenize once
let mut tokens = Vec::new();
let mut comment_ranges = CommentRangesBuilder::default();
for result in lex(src, Mode::Module) {
let (token, range) = result.unwrap();
comment_ranges.visit_token(&token, range);
tokens.push(Ok((token, range)));
}
let comment_ranges = comment_ranges.finish();
let (tokens, comment_ranges) = tokens_and_ranges(source).unwrap();
// Parse the AST.
let source_path = "code_inline.py";
let python_ast = parse_tokens(tokens, Mode::Module, source_path).unwrap();
let module = parse_ok_tokens(tokens, Mode::Module, source_path).unwrap();
let options = PyFormatOptions::from_extension(Path::new(source_path));
let formatted = format_node(&python_ast, &comment_ranges, src, options).unwrap();
let formatted = format_module_ast(&module, &comment_ranges, source, options).unwrap();
// Uncomment the `dbg` to print the IR.
// Use `dbg_write!(f, []) instead of `write!(f, [])` in your formatting code to print some IR

View File

@@ -25,11 +25,11 @@ fn main() -> Result<()> {
cli.emit
);
}
let input = read_from_stdin()?;
let source = read_from_stdin()?;
// It seems reasonable to give this a dummy name
let formatted = format_and_debug_print(&input, &cli, Path::new("stdin.py"))?;
let formatted = format_and_debug_print(&source, &cli, Path::new("stdin.py"))?;
if cli.check {
if formatted == input {
if formatted == source {
return Ok(());
}
bail!("Content not correctly formatted")
@@ -37,9 +37,9 @@ fn main() -> Result<()> {
stdout().lock().write_all(formatted.as_bytes())?;
} else {
for file in &cli.files {
let input = fs::read_to_string(file)
let source = fs::read_to_string(file)
.with_context(|| format!("Could not read {}: ", file.display()))?;
let formatted = format_and_debug_print(&input, &cli, file)?;
let formatted = format_and_debug_print(&source, &cli, file)?;
match cli.emit {
Some(Emit::Stdout) => stdout().lock().write_all(formatted.as_bytes())?,
None | Some(Emit::Files) => {

View File

@@ -0,0 +1,226 @@
#[cfg(test)]
mod tests {
use crate::{format_module_source_range, LspRowColumn, PyFormatOptions};
use indoc::indoc;
use insta::assert_snapshot;
/// Range-formats `source` with default options between the `(row, column)` pairs `start` and
/// `end`, panicking if formatting fails.
fn format(source: &str, start: (usize, usize), end: (usize, usize)) -> String {
    let (start_row, start_col) = start;
    let (end_row, end_col) = end;

    let range_start = LspRowColumn {
        row: start_row,
        col: start_col,
    };
    let range_end = LspRowColumn {
        row: end_row,
        col: end_col,
    };

    let result = format_module_source_range(
        source,
        PyFormatOptions::default(),
        Some(range_start),
        Some(range_end),
    );
    result.unwrap()
}
// Range over top-level statements only: `(1, 3)..(2, 5)` touches rows 1 and 2 (`b` and `c`,
// counting from zero). The snapshot expects `b` and `c` to be reformatted while `a` and `d`
// stay exactly as written.
#[test]
fn test_top_level() {
assert_snapshot!(format(indoc! {r#"
a = [1,]
b = [1,]
c = [1,]
d = [1,]
"#}, (1, 3), (2, 5)), @r###"
a = [1,]
b = [
1,
]
c = [
1,
]
d = [1,]
"###);
}
// Range on a single statement inside a `for` body: `(3, 3)..(3, 5)` lies on row 3 (`c`).
// The snapshot expects the `for` header and its statements `b`, `c` and `d` to be reformatted,
// while `a` and `e` stay as written.
#[test]
fn test_easy_nested() {
assert_snapshot!(format(indoc! {r#"
a = [1,]
for i in range( 1 ):
b = [1,]
c = [1,]
d = [1,]
e = [1,]
"#}, (3, 3), (3, 5)), @r###"
a = [1,]
for i in range(1):
b = [
1,
]
c = [
1,
]
d = [
1,
]
e = [1,]
"###);
}
// Ranges inside the branches of an `if`/`elif`/`else` statement. All four snapshots are
// identical: every statement in all three branches is reformatted, while `import random` and
// `g` stay as written.
#[test]
fn test_if() {
let source = indoc! {r#"
import random
if random.random() < 0.5:
a = [1,]
b = [1,]
elif random.random() < 0.75:
c = [1,]
d = [1,]
else:
e = [1,]
f = [1,]
g = [1,]
"#};
// Row 3 (`b`), inside the `if` branch.
assert_snapshot!(format(source, (3, 0), (3, 10)), @r###"
import random
if random.random() < 0.5:
a = [
1,
]
b = [
1,
]
elif random.random() < 0.75:
c = [
1,
]
d = [
1,
]
else:
e = [
1,
]
f = [
1,
]
g = [1,]
"###);
// Row 6 (`d`), inside the `elif` branch.
assert_snapshot!(format(source, (6, 0), (6, 10)), @r###"
import random
if random.random() < 0.5:
a = [
1,
]
b = [
1,
]
elif random.random() < 0.75:
c = [
1,
]
d = [
1,
]
else:
e = [
1,
]
f = [
1,
]
g = [1,]
"###);
// Row 9 (`f`), inside the `else` branch.
assert_snapshot!(format(source, (9, 0), (9, 10)), @r###"
import random
if random.random() < 0.5:
a = [
1,
]
b = [
1,
]
elif random.random() < 0.75:
c = [
1,
]
d = [
1,
]
else:
e = [
1,
]
f = [
1,
]
g = [1,]
"###);
// Rows 3 to 6, spanning from the `if` branch into the `elif` branch.
assert_snapshot!(format(source, (3, 0), (6, 10)), @r###"
import random
if random.random() < 0.5:
a = [
1,
]
b = [
1,
]
elif random.random() < 0.75:
c = [
1,
]
d = [
1,
]
else:
e = [
1,
]
f = [
1,
]
g = [1,]
"###);
}
// TODO
// Range `(1, 3)..(2, 5)` covers rows 1 and 2: the statement `a` and the comment line after it.
// The snapshot expects `a` to be reformatted and the comment text to be kept.
// NOTE(review): the bare TODO above presumably marks this expectation as provisional - confirm
// what is still missing.
#[test]
fn test_trailing_comment() {
assert_snapshot!(format(indoc! {r#"
if True:
a = [1,]
# trailing comment
"#}, (1, 3), (2, 5)), @r###"
if True:
a = [
1,
]
# trailing comment
"###);
}
// TODO
// Range `(1, 3)..(2, 5)` covers rows 1 and 2 (`a` and `b`) inside an `if` body. The snapshot
// expects `a` and `b` to be reformatted while `c` stays as written.
// NOTE(review): judging by the name, the input is meant to use a non-default indentation
// width; the bare TODO above presumably marks the expectation as provisional - confirm.
#[test]
fn test_alternative_indent() {
assert_snapshot!(format(indoc! {r#"
if True:
a = [1,]
b = [1,]
c = [1,]
"#}, (1, 3), (2, 5)), @r###"
if True:
a = [
1,
]
b = [
1,
]
c = [1,]
"###);
}
}

View File

@@ -1,5 +1,5 @@
use ruff_formatter::FormatOptions;
use ruff_python_formatter::{format_module, PyFormatOptions};
use ruff_python_formatter::{format_module_source, PyFormatOptions};
use similar::TextDiff;
use std::fmt::{Formatter, Write};
use std::io::BufReader;
@@ -20,7 +20,7 @@ fn black_compatibility() {
PyFormatOptions::from_extension(input_path)
};
let printed = format_module(&content, options.clone()).unwrap_or_else(|err| {
let printed = format_module_source(&content, options.clone()).unwrap_or_else(|err| {
panic!(
"Formatting of {} to succeed but encountered error {err}",
input_path.display()
@@ -107,7 +107,8 @@ fn format() {
let content = fs::read_to_string(input_path).unwrap();
let options = PyFormatOptions::from_extension(input_path);
let printed = format_module(&content, options.clone()).expect("Formatting to succeed");
let printed =
format_module_source(&content, options.clone()).expect("Formatting to succeed");
let formatted_code = printed.as_code();
ensure_stability_when_formatting_twice(formatted_code, options.clone(), input_path);
@@ -124,7 +125,7 @@ fn format() {
for (i, options) in options.into_iter().enumerate() {
let printed =
format_module(&content, options.clone()).expect("Formatting to succeed");
format_module_source(&content, options.clone()).expect("Formatting to succeed");
let formatted_code = printed.as_code();
ensure_stability_when_formatting_twice(formatted_code, options.clone(), input_path);
@@ -139,7 +140,8 @@ fn format() {
.unwrap();
}
} else {
let printed = format_module(&content, options.clone()).expect("Formatting to succeed");
let printed =
format_module_source(&content, options.clone()).expect("Formatting to succeed");
let formatted_code = printed.as_code();
ensure_stability_when_formatting_twice(formatted_code, options, input_path);
@@ -174,7 +176,7 @@ fn ensure_stability_when_formatting_twice(
options: PyFormatOptions,
input_path: &Path,
) {
let reformatted = match format_module(formatted_code, options) {
let reformatted = match format_module_source(formatted_code, options) {
Ok(reformatted) => reformatted,
Err(err) => {
panic!(

View File

@@ -1,6 +1,7 @@
use std::fmt::Debug;
use ruff_python_parser::Tok;
use ruff_python_parser::lexer::{lex, LexicalError};
use ruff_python_parser::{Mode, Tok};
use ruff_python_trivia::CommentRanges;
use ruff_text_size::TextRange;
@@ -20,3 +21,21 @@ impl CommentRangesBuilder {
CommentRanges::new(self.ranges)
}
}
/// Lexes `source` in module mode, returning every token with its range together with the
/// comment ranges collected along the way.
///
/// Lexing stops at the first lexical error, which is returned to the caller.
pub fn tokens_and_ranges(
    source: &str,
) -> Result<(Vec<(Tok, TextRange)>, CommentRanges), LexicalError> {
    let mut builder = CommentRangesBuilder::default();

    // Collecting into a `Result` short-circuits on the first lexer error, so the builder only
    // ever sees the tokens that precede it.
    let tokens = lex(source, Mode::Module)
        .map(|spanned| {
            spanned.map(|(token, range)| {
                builder.visit_token(&token, range);
                (token, range)
            })
        })
        .collect::<Result<Vec<_>, _>>()?;

    Ok((tokens, builder.finish()))
}

View File

@@ -1,5 +1,5 @@
mod comment_ranges;
mod indexer;
pub use comment_ranges::CommentRangesBuilder;
pub use comment_ranges::{tokens_and_ranges, CommentRangesBuilder};
pub use indexer::Indexer;

View File

@@ -110,8 +110,8 @@
//! [lexer]: crate::lexer
pub use parser::{
parse, parse_expression, parse_expression_starts_at, parse_program, parse_starts_at,
parse_suite, parse_tokens, ParseError, ParseErrorType,
parse, parse_expression, parse_expression_starts_at, parse_ok_tokens, parse_program,
parse_starts_at, parse_suite, parse_tokens, ParseError, ParseErrorType,
};
use ruff_python_ast::{CmpOp, Expr, Mod, PySourceType, Suite};
use ruff_text_size::{Ranged, TextRange, TextSize};

View File

@@ -18,7 +18,7 @@ use itertools::Itertools;
pub(super) use lalrpop_util::ParseError as LalrpopError;
use ruff_text_size::{TextRange, TextSize};
use crate::lexer::{lex, lex_starts_at};
use crate::lexer::{lex, lex_starts_at, Spanned};
use crate::{
lexer::{self, LexResult, LexicalError, LexicalErrorType},
python,
@@ -159,7 +159,7 @@ pub fn parse_expression_starts_at(
/// let program = parse(source, Mode::Ipython, "<embedded>");
/// assert!(program.is_ok());
/// ```
pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, ParseError> {
pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<Mod, ParseError> {
parse_starts_at(source, mode, source_path, TextSize::default())
}
@@ -191,7 +191,7 @@ pub fn parse_starts_at(
mode: Mode,
source_path: &str,
offset: TextSize,
) -> Result<ast::Mod, ParseError> {
) -> Result<Mod, ParseError> {
let lxr = lexer::lex_starts_at(source, mode, offset);
parse_tokens(lxr, mode, source_path)
}
@@ -215,7 +215,7 @@ pub fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
mode: Mode,
source_path: &str,
) -> Result<ast::Mod, ParseError> {
) -> Result<Mod, ParseError> {
let lxr = lxr.into_iter();
parse_filtered_tokens(
@@ -225,19 +225,35 @@ pub fn parse_tokens(
)
}
/// Builds an AST from tokens that are already known to be valid, analogous to [`parse_tokens`].
///
/// In contrast to [`parse_tokens`], the input yields bare [`Spanned`] tokens instead of
/// [`LexResult`]s. Comment and non-logical-newline tokens are dropped before the remaining
/// tokens are handed to the parser.
///
/// # Errors
///
/// Returns a [`ParseError`] (carrying `source_path`) if the parser rejects the token stream.
pub fn parse_ok_tokens(
    lxr: impl IntoIterator<Item = Spanned>,
    mode: Mode,
    source_path: &str,
) -> Result<Mod, ParseError> {
    // The parser expects the start marker for `mode` ahead of the real tokens.
    let start = iter::once((Tok::start_marker(mode), TextRange::default()));
    let significant_tokens = lxr
        .into_iter()
        .filter(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
    // Reshape each `(token, range)` pair into the `(start, token, end)` triple the parser takes.
    let triples = start
        .chain(significant_tokens)
        .map(|(tok, tok_range)| (tok_range.start(), tok, tok_range.end()));
    python::TopParser::new()
        .parse(mode, triples)
        .map_err(|err| parse_error_from_lalrpop(err, source_path))
}
fn parse_filtered_tokens(
lxr: impl IntoIterator<Item = LexResult>,
mode: Mode,
source_path: &str,
) -> Result<ast::Mod, ParseError> {
) -> Result<Mod, ParseError> {
let marker_token = (Tok::start_marker(mode), TextRange::default());
let lexer = iter::once(Ok(marker_token)).chain(lxr);
python::TopParser::new()
.parse(
mode,
lexer
.into_iter()
.map_ok(|(t, range)| (range.start(), t, range.end())),
lexer.map_ok(|(t, range)| (range.start(), t, range.end())),
)
.map_err(|e| parse_error_from_lalrpop(e, source_path))
}

View File

@@ -441,6 +441,75 @@ impl<'a> Locator<'a> {
}
}
/// Compute the byte offset from zero-indexed row and column indices.
///
/// Returns `None` if `row` is not a line of the source. A `column` that lies past the end of
/// the line is clamped to the end of the line's content (before the line terminator), which is
/// how the LSP specifies out-of-range `character` values are to be interpreted.
///
/// We get row and column from the LSP. E.g.
/// ```text
/// a=(1,2,)
/// b=(3,4,)
///   ^
/// c=(5,6,)
/// ```
/// has coordinates `1:2`. Note that indices are computed in chars, e.g.
/// ```text
/// a=(1,2,)
/// "안녕"
///    ^
/// ```
/// where the first syllable is a single character (three bytes in UTF-8), we get `1:2`, while
/// for
/// ```text
/// a=(1,2,)
/// "감기"
///    ^
/// ```
/// where the first syllable is written in decomposed form as three characters (three bytes each
/// in UTF-8), we get `1:4`.
///
/// NOTE(review): LSP clients count `character` in UTF-16 code units unless another position
/// encoding has been negotiated, whereas this method counts `char`s. The two differ for
/// characters outside the Basic Multilingual Plane; confirm what the callers pass in.
///
/// ```rust
/// # use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
/// # use ruff_source_file::Locator;
///
/// let source = "a=(1,2,)\nb=(3,4,)";
/// let locator = Locator::new(source);
/// let offset = locator.convert_row_and_column(1, 2).unwrap();
/// assert_eq!(&source[TextRange::new(offset, source.text_len())], "(3,4,)");
///
/// // A column past the end of the line is clamped to the end of the line's content.
/// let offset = locator.convert_row_and_column(0, 100).unwrap();
/// assert_eq!(offset, TextSize::from(8));
///
/// // A row that does not exist yields `None`.
/// assert!(locator.convert_row_and_column(2, 0).is_none());
///
/// let source = "a=(1,2,)\n'안녕'";
/// let locator = Locator::new(source);
/// let offset = locator.convert_row_and_column(1, 2).unwrap();
/// assert_eq!(&source[TextRange::new(offset, source.text_len())], "녕'");
///
/// // Decomposed jamo, spelled as escapes so that the normalization form is unambiguous:
/// // U+1100 U+1161 U+11B7 (first syllable) followed by U+1100 U+1175 (second syllable).
/// let source = "a=(1,2,)\n'\u{1100}\u{1161}\u{11B7}\u{1100}\u{1175}'";
/// let locator = Locator::new(source);
/// let offset = locator.convert_row_and_column(1, 4).unwrap();
/// assert_eq!(&source[TextRange::new(offset, source.text_len())], "\u{1100}\u{1175}'");
/// ```
pub fn convert_row_and_column(&self, row: usize, column: usize) -> Option<TextSize> {
    let line_starts = self.to_index().line_starts();
    let line_start = *line_starts.get(row)?;
    // The last line has no successor, so it extends to the end of the source.
    let next_line_start = line_starts
        .get(row + 1)
        .copied()
        .unwrap_or(self.contents.text_len());
    let line_contents = &self.contents[TextRange::from(line_start..next_line_start)];
    let len_in_line: TextSize = line_contents
        .chars()
        // Since the range goes to the next line start, `line_contents` contains the line
        // break. Stop in front of it so that an out-of-range column is clamped to the end of
        // the line instead of landing inside or after the line terminator.
        .take_while(|c| !matches!(c, '\n' | '\r'))
        .take(column)
        .map(TextLen::text_len)
        .sum();
    Some(line_start + len_in_line)
}
/// Take the source code between the given [`TextRange`].
#[inline]
pub fn slice<T: Ranged>(&self, ranged: T) -> &'a str {

View File

@@ -14,7 +14,7 @@ use ruff_linter::settings::{flags, DUMMY_VARIABLE_RGX, PREFIXES};
use ruff_linter::source_kind::SourceKind;
use ruff_python_ast::{Mod, PySourceType};
use ruff_python_codegen::Stylist;
use ruff_python_formatter::{format_node, pretty_comments, PyFormatContext};
use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext};
use ruff_python_index::{CommentRangesBuilder, Indexer};
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{parse_tokens, AsMode, Mode};
@@ -305,7 +305,7 @@ impl<'a> ParsedModule<'a> {
.formatter
.to_format_options(PySourceType::default());
format_node(
format_module_ast(
&self.module,
&self.comment_ranges,
self.source_code,