Compare commits

...

1 Commit

Author SHA1 Message Date
Dhruv Manilawala
6b0d7cff4d Expose TokenKind to the linter 2024-05-02 11:00:20 +05:30
16 changed files with 137 additions and 121 deletions

View File

@@ -10,7 +10,7 @@ use ruff_linter::settings::{flags, LinterSettings};
use ruff_linter::source_kind::SourceKind;
use ruff_linter::{registry::Rule, RuleSelector};
use ruff_python_ast::PySourceType;
use ruff_python_parser::{lexer, parse_program_tokens, Mode};
use ruff_python_parser::{parse_program_tokens, Mode};
#[cfg(target_os = "windows")]
#[global_allocator]
@@ -55,7 +55,7 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &LinterSettings) {
&case,
|b, case| {
// Tokenize the source.
let tokens: Vec<_> = lexer::lex(case.code(), Mode::Module).collect();
let (tokens, kinds) = ruff_python_parser::tokenize(case.code(), Mode::Module);
// Parse the source.
let ast = parse_program_tokens(tokens.clone(), case.code(), false).unwrap();
@@ -71,6 +71,7 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &LinterSettings) {
PySourceType::from(path.as_path()),
ParseSource::Precomputed {
tokens: &tokens,
kinds: &kinds,
ast: &ast,
},
);

View File

@@ -90,7 +90,6 @@ pub(crate) fn check_physical_lines(
mod tests {
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_python_parser::lexer::lex;
use ruff_python_parser::Mode;
use ruff_source_file::Locator;
@@ -105,7 +104,7 @@ mod tests {
fn e501_non_ascii_char() {
let line = "'\u{4e9c}' * 2"; // 7 in UTF-32, 9 in UTF-8.
let locator = Locator::new(line);
let tokens: Vec<_> = lex(line, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(line, Mode::Module);
let indexer = Indexer::from_tokens(&tokens, &locator);
let stylist = Stylist::from_tokens(&tokens, &locator);

View File

@@ -394,8 +394,7 @@ impl TodoDirectiveKind {
#[cfg(test)]
mod tests {
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{lexer, Mode};
use ruff_python_parser::Mode;
use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_python_index::Indexer;
@@ -407,11 +406,11 @@ mod tests {
use crate::noqa::NoqaMapping;
fn noqa_mappings(contents: &str) -> NoqaMapping {
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let indexer = Indexer::from_tokens(&lxr, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
extract_noqa_line_for(&lxr, &locator, &indexer)
extract_noqa_line_for(&tokens, &locator, &indexer)
}
#[test]
@@ -586,9 +585,9 @@ assert foo, \
let contents = "x = 1
y = 2
z = x + 1";
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let indexer = Indexer::from_tokens(&lxr, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
assert_eq!(
extract_isort_directives(&locator, &indexer).exclusions,
Vec::default()
@@ -599,9 +598,9 @@ x = 1
y = 2
# isort: on
z = x + 1";
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let indexer = Indexer::from_tokens(&lxr, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
assert_eq!(
extract_isort_directives(&locator, &indexer).exclusions,
Vec::from_iter([TextRange::new(TextSize::from(0), TextSize::from(25))])
@@ -614,9 +613,9 @@ y = 2
# isort: on
z = x + 1
# isort: on";
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let indexer = Indexer::from_tokens(&lxr, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
assert_eq!(
extract_isort_directives(&locator, &indexer).exclusions,
Vec::from_iter([TextRange::new(TextSize::from(0), TextSize::from(38))])
@@ -626,9 +625,9 @@ z = x + 1
x = 1
y = 2
z = x + 1";
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let indexer = Indexer::from_tokens(&lxr, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
assert_eq!(
extract_isort_directives(&locator, &indexer).exclusions,
Vec::from_iter([TextRange::at(TextSize::from(0), contents.text_len())])
@@ -638,9 +637,9 @@ z = x + 1";
x = 1
y = 2
z = x + 1";
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let indexer = Indexer::from_tokens(&lxr, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
assert_eq!(
extract_isort_directives(&locator, &indexer).exclusions,
Vec::default()
@@ -652,9 +651,9 @@ x = 1
y = 2
# isort: skip_file
z = x + 1";
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let indexer = Indexer::from_tokens(&lxr, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
assert_eq!(
extract_isort_directives(&locator, &indexer).exclusions,
Vec::default()
@@ -666,9 +665,9 @@ z = x + 1";
let contents = "x = 1
y = 2
z = x + 1";
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let indexer = Indexer::from_tokens(&lxr, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
assert_eq!(
extract_isort_directives(&locator, &indexer).splits,
Vec::new()
@@ -678,9 +677,9 @@ z = x + 1";
y = 2
# isort: split
z = x + 1";
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let indexer = Indexer::from_tokens(&lxr, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
assert_eq!(
extract_isort_directives(&locator, &indexer).splits,
vec![TextSize::from(12)]
@@ -689,9 +688,9 @@ z = x + 1";
let contents = "x = 1
y = 2 # isort: split
z = x + 1";
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let indexer = Indexer::from_tokens(&lxr, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
assert_eq!(
extract_isort_directives(&locator, &indexer).splits,
vec![TextSize::from(13)]

View File

@@ -321,7 +321,6 @@ mod tests {
use ruff_python_ast::PySourceType;
use ruff_python_codegen::Stylist;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{parse_suite, Mode};
use ruff_source_file::{LineEnding, Locator};
use ruff_text_size::TextSize;
@@ -332,7 +331,7 @@ mod tests {
fn start_of_file() -> Result<()> {
fn insert(contents: &str) -> Result<Insertion> {
let program = parse_suite(contents)?;
let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, Mode::Module);
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let stylist = Stylist::from_tokens(&tokens, &locator);
Ok(Insertion::start_of_file(&program, &locator, &stylist))
@@ -443,7 +442,7 @@ x = 1
#[test]
fn start_of_block() {
fn insert(contents: &str, offset: TextSize) -> Insertion {
let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, Mode::Module);
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let stylist = Stylist::from_tokens(&tokens, &locator);
Insertion::start_of_block(offset, &locator, &stylist, PySourceType::default())

View File

@@ -15,9 +15,9 @@ use ruff_python_ast::{PySourceType, Suite};
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{AsMode, ParseError};
use ruff_python_parser::{AsMode, ParseError, TokenKind};
use ruff_source_file::{Locator, SourceFileBuilder};
use ruff_text_size::Ranged;
use ruff_text_size::{Ranged, TextRange};
use crate::checkers::ast::check_ast;
use crate::checkers::filesystem::check_file_path;
@@ -356,7 +356,7 @@ pub fn add_noqa_to_path(
let contents = source_kind.source_code();
// Tokenize once.
let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, source_type.as_mode());
let (tokens, kinds) = ruff_python_parser::tokenize(contents, source_type.as_mode());
// Map row and column locations to byte slices (lazily).
let locator = Locator::new(contents);
@@ -390,7 +390,7 @@ pub fn add_noqa_to_path(
flags::Noqa::Disabled,
source_kind,
source_type,
TokenSource::Tokens(tokens),
TokenSource::Tokens(tokens, kinds),
);
// Log any parse errors.
@@ -526,7 +526,7 @@ pub fn lint_fix<'a>(
// Continuously fix until the source code stabilizes.
loop {
// Tokenize once.
let tokens: Vec<LexResult> =
let (tokens, kinds) =
ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode());
// Map row and column locations to byte slices (lazily).
@@ -558,7 +558,7 @@ pub fn lint_fix<'a>(
noqa,
&transformed,
source_type,
TokenSource::Tokens(tokens),
TokenSource::Tokens(tokens, kinds),
);
if iterations == 0 {
@@ -702,6 +702,7 @@ pub enum ParseSource<'a> {
/// Use the precomputed tokens and AST.
Precomputed {
tokens: &'a [LexResult],
kinds: &'a [(TokenKind, TextRange)],
ast: &'a Suite,
},
}
@@ -714,11 +715,14 @@ impl<'a> ParseSource<'a> {
source_type: PySourceType,
) -> TokenSource<'a> {
match self {
Self::None => TokenSource::Tokens(ruff_python_parser::tokenize(
source_kind.source_code(),
source_type.as_mode(),
)),
Self::Precomputed { tokens, ast } => TokenSource::Precomputed { tokens, ast },
Self::None => {
let (tokens, kinds) =
ruff_python_parser::tokenize(source_kind.source_code(), source_type.as_mode());
TokenSource::Tokens(tokens, kinds)
}
Self::Precomputed { tokens, kinds, ast } => {
TokenSource::Precomputed { tokens, kinds, ast }
}
}
}
}
@@ -726,10 +730,11 @@ impl<'a> ParseSource<'a> {
#[derive(Debug, Clone)]
pub enum TokenSource<'a> {
/// Use the precomputed tokens to generate the AST.
Tokens(Vec<LexResult>),
Tokens(Vec<LexResult>, Vec<(TokenKind, TextRange)>),
/// Use the precomputed tokens and AST.
Precomputed {
tokens: &'a [LexResult],
kinds: &'a [(TokenKind, TextRange)],
ast: &'a Suite,
},
}
@@ -739,13 +744,21 @@ impl Deref for TokenSource<'_> {
fn deref(&self) -> &Self::Target {
match self {
Self::Tokens(tokens) => tokens,
Self::Tokens(tokens, _) => tokens,
Self::Precomputed { tokens, .. } => tokens,
}
}
}
impl<'a> TokenSource<'a> {
#[allow(dead_code)]
pub(crate) fn kinds(&self) -> &[(TokenKind, TextRange)] {
match self {
Self::Tokens(_, kinds) => kinds,
Self::Precomputed { kinds, .. } => kinds,
}
}
/// Convert to an [`AstSource`], parsing if necessary.
fn into_ast_source(
self,
@@ -753,11 +766,13 @@ impl<'a> TokenSource<'a> {
source_type: PySourceType,
) -> Result<AstSource<'a>, ParseError> {
match self {
Self::Tokens(tokens) => Ok(AstSource::Ast(ruff_python_parser::parse_program_tokens(
tokens,
source_kind.source_code(),
source_type.is_ipynb(),
)?)),
Self::Tokens(tokens, _) => {
Ok(AstSource::Ast(ruff_python_parser::parse_program_tokens(
tokens,
source_kind.source_code(),
source_type.is_ipynb(),
)?))
}
Self::Precomputed { ast, .. } => Ok(AstSource::Precomputed(ast)),
}
}

View File

@@ -506,8 +506,8 @@ struct Line {
#[cfg(test)]
mod tests {
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{lexer, Mode};
use ruff_python_parser::Mode;
use ruff_source_file::Locator;
@@ -592,9 +592,9 @@ if False:
}
fn assert_logical_lines(contents: &str, expected: &[&str]) {
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
let actual: Vec<String> = LogicalLines::from_tokens(&tokens, &locator)
.into_iter()
.map(|line| line.text_trimmed())
.map(ToString::to_string)

View File

@@ -11,7 +11,6 @@ mod tests {
use anyhow::Result;
use regex::Regex;
use ruff_python_parser::lexer::LexResult;
use test_case::test_case;
@@ -600,7 +599,7 @@ mod tests {
let source_type = PySourceType::default();
let source_kind = SourceKind::Python(contents.to_string());
let settings = LinterSettings::for_rules(Linter::Pyflakes.rules());
let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents, source_type.as_mode());
let (tokens, kinds) = ruff_python_parser::tokenize(&contents, source_type.as_mode());
let locator = Locator::new(&contents);
let stylist = Stylist::from_tokens(&tokens, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -624,7 +623,7 @@ mod tests {
flags::Noqa::Enabled,
&source_kind,
source_type,
TokenSource::Tokens(tokens),
TokenSource::Tokens(tokens, kinds),
);
diagnostics.sort_by_key(Ranged::start);
let actual = diagnostics

View File

@@ -13,7 +13,6 @@ use ruff_diagnostics::{Applicability, Diagnostic, FixAvailability};
use ruff_python_ast::PySourceType;
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::AsMode;
use ruff_python_trivia::textwrap::dedent;
use ruff_source_file::{Locator, SourceFileBuilder};
@@ -111,7 +110,7 @@ pub(crate) fn test_contents<'a>(
settings: &LinterSettings,
) -> (Vec<Message>, Cow<'a, SourceKind>) {
let source_type = PySourceType::from(path);
let tokens: Vec<LexResult> =
let (tokens, kinds) =
ruff_python_parser::tokenize(source_kind.source_code(), source_type.as_mode());
let locator = Locator::new(source_kind.source_code());
let stylist = Stylist::from_tokens(&tokens, &locator);
@@ -137,7 +136,7 @@ pub(crate) fn test_contents<'a>(
flags::Noqa::Enabled,
source_kind,
source_type,
TokenSource::Tokens(tokens),
TokenSource::Tokens(tokens, kinds),
);
let source_has_errors = error.is_some();
@@ -177,7 +176,7 @@ pub(crate) fn test_contents<'a>(
transformed = Cow::Owned(transformed.updated(fixed_contents, &source_map));
let tokens: Vec<LexResult> =
let (tokens, kinds) =
ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode());
let locator = Locator::new(transformed.source_code());
let stylist = Stylist::from_tokens(&tokens, &locator);
@@ -203,7 +202,7 @@ pub(crate) fn test_contents<'a>(
flags::Noqa::Enabled,
&transformed,
source_type,
TokenSource::Tokens(tokens),
TokenSource::Tokens(tokens, kinds),
);
if let Some(fixed_error) = fixed_error {

View File

@@ -2,7 +2,7 @@ mod generator;
mod stylist;
pub use generator::Generator;
use ruff_python_parser::{lexer, parse_suite, Mode, ParseError};
use ruff_python_parser::{parse_suite, Mode, ParseError};
use ruff_source_file::Locator;
pub use stylist::Stylist;
@@ -10,7 +10,7 @@ pub use stylist::Stylist;
pub fn round_trip(code: &str) -> Result<String, ParseError> {
let locator = Locator::new(code);
let python_ast = parse_suite(code)?;
let tokens: Vec<_> = lexer::lex(code, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(code, Mode::Module);
let stylist = Stylist::from_tokens(&tokens, &locator);
let mut generator: Generator = (&stylist).into();
generator.unparse_suite(&python_ast);

View File

@@ -126,7 +126,6 @@ impl Deref for Indentation {
#[cfg(test)]
mod tests {
use ruff_python_parser::lexer::lex;
use ruff_python_parser::Mode;
use ruff_source_file::{find_newline, LineEnding};
@@ -138,7 +137,7 @@ mod tests {
fn indentation() {
let contents = r"x = 1";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation::default()
@@ -149,7 +148,7 @@ if True:
pass
";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation(" ".to_string())
@@ -160,7 +159,7 @@ if True:
pass
";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation(" ".to_string())
@@ -171,7 +170,7 @@ if True:
pass
";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation("\t".to_string())
@@ -186,7 +185,7 @@ x = (
)
";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation::default()
@@ -199,7 +198,7 @@ class FormFeedIndent:
print(a)
";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation(" ".to_string())
@@ -210,7 +209,7 @@ class FormFeedIndent:
fn quote() {
let contents = r"x = 1";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::default()
@@ -218,7 +217,7 @@ class FormFeedIndent:
let contents = r"x = '1'";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
@@ -226,7 +225,7 @@ class FormFeedIndent:
let contents = r"x = f'1'";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
@@ -234,7 +233,7 @@ class FormFeedIndent:
let contents = r#"x = "1""#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
@@ -242,7 +241,7 @@ class FormFeedIndent:
let contents = r#"x = f"1""#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
@@ -250,7 +249,7 @@ class FormFeedIndent:
let contents = r#"s = "It's done.""#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
@@ -263,7 +262,7 @@ def f():
pass
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::default()
@@ -276,7 +275,7 @@ def f():
a = 'v'
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
@@ -288,7 +287,7 @@ a = 'v'
a = "v"
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
@@ -301,7 +300,7 @@ a = "v"
a = f'v'
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
@@ -313,7 +312,7 @@ a = f'v'
a = f"v"
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
@@ -323,7 +322,7 @@ a = f"v"
f'''Module docstring.'''
";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single

View File

@@ -244,8 +244,7 @@ impl Indexer {
#[cfg(test)]
mod tests {
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{lexer, Mode};
use ruff_python_parser::Mode;
use ruff_source_file::Locator;
use ruff_text_size::{TextRange, TextSize};
@@ -254,8 +253,8 @@ mod tests {
#[test]
fn continuation() {
let contents = r"x = 1";
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let indexer = Indexer::from_tokens(&tokens, &Locator::new(contents));
assert_eq!(indexer.continuation_line_starts(), &[]);
let contents = r"
@@ -267,8 +266,8 @@ y = 2
"
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let indexer = Indexer::from_tokens(&tokens, &Locator::new(contents));
assert_eq!(indexer.continuation_line_starts(), &[]);
let contents = r#"
@@ -287,8 +286,8 @@ if True:
)
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let indexer = Indexer::from_tokens(&tokens, &Locator::new(contents));
assert_eq!(
indexer.continuation_line_starts(),
[
@@ -319,8 +318,8 @@ x = 1; \
import os
"
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let indexer = Indexer::from_tokens(&tokens, &Locator::new(contents));
assert_eq!(
indexer.continuation_line_starts(),
[
@@ -342,8 +341,8 @@ f'foo { 'str1' \
}'
"
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let indexer = Indexer::from_tokens(&tokens, &Locator::new(contents));
assert_eq!(
indexer.continuation_line_starts(),
[
@@ -367,8 +366,8 @@ x = (
+ 2)
"
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let indexer = Indexer::from_tokens(&tokens, &Locator::new(contents));
assert_eq!(
indexer.continuation_line_starts(),
[
@@ -392,8 +391,8 @@ f"start {f"inner {f"another"}"} end"
f"implicit " f"concatenation"
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let indexer = Indexer::from_tokens(&tokens, &Locator::new(contents));
assert_eq!(
indexer
.fstring_ranges()
@@ -428,8 +427,8 @@ f-string"""}
"""
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let indexer = Indexer::from_tokens(&tokens, &Locator::new(contents));
assert_eq!(
indexer
.fstring_ranges()
@@ -466,8 +465,8 @@ f-string"""}
the end"""
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
let (tokens, _) = ruff_python_parser::tokenize(contents, Mode::Module);
let indexer = Indexer::from_tokens(&tokens, &Locator::new(contents));
// For reference, the ranges of the f-strings in the above code are as
// follows where the ones inside parentheses are nested f-strings:

View File

@@ -117,7 +117,7 @@ pub use crate::parser::Program;
pub use crate::token::{Tok, TokenKind};
use ruff_python_ast::{Expr, Mod, ModModule, PySourceType, Suite};
use ruff_text_size::TextSize;
use ruff_text_size::{TextRange, TextSize};
mod error;
pub mod lexer;
@@ -340,9 +340,14 @@ pub fn parse_tokens(tokens: Vec<LexResult>, source: &str, mode: Mode) -> Result<
}
/// Collect tokens up to and including the first error.
pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {
let mut tokens: Vec<LexResult> = allocate_tokens_vec(contents);
pub fn tokenize(contents: &str, mode: Mode) -> (Vec<LexResult>, Vec<(TokenKind, TextRange)>) {
let mut tokens = allocate_tokens_vec(contents);
let mut kinds = allocate_token_kinds_vec(contents);
for tok in lexer::lex(contents, mode) {
if let Ok((token, range)) = tok.as_ref() {
kinds.push((TokenKind::from_token(token), *range));
}
let is_err = tok.is_err();
tokens.push(tok);
if is_err {
@@ -350,7 +355,7 @@ pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {
}
}
tokens
(tokens, kinds)
}
/// Tokenizes all tokens.
@@ -373,6 +378,10 @@ pub fn allocate_tokens_vec(contents: &str) -> Vec<LexResult> {
Vec::with_capacity(approximate_tokens_lower_bound(contents))
}
fn allocate_token_kinds_vec(contents: &str) -> Vec<(TokenKind, TextRange)> {
Vec::with_capacity(approximate_tokens_lower_bound(contents))
}
/// Approximates the number of tokens when lexing `contents`.
fn approximate_tokens_lower_bound(contents: &str) -> usize {
contents.len().saturating_mul(15) / 100

View File

@@ -207,7 +207,6 @@ impl<'a> IntoIterator for &'a CommentRanges {
#[cfg(test)]
mod tests {
use ruff_python_index::Indexer;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{tokenize, Mode};
use ruff_source_file::Locator;
use ruff_text_size::TextSize;
@@ -216,7 +215,7 @@ mod tests {
fn block_comments_two_line_block_at_start() {
// arrange
let source = "# line 1\n# line 2\n";
let tokens = tokenize(source, Mode::Module);
let (tokens, _) = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -231,7 +230,7 @@ mod tests {
fn block_comments_indented_block() {
// arrange
let source = " # line 1\n # line 2\n";
let tokens = tokenize(source, Mode::Module);
let (tokens, _) = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -246,7 +245,7 @@ mod tests {
fn block_comments_single_line_is_not_a_block() {
// arrange
let source = "\n";
let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
let (tokens, _) = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -261,7 +260,7 @@ mod tests {
fn block_comments_lines_with_code_not_a_block() {
// arrange
let source = "x = 1 # line 1\ny = 2 # line 2\n";
let tokens = tokenize(source, Mode::Module);
let (tokens, _) = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -276,7 +275,7 @@ mod tests {
fn block_comments_sequential_lines_not_in_block() {
// arrange
let source = " # line 1\n # line 2\n";
let tokens = tokenize(source, Mode::Module);
let (tokens, _) = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -296,7 +295,7 @@ mod tests {
# line 2
"""
"#;
let tokens = tokenize(source, Mode::Module);
let (tokens, _) = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -333,7 +332,7 @@ y = 2 # do not form a block comment
# therefore do not form a block comment
"""
"#;
let tokens = tokenize(source, Mode::Module);
let (tokens, _) = tokenize(source, Mode::Module);
let locator = Locator::new(source);
let indexer = Indexer::from_tokens(&tokens, &locator);

View File

@@ -13,7 +13,6 @@ use ruff_linter::{
use ruff_python_ast::PySourceType;
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::AsMode;
use ruff_source_file::Locator;
use ruff_text_size::Ranged;
@@ -52,7 +51,7 @@ pub(crate) fn check(
let source_kind = SourceKind::Python(contents.to_string());
// Tokenize once.
let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, source_type.as_mode());
let (tokens, kinds) = ruff_python_parser::tokenize(contents, source_type.as_mode());
// Map row and column locations to byte slices (lazily).
let locator = Locator::with_index(contents, index);
@@ -81,7 +80,7 @@ pub(crate) fn check(
flags::Noqa::Enabled,
&source_kind,
source_type,
TokenSource::Tokens(tokens),
TokenSource::Tokens(tokens, kinds),
);
diagnostics

View File

@@ -275,6 +275,7 @@ impl Strategy for StrategyRemoveToken {
_ast: &'a Suite,
) -> Result<Box<dyn ExactSizeStringIter + 'a>> {
let token_ranges: Vec<_> = ruff_python_parser::tokenize(input, Mode::Module)
.0
.into_iter()
// At this point we know we have valid python code
.map(Result::unwrap)
@@ -319,7 +320,7 @@ fn minimization_step(
pattern: &Regex,
last_strategy_and_idx: Option<(&'static dyn Strategy, usize)>,
) -> Result<Option<(&'static dyn Strategy, usize, String)>> {
let tokens = ruff_python_parser::tokenize(input, Mode::Module);
let (tokens, _) = ruff_python_parser::tokenize(input, Mode::Module);
let ast = ruff_python_parser::parse_program_tokens(tokens, input, false)
.context("not valid python")?;

View File

@@ -17,7 +17,6 @@ use ruff_python_ast::{Mod, PySourceType};
use ruff_python_codegen::Stylist;
use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext, QuoteStyle};
use ruff_python_index::{CommentRangesBuilder, Indexer};
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{parse_tokens, tokenize_all, AsMode, Mode, Program};
use ruff_python_trivia::CommentRanges;
use ruff_source_file::{Locator, SourceLocation};
@@ -162,7 +161,7 @@ impl Workspace {
let source_kind = SourceKind::Python(contents.to_string());
// Tokenize once.
let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, source_type.as_mode());
let (tokens, kinds) = ruff_python_parser::tokenize(contents, source_type.as_mode());
// Map row and column locations to byte slices (lazily).
let locator = Locator::new(contents);
@@ -192,7 +191,7 @@ impl Workspace {
flags::Noqa::Enabled,
&source_kind,
source_type,
TokenSource::Tokens(tokens),
TokenSource::Tokens(tokens, kinds),
);
let source_code = locator.to_source_code();
@@ -256,7 +255,7 @@ impl Workspace {
}
pub fn tokens(&self, contents: &str) -> Result<String, Error> {
let tokens: Vec<_> = ruff_python_parser::lexer::lex(contents, Mode::Module).collect();
let tokens = ruff_python_parser::tokenize_all(contents, Mode::Module);
Ok(format!("{tokens:#?}"))
}