Compare commits

...

1 Commit

Author SHA1 Message Date
Charlie Marsh
c3311f300e Avoid extra String allocation during parsing 2024-01-07 10:35:17 -05:00
16 changed files with 56 additions and 57 deletions

View File

@@ -168,7 +168,7 @@ pub(crate) fn avoidable_escaped_quote(
match tok {
Tok::String {
value: string_contents,
value,
kind,
triple_quoted,
} => {
@@ -176,6 +176,8 @@ pub(crate) fn avoidable_escaped_quote(
continue;
}
let string_contents = locator.slice(&value);
// Check if we're using the preferred quotation style.
if !leading_quote(locator.slice(tok_range)).is_some_and(|text| {
contains_quote(text, quotes_settings.inline_quotes.as_char())
@@ -312,7 +314,7 @@ pub(crate) fn unnecessary_escaped_quote(
match tok {
Tok::String {
value: string_contents,
value,
kind,
triple_quoted,
} => {
@@ -320,6 +322,8 @@ pub(crate) fn unnecessary_escaped_quote(
continue;
}
let string_contents = locator.slice(&value);
let leading = match leading_quote(locator.slice(tok_range)) {
Some("\"") => Quote::Double,
Some("'") => Quote::Single,

View File

@@ -759,7 +759,7 @@ impl<'source> Lexer<'source> {
};
let tok = Tok::String {
value: self.source[TextRange::new(value_start, value_end)].to_string(),
value: TextRange::new(value_start, value_end),
kind,
triple_quoted,
};

View File

@@ -1605,8 +1605,8 @@ StringLiteralOrFString: StringType = {
StringLiteral: StringType = {
<location:@L> <string:string> <end_location:@R> =>? {
let (source, kind, triple_quoted) = string;
Ok(parse_string_literal(&source, kind, triple_quoted, (location..end_location).into())?)
let (value, kind, triple_quoted) = string;
Ok(parse_string_literal(&source_code[value], kind, triple_quoted, (location..end_location).into())?)
}
};
@@ -2061,7 +2061,7 @@ extern {
float => token::Tok::Float { value: <f64> },
complex => token::Tok::Complex { real: <f64>, imag: <f64> },
string => token::Tok::String {
value: <String>,
value: <TextRange>,
kind: <StringKind>,
triple_quoted: <bool>
},

View File

@@ -1,5 +1,5 @@
// auto-generated: "lalrpop 0.20.0"
// sha3: 031689e389556292d9dbd8a1b1ff8ca29bac76d83f1b345630481d620b89e1c2
// sha3: 28f158c07e00e286b0a28fb9af14b474f60e5d67d1dd47e5dddc93a4b622c46b
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
use crate::{
@@ -54,7 +54,7 @@ mod __parse__Top {
Variant4(Int),
Variant5((IpyEscapeKind, String)),
Variant6(String),
Variant7((String, StringKind, bool)),
Variant7((TextRange, StringKind, bool)),
Variant8(core::option::Option<token::Tok>),
Variant9(Option<Box<ast::Parameter>>),
Variant10(core::option::Option<Option<Box<ast::Parameter>>>),
@@ -18373,16 +18373,6 @@ mod __parse__Top {
_ => __symbol_type_mismatch()
}
}
// Pops the last entry off the parser symbol stack and unwraps it as a
// `Variant7` symbol, whose payload in this version of the generated parser is
// `(String, StringKind, bool)`.
//
// Returns the popped triple `(__l, payload, __r)` unchanged apart from
// stripping the `__Symbol::Variant7` wrapper. The two `TextSize` values are
// presumably the start/end offsets of the symbol -- confirm against the
// LALRPOP-generated driver.
fn __pop_Variant7<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
) -> (TextSize, (String, StringKind, bool), TextSize)
{
match __symbols.pop() {
Some((__l, __Symbol::Variant7(__v), __r)) => (__l, __v, __r),
// Reached for an empty stack as well as for any other symbol variant.
// NOTE(review): `__symbol_type_mismatch` is defined outside this view;
// presumably it diverges (panics) -- confirm.
_ => __symbol_type_mismatch()
}
}
fn __pop_Variant3<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
@@ -18393,6 +18383,16 @@ mod __parse__Top {
_ => __symbol_type_mismatch()
}
}
// Pops the last entry off the parser symbol stack and unwraps it as a
// `Variant7` symbol, whose payload in this version of the generated parser is
// `(TextRange, StringKind, bool)`.
//
// Returns the popped triple `(__l, payload, __r)` unchanged apart from
// stripping the `__Symbol::Variant7` wrapper. The two `TextSize` values are
// presumably the start/end offsets of the symbol -- confirm against the
// LALRPOP-generated driver.
fn __pop_Variant7<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
) -> (TextSize, (TextRange, StringKind, bool), TextSize)
{
match __symbols.pop() {
Some((__l, __Symbol::Variant7(__v), __r)) => (__l, __v, __r),
// Reached for an empty stack as well as for any other symbol variant.
// NOTE(review): `__symbol_type_mismatch` is defined outside this view;
// presumably it diverges (panics) -- confirm.
_ => __symbol_type_mismatch()
}
}
fn __pop_Variant67<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
@@ -36363,13 +36363,13 @@ fn __action217<
source_code: &str,
mode: Mode,
(_, location, _): (TextSize, TextSize, TextSize),
(_, string, _): (TextSize, (String, StringKind, bool), TextSize),
(_, string, _): (TextSize, (TextRange, StringKind, bool), TextSize),
(_, end_location, _): (TextSize, TextSize, TextSize),
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
{
let (source, kind, triple_quoted) = string;
Ok(parse_string_literal(&source, kind, triple_quoted, (location..end_location).into())?)
let (value, kind, triple_quoted) = string;
Ok(parse_string_literal(&source_code[value], kind, triple_quoted, (location..end_location).into())?)
}
}
@@ -52719,7 +52719,7 @@ fn __action937<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, (String, StringKind, bool), TextSize),
__0: (TextSize, (TextRange, StringKind, bool), TextSize),
__1: (TextSize, TextSize, TextSize),
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@@ -69997,7 +69997,7 @@ fn __action1494<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, (String, StringKind, bool), TextSize),
__0: (TextSize, (TextRange, StringKind, bool), TextSize),
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
let __start0 = __0.2;

View File

@@ -13,7 +13,7 @@ expression: lex_source(source)
),
(
String {
value: "",
value: 5..5,
kind: String,
triple_quoted: false,
},
@@ -37,7 +37,7 @@ expression: lex_source(source)
),
(
String {
value: "",
value: 16..16,
kind: String,
triple_quoted: false,
},

View File

@@ -5,7 +5,7 @@ expression: lex_source(source)
[
(
String {
value: "\\N{EN SPACE}",
value: 1..13,
kind: String,
triple_quoted: false,
},

View File

@@ -137,7 +137,7 @@ expression: lex_source(source)
),
(
String {
value: "",
value: 32..32,
kind: String,
triple_quoted: false,
},

View File

@@ -13,7 +13,7 @@ expression: lex_source(source)
),
(
String {
value: "a",
value: 7..8,
kind: String,
triple_quoted: false,
},
@@ -25,7 +25,7 @@ expression: lex_source(source)
),
(
String {
value: "b",
value: 15..16,
kind: String,
triple_quoted: false,
},
@@ -41,7 +41,7 @@ expression: lex_source(source)
),
(
String {
value: "c",
value: 24..25,
kind: String,
triple_quoted: false,
},
@@ -49,7 +49,7 @@ expression: lex_source(source)
),
(
String {
value: "d",
value: 34..35,
kind: String,
triple_quoted: false,
},

View File

@@ -5,7 +5,7 @@ expression: lex_source(source)
[
(
String {
value: "double",
value: 1..7,
kind: String,
triple_quoted: false,
},
@@ -13,7 +13,7 @@ expression: lex_source(source)
),
(
String {
value: "single",
value: 10..16,
kind: String,
triple_quoted: false,
},
@@ -21,7 +21,7 @@ expression: lex_source(source)
),
(
String {
value: "can\\'t",
value: 19..25,
kind: String,
triple_quoted: false,
},
@@ -29,7 +29,7 @@ expression: lex_source(source)
),
(
String {
value: "\\\\\\\"",
value: 28..32,
kind: String,
triple_quoted: false,
},
@@ -37,7 +37,7 @@ expression: lex_source(source)
),
(
String {
value: "\\t\\r\\n",
value: 35..41,
kind: String,
triple_quoted: false,
},
@@ -45,7 +45,7 @@ expression: lex_source(source)
),
(
String {
value: "\\g",
value: 44..46,
kind: String,
triple_quoted: false,
},
@@ -53,7 +53,7 @@ expression: lex_source(source)
),
(
String {
value: "raw\\'",
value: 50..55,
kind: RawString,
triple_quoted: false,
},
@@ -61,7 +61,7 @@ expression: lex_source(source)
),
(
String {
value: "\\420",
value: 58..62,
kind: String,
triple_quoted: false,
},
@@ -69,7 +69,7 @@ expression: lex_source(source)
),
(
String {
value: "\\200\\0a",
value: 65..72,
kind: String,
triple_quoted: false,
},

View File

@@ -5,7 +5,7 @@ expression: string_continuation_with_eol(MAC_EOL)
[
(
String {
value: "abc\\\rdef",
value: 1..9,
kind: String,
triple_quoted: false,
},

View File

@@ -5,7 +5,7 @@ expression: string_continuation_with_eol(UNIX_EOL)
[
(
String {
value: "abc\\\ndef",
value: 1..9,
kind: String,
triple_quoted: false,
},

View File

@@ -5,7 +5,7 @@ expression: string_continuation_with_eol(WINDOWS_EOL)
[
(
String {
value: "abc\\\r\ndef",
value: 1..10,
kind: String,
triple_quoted: false,
},

View File

@@ -5,7 +5,7 @@ expression: triple_quoted_eol(MAC_EOL)
[
(
String {
value: "\r test string\r ",
value: 3..18,
kind: String,
triple_quoted: true,
},

View File

@@ -5,7 +5,7 @@ expression: triple_quoted_eol(UNIX_EOL)
[
(
String {
value: "\n test string\n ",
value: 3..18,
kind: String,
triple_quoted: true,
},

View File

@@ -5,7 +5,7 @@ expression: triple_quoted_eol(WINDOWS_EOL)
[
(
String {
value: "\r\n test string\r\n ",
value: 3..20,
kind: String,
triple_quoted: true,
},

View File

@@ -7,7 +7,7 @@
use crate::Mode;
use ruff_python_ast::{Int, IpyEscapeKind};
use ruff_text_size::TextSize;
use ruff_text_size::{TextRange, TextSize};
use std::fmt;
/// The set of tokens the Python source code can be tokenized in.
@@ -37,8 +37,8 @@ pub enum Tok {
},
/// Token value for a string.
String {
/// The string value.
value: String,
/// The range of the string value.
value: TextRange,
/// The kind of string.
kind: StringKind,
/// Whether the string is triple quoted.
@@ -51,6 +51,8 @@ pub enum Tok {
/// part of the expression part and isn't an opening or closing brace.
FStringMiddle {
/// The string value.
// TODO(charlie): This could _maybe_ be a range, but we'd have to move logic into the parser
// to handle some escaping.
value: String,
/// Whether the string is raw or not.
is_raw: bool,
@@ -241,14 +243,7 @@ impl fmt::Display for Tok {
Int { value } => write!(f, "'{value}'"),
Float { value } => write!(f, "'{value}'"),
Complex { real, imag } => write!(f, "{real}j{imag}"),
String {
value,
kind,
triple_quoted,
} => {
let quotes = "\"".repeat(if *triple_quoted { 3 } else { 1 });
write!(f, "{kind}{quotes}{value}{quotes}")
}
String { .. } => write!(f, "String"),
FStringStart => f.write_str("FStringStart"),
FStringMiddle { value, .. } => f.write_str(value),
FStringEnd => f.write_str("FStringEnd"),