Compare commits
4 commits: perf-node-... charlie/pa

| Author | SHA1 | Date |
|---|---|---|
|  | 9ae4fb3b9f |  |
|  | c67d68271d |  |
|  | 56b148bb43 |  |
|  | 0a5a4f6d92 |  |
Cargo.lock (generated, 18 lines changed)
@@ -217,12 +217,12 @@ checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"

[[package]]
name = "bstr"
version = "1.6.2"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c2f7349907b712260e64b0afe2f84692af14a454be26187d9df565c7f69266a"
checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc"
dependencies = [
 "memchr",
 "regex-automata 0.3.9",
 "regex-automata 0.4.3",
 "serde",
]

@@ -1921,12 +1915,6 @@ dependencies = [
 "regex-syntax 0.6.29",
]

[[package]]
name = "regex-automata"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9"

[[package]]
name = "regex-automata"
version = "0.4.3"
@@ -2342,16 +2336,22 @@ version = "0.0.0"
dependencies = [
 "anyhow",
 "bitflags 2.4.1",
 "bstr",
 "codspeed-criterion-compat",
 "criterion",
 "insta",
 "is-macro",
 "itertools 0.12.1",
 "lalrpop",
 "lalrpop-util",
 "memchr",
 "mimalloc",
 "once_cell",
 "ruff_python_ast",
 "ruff_text_size",
 "rustc-hash",
 "static_assertions",
 "tikv-jemallocator",
 "tiny-keccak",
 "unicode-ident",
 "unicode_names2",
@@ -19,6 +19,7 @@ argfile = { version = "0.1.6" }
assert_cmd = { version = "2.0.13" }
bincode = { version = "1.3.3" }
bitflags = { version = "2.4.1" }
bstr = { version = "1.9.0" }
cachedir = { version = "0.3.1" }
chrono = { version = "0.4.33", default-features = false, features = ["clock"] }
clap = { version = "4.4.18", features = ["derive"] }
@@ -40,7 +40,9 @@ impl Violation for HardcodedBindAllInterfaces {
pub(crate) fn hardcoded_bind_all_interfaces(checker: &mut Checker, string: StringLike) {
    let is_bind_all_interface = match string {
        StringLike::StringLiteral(ast::ExprStringLiteral { value, .. }) => value == "0.0.0.0",
        StringLike::FStringLiteral(ast::FStringLiteralElement { value, .. }) => value == "0.0.0.0",
        StringLike::FStringLiteral(ast::FStringLiteralElement { value, .. }) => {
            &**value == "0.0.0.0"
        }
        StringLike::BytesLiteral(_) => return,
    };
@@ -15,7 +15,7 @@ fn to_f_string_expression_element(inner: &Expr) -> ast::FStringElement {
/// Convert a string to a [`ast::FStringElement::Literal`].
pub(super) fn to_f_string_literal_element(s: &str) -> ast::FStringElement {
    ast::FStringElement::Literal(ast::FStringLiteralElement {
        value: s.to_owned(),
        value: s.to_string().into_boxed_str(),
        range: TextRange::default(),
    })
}

@@ -53,7 +53,7 @@ pub(super) fn to_f_string_element(expr: &Expr) -> Option<ast::FStringElement> {
    match expr {
        Expr::StringLiteral(ast::ExprStringLiteral { value, range }) => {
            Some(ast::FStringElement::Literal(ast::FStringLiteralElement {
                value: value.to_string(),
                value: value.to_string().into_boxed_str(),
                range: *range,
            }))
        }
@@ -644,7 +644,7 @@ pub struct ComparableBytesLiteral<'a> {
impl<'a> From<&'a ast::BytesLiteral> for ComparableBytesLiteral<'a> {
    fn from(bytes_literal: &'a ast::BytesLiteral) -> Self {
        Self {
            value: bytes_literal.value.as_slice(),
            value: &bytes_literal.value,
        }
    }
}
@@ -949,7 +949,7 @@ impl Ranged for FStringExpressionElement {
#[derive(Clone, Debug, PartialEq)]
pub struct FStringLiteralElement {
    pub range: TextRange,
    pub value: String,
    pub value: Box<str>,
}

impl Ranged for FStringLiteralElement {

@@ -962,7 +962,7 @@ impl Deref for FStringLiteralElement {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        self.value.as_str()
        &self.value
    }
}

@@ -1607,7 +1607,7 @@ impl Default for BytesLiteralValueInner {
#[derive(Clone, Debug, Default, PartialEq)]
pub struct BytesLiteral {
    pub range: TextRange,
    pub value: Vec<u8>,
    pub value: Box<[u8]>,
}

impl Ranged for BytesLiteral {

@@ -1620,7 +1620,7 @@ impl Deref for BytesLiteral {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        self.value.as_slice()
        &self.value
    }
}
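These AST hunks all follow one pattern: literal nodes now own a `Box<str>` or `Box<[u8]>` instead of a `String`/`Vec<u8>`, and the `Deref` impls let call sites keep borrowing `&str`/`&[u8]` (hence `&**value == "0.0.0.0"` in the S104 rule above). A minimal sketch of that pattern, using an illustrative type name rather than the actual ruff structs:

```rust
use std::ops::Deref;

// Owns its text without the spare-capacity field a `String` carries.
struct LiteralElement {
    value: Box<str>,
}

impl Deref for LiteralElement {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        // `Box<str>` derefs to `str`, so a plain re-borrow is enough.
        &self.value
    }
}

fn main() {
    let element = LiteralElement {
        value: "0.0.0.0".to_string().into_boxed_str(),
    };
    // Call sites compare through the deref, mirroring `&**value == "0.0.0.0"` in the diff.
    assert_eq!(&*element, "0.0.0.0");
}
```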
@@ -12,6 +12,11 @@ license = { workspace = true }
build = "build.rs"

[lib]
bench = false

[[bench]]
name = "string"
harness = false

[dependencies]
ruff_python_ast = { path = "../ruff_python_ast" }

@@ -19,14 +24,24 @@ ruff_text_size = { path = "../ruff_text_size" }

anyhow = { workspace = true }
bitflags = { workspace = true }
bstr = { workspace = true }
is-macro = { workspace = true }
itertools = { workspace = true }
lalrpop-util = { workspace = true, default-features = false }
memchr = { workspace = true }
unicode-ident = { workspace = true }
unicode_names2 = { workspace = true }
rustc-hash = { workspace = true }
static_assertions = { workspace = true }
unicode-ident = { workspace = true }
unicode_names2 = { workspace = true }
once_cell = "1.19.0"
criterion = { workspace = true, default-features = false }
codspeed-criterion-compat = { workspace = true, default-features = false, optional = true}

[target.'cfg(target_os = "windows")'.dev-dependencies]
mimalloc = { workspace = true }

[target.'cfg(all(not(target_os = "windows"), not(target_os = "openbsd"), any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64")))'.dev-dependencies]
tikv-jemallocator = { workspace = true }

[dev-dependencies]
insta = { workspace = true }
crates/ruff_python_parser/benches/string.rs (new file, 93 lines)
@@ -0,0 +1,93 @@
use criterion::{
    black_box, criterion_group, criterion_main, measurement::WallTime, BatchSize, Criterion,
};
use ruff_python_parser::StringKind;
use ruff_text_size::TextRange;

#[cfg(target_os = "windows")]
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;

#[cfg(all(
    not(target_os = "windows"),
    not(target_os = "openbsd"),
    any(
        target_arch = "x86_64",
        target_arch = "aarch64",
        target_arch = "powerpc64"
    )
))]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

fn benchmark_parser(criterion: &mut Criterion<WallTime>) {
    let mut group = criterion.benchmark_group("parse");

    let s = "\"\"\"Validate length based{ on BIN for major brands:
https://en.wikipedia.org/wiki/Payment_card_number#Issuer_identification_number_(IIN)\"\"\"";

    // group.bench_with_input("new_string", &s, |b, &s| {
    //     b.iter_batched(
    //         || s.to_string().into_boxed_str(),
    //         |data| {
    //             ruff_python_parser::string::parse_string_literal(
    //                 black_box(data),
    //                 StringKind::String,
    //                 true,
    //                 TextRange::default(),
    //             )
    //         },
    //         BatchSize::SmallInput,
    //     );
    // });
    //
    // group.bench_function("old_string", |b| {
    //     b.iter_batched(
    //         || s.to_string(),
    //         |data| {
    //             ruff_python_parser::old_string::parse_string_literal(
    //                 black_box(&data),
    //                 StringKind::String,
    //                 true,
    //                 TextRange::default(),
    //             )
    //         },
    //         BatchSize::SmallInput,
    //     );
    // });

    let s = "Item {i+1}";

    group.bench_with_input("new_fstring", &s, |b, &s| {
        b.iter_batched(
            || s.to_string().into_boxed_str(),
            |data| {
                ruff_python_parser::string::parse_fstring_literal_element(
                    black_box(data),
                    true,
                    TextRange::default(),
                )
            },
            BatchSize::SmallInput,
        );
    });

    group.bench_function("old_fstring", |b| {
        b.iter_batched(
            || s.to_string(),
            |data| {
                ruff_python_parser::old_string::parse_fstring_literal_element(
                    black_box(&data),
                    true,
                    TextRange::default(),
                )
            },
            BatchSize::SmallInput,
        );
    });

    group.finish();
}

criterion_group!(parser, benchmark_parser);
criterion_main!(parser);
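The benchmark builds the owned, boxed input inside the `iter_batched` setup closure, so the allocation happens outside the timed region and only the parse call is measured. A stripped-down sketch of that Criterion pattern (the timed closure below is a placeholder for the real `parse_*` call):

```rust
use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};

fn bench_boxed_input(criterion: &mut Criterion) {
    let source = "Item {i+1}";

    criterion.bench_function("boxed_input", |b| {
        b.iter_batched(
            // Setup is untimed: allocate the owned, boxed input here.
            || source.to_string().into_boxed_str(),
            // Only this closure is timed; stand-in work instead of the real parse call.
            |input: Box<str>| black_box(input.len()),
            BatchSize::SmallInput,
        );
    });
}

criterion_group!(benches, bench_boxed_input);
criterion_main!(benches);
```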
@@ -119,14 +119,15 @@ pub use token::{StringKind, Tok, TokenKind};

use crate::lexer::LexResult;

mod function;
// Skip flattening lexer to distinguish from full ruff_python_parser
mod context;
mod function;
mod invalid;
// Skip flattening lexer to distinguish from full ruff_python_parser
pub mod lexer;
pub mod old_string;
mod parser;
mod soft_keywords;
mod string;
pub mod string;
mod token;
mod token_source;
pub mod typing;
crates/ruff_python_parser/src/old_string.rs (new file, 820 lines)
@@ -0,0 +1,820 @@
|
||||
//! Parsing of string literals, bytes literals, and implicit string concatenation.
|
||||
|
||||
use ruff_python_ast::{self as ast, Expr};
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
|
||||
use crate::lexer::{LexicalError, LexicalErrorType};
|
||||
use crate::string::FStringError;
|
||||
use crate::token::{StringKind, Tok};
|
||||
|
||||
pub enum StringType {
|
||||
Str(ast::StringLiteral),
|
||||
Bytes(ast::BytesLiteral),
|
||||
FString(ast::FString),
|
||||
}
|
||||
|
||||
impl Ranged for StringType {
|
||||
fn range(&self) -> TextRange {
|
||||
match self {
|
||||
Self::Str(node) => node.range(),
|
||||
Self::Bytes(node) => node.range(),
|
||||
Self::FString(node) => node.range(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StringType> for Expr {
|
||||
fn from(string: StringType) -> Self {
|
||||
match string {
|
||||
StringType::Str(node) => Expr::from(node),
|
||||
StringType::Bytes(node) => Expr::from(node),
|
||||
StringType::FString(node) => Expr::from(node),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct StringParser<'a> {
|
||||
rest: &'a str,
|
||||
kind: StringKind,
|
||||
location: TextSize,
|
||||
range: TextRange,
|
||||
}
|
||||
|
||||
impl<'a> StringParser<'a> {
|
||||
fn new(source: &'a str, kind: StringKind, start: TextSize, range: TextRange) -> Self {
|
||||
Self {
|
||||
rest: source,
|
||||
kind,
|
||||
location: start,
|
||||
range,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn skip_bytes(&mut self, bytes: usize) -> &'a str {
|
||||
let skipped_str = &self.rest[..bytes];
|
||||
self.rest = &self.rest[bytes..];
|
||||
self.location += skipped_str.text_len();
|
||||
skipped_str
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_pos(&self) -> TextSize {
|
||||
self.location
|
||||
}
|
||||
|
||||
/// Returns the next byte in the string, if there is one.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When the next byte is a part of a multi-byte character.
|
||||
#[inline]
|
||||
fn next_byte(&mut self) -> Option<u8> {
|
||||
self.rest.as_bytes().first().map(|&byte| {
|
||||
self.rest = &self.rest[1..];
|
||||
self.location += TextSize::new(1);
|
||||
byte
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next_char(&mut self) -> Option<char> {
|
||||
self.rest.chars().next().map(|c| {
|
||||
self.rest = &self.rest[c.len_utf8()..];
|
||||
self.location += c.text_len();
|
||||
c
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn peek_byte(&self) -> Option<u8> {
|
||||
self.rest.as_bytes().first().copied()
|
||||
}
|
||||
|
||||
fn parse_unicode_literal(&mut self, literal_number: usize) -> Result<char, LexicalError> {
|
||||
let mut p: u32 = 0u32;
|
||||
let unicode_error = LexicalError::new(LexicalErrorType::UnicodeError, self.get_pos());
|
||||
for i in 1..=literal_number {
|
||||
match self.next_char() {
|
||||
Some(c) => match c.to_digit(16) {
|
||||
Some(d) => p += d << ((literal_number - i) * 4),
|
||||
None => return Err(unicode_error),
|
||||
},
|
||||
None => return Err(unicode_error),
|
||||
}
|
||||
}
|
||||
match p {
|
||||
0xD800..=0xDFFF => Ok(std::char::REPLACEMENT_CHARACTER),
|
||||
_ => std::char::from_u32(p).ok_or(unicode_error),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_octet(&mut self, o: u8) -> char {
|
||||
let mut radix_bytes = [o, 0, 0];
|
||||
let mut len = 1;
|
||||
|
||||
while len < 3 {
|
||||
let Some(b'0'..=b'7') = self.peek_byte() else {
|
||||
break;
|
||||
};
|
||||
|
||||
radix_bytes[len] = self.next_byte().unwrap();
|
||||
len += 1;
|
||||
}
|
||||
|
||||
// OK because radix_bytes is always going to be in the ASCII range.
|
||||
let radix_str = std::str::from_utf8(&radix_bytes[..len]).expect("ASCII bytes");
|
||||
let value = u32::from_str_radix(radix_str, 8).unwrap();
|
||||
char::from_u32(value).unwrap()
|
||||
}
|
||||
|
||||
fn parse_unicode_name(&mut self) -> Result<char, LexicalError> {
|
||||
let start_pos = self.get_pos();
|
||||
|
||||
let Some('{') = self.next_char() else {
|
||||
return Err(LexicalError::new(LexicalErrorType::StringError, start_pos));
|
||||
};
|
||||
|
||||
let start_pos = self.get_pos();
|
||||
let Some(close_idx) = self.rest.find('}') else {
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::StringError,
|
||||
self.get_pos(),
|
||||
));
|
||||
};
|
||||
|
||||
let name_and_ending = self.skip_bytes(close_idx + 1);
|
||||
let name = &name_and_ending[..name_and_ending.len() - 1];
|
||||
|
||||
unicode_names2::character(name)
|
||||
.ok_or_else(|| LexicalError::new(LexicalErrorType::UnicodeError, start_pos))
|
||||
}
|
||||
|
||||
fn parse_escaped_char(&mut self, string: &mut String) -> Result<(), LexicalError> {
|
||||
let Some(first_char) = self.next_char() else {
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::StringError,
|
||||
self.get_pos(),
|
||||
));
|
||||
};
|
||||
|
||||
let new_char = match first_char {
|
||||
'\\' => '\\',
|
||||
'\'' => '\'',
|
||||
'\"' => '"',
|
||||
'a' => '\x07',
|
||||
'b' => '\x08',
|
||||
'f' => '\x0c',
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'v' => '\x0b',
|
||||
o @ '0'..='7' => self.parse_octet(o as u8),
|
||||
'x' => self.parse_unicode_literal(2)?,
|
||||
'u' if !self.kind.is_any_bytes() => self.parse_unicode_literal(4)?,
|
||||
'U' if !self.kind.is_any_bytes() => self.parse_unicode_literal(8)?,
|
||||
'N' if !self.kind.is_any_bytes() => self.parse_unicode_name()?,
|
||||
// Special cases where the escape sequence is not a single character
|
||||
'\n' => return Ok(()),
|
||||
'\r' => {
|
||||
if self.peek_byte() == Some(b'\n') {
|
||||
self.next_byte();
|
||||
}
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
_ => {
|
||||
if self.kind.is_any_bytes() && !first_char.is_ascii() {
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::OtherError(
|
||||
"bytes can only contain ASCII literal characters"
|
||||
.to_string()
|
||||
.into_boxed_str(),
|
||||
),
|
||||
self.get_pos(),
|
||||
));
|
||||
}
|
||||
|
||||
string.push('\\');
|
||||
|
||||
first_char
|
||||
}
|
||||
};
|
||||
|
||||
string.push(new_char);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_fstring_middle(&mut self) -> Result<ast::FStringElement, LexicalError> {
|
||||
let mut value = String::with_capacity(self.rest.len());
|
||||
while let Some(ch) = self.next_char() {
|
||||
match ch {
|
||||
// We can encounter a `\` as the last character in a `FStringMiddle`
|
||||
// token which is valid in this context. For example,
|
||||
//
|
||||
// ```python
|
||||
// f"\{foo} \{bar:\}"
|
||||
// # ^ ^^ ^
|
||||
// ```
|
||||
//
|
||||
// Here, the `FStringMiddle` token content will be "\" and " \"
|
||||
// which is invalid if we look at the content in isolation:
|
||||
//
|
||||
// ```python
|
||||
// "\"
|
||||
// ```
|
||||
//
|
||||
// However, the content is syntactically valid in the context of
|
||||
// the f-string because it's a substring of the entire f-string.
|
||||
// This is still an invalid escape sequence, but we don't want to
|
||||
// raise a syntax error as is done by the CPython parser. It might
|
||||
// be supported in the future, refer to point 3: https://peps.python.org/pep-0701/#rejected-ideas
|
||||
'\\' if !self.kind.is_raw() && self.peek_byte().is_some() => {
|
||||
self.parse_escaped_char(&mut value)?;
|
||||
}
|
||||
// If there are any curly braces inside a `FStringMiddle` token,
|
||||
// then they were escaped (i.e. `{{` or `}}`). This means that
|
||||
// we need to increase the location by 2 instead of 1.
|
||||
ch @ ('{' | '}') => {
|
||||
self.location += ch.text_len();
|
||||
value.push(ch);
|
||||
}
|
||||
ch => value.push(ch),
|
||||
}
|
||||
}
|
||||
Ok(ast::FStringElement::Literal(ast::FStringLiteralElement {
|
||||
value: value.into_boxed_str(),
|
||||
range: self.range,
|
||||
}))
|
||||
}
|
||||
|
||||
fn parse_bytes(&mut self) -> Result<StringType, LexicalError> {
|
||||
let mut content = String::with_capacity(self.rest.len());
|
||||
while let Some(ch) = self.next_char() {
|
||||
match ch {
|
||||
'\\' if !self.kind.is_raw() => {
|
||||
self.parse_escaped_char(&mut content)?;
|
||||
}
|
||||
ch => {
|
||||
if !ch.is_ascii() {
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::OtherError(
|
||||
"bytes can only contain ASCII literal characters"
|
||||
.to_string()
|
||||
.into_boxed_str(),
|
||||
),
|
||||
self.get_pos(),
|
||||
));
|
||||
}
|
||||
content.push(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(StringType::Bytes(ast::BytesLiteral {
|
||||
value: content
|
||||
.chars()
|
||||
.map(|c| c as u8)
|
||||
.collect::<Vec<u8>>()
|
||||
.into_boxed_slice(),
|
||||
range: self.range,
|
||||
}))
|
||||
}
|
||||
|
||||
fn parse_string(&mut self) -> Result<StringType, LexicalError> {
|
||||
let mut value = String::with_capacity(self.rest.len());
|
||||
if self.kind.is_raw() {
|
||||
value.push_str(self.skip_bytes(self.rest.len()));
|
||||
} else {
|
||||
loop {
|
||||
let Some(escape_idx) = self.rest.find('\\') else {
|
||||
value.push_str(self.skip_bytes(self.rest.len()));
|
||||
break;
|
||||
};
|
||||
|
||||
let before_with_slash = self.skip_bytes(escape_idx + 1);
|
||||
let before = &before_with_slash[..before_with_slash.len() - 1];
|
||||
|
||||
value.push_str(before);
|
||||
self.parse_escaped_char(&mut value)?;
|
||||
}
|
||||
}
|
||||
Ok(StringType::Str(ast::StringLiteral {
|
||||
value: value.into_boxed_str(),
|
||||
unicode: self.kind.is_unicode(),
|
||||
range: self.range,
|
||||
}))
|
||||
}
|
||||
|
||||
fn parse(&mut self) -> Result<StringType, LexicalError> {
|
||||
if self.kind.is_any_bytes() {
|
||||
self.parse_bytes()
|
||||
} else {
|
||||
self.parse_string()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_string_literal(
|
||||
source: &str,
|
||||
kind: StringKind,
|
||||
triple_quoted: bool,
|
||||
range: TextRange,
|
||||
) -> Result<StringType, LexicalError> {
|
||||
let start_location = range.start()
|
||||
+ kind.prefix_len()
|
||||
+ if triple_quoted {
|
||||
TextSize::from(3)
|
||||
} else {
|
||||
TextSize::from(1)
|
||||
};
|
||||
StringParser::new(source, kind, start_location, range).parse()
|
||||
}
|
||||
|
||||
pub fn parse_fstring_literal_element(
|
||||
source: &str,
|
||||
is_raw: bool,
|
||||
range: TextRange,
|
||||
) -> Result<ast::FStringElement, LexicalError> {
|
||||
let kind = if is_raw {
|
||||
StringKind::RawString
|
||||
} else {
|
||||
StringKind::String
|
||||
};
|
||||
StringParser::new(source, kind, range.start(), range).parse_fstring_middle()
|
||||
}
|
||||
|
||||
pub(crate) fn concatenated_strings(
|
||||
strings: Vec<StringType>,
|
||||
range: TextRange,
|
||||
) -> Result<Expr, LexicalError> {
|
||||
#[cfg(debug_assertions)]
|
||||
debug_assert!(strings.len() > 1);
|
||||
|
||||
let mut has_fstring = false;
|
||||
let mut byte_literal_count = 0;
|
||||
for string in &strings {
|
||||
match string {
|
||||
StringType::FString(_) => has_fstring = true,
|
||||
StringType::Bytes(_) => byte_literal_count += 1,
|
||||
StringType::Str(_) => {}
|
||||
}
|
||||
}
|
||||
let has_bytes = byte_literal_count > 0;
|
||||
|
||||
if has_bytes && byte_literal_count < strings.len() {
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::OtherError(
|
||||
"cannot mix bytes and nonbytes literals"
|
||||
.to_string()
|
||||
.into_boxed_str(),
|
||||
),
|
||||
range.start(),
|
||||
));
|
||||
}
|
||||
|
||||
if has_bytes {
|
||||
let mut values = Vec::with_capacity(strings.len());
|
||||
for string in strings {
|
||||
match string {
|
||||
StringType::Bytes(value) => values.push(value),
|
||||
_ => unreachable!("Unexpected non-bytes literal."),
|
||||
}
|
||||
}
|
||||
return Ok(Expr::from(ast::ExprBytesLiteral {
|
||||
value: ast::BytesLiteralValue::concatenated(values),
|
||||
range,
|
||||
}));
|
||||
}
|
||||
|
||||
if !has_fstring {
|
||||
let mut values = Vec::with_capacity(strings.len());
|
||||
for string in strings {
|
||||
match string {
|
||||
StringType::Str(value) => values.push(value),
|
||||
_ => unreachable!("Unexpected non-string literal."),
|
||||
}
|
||||
}
|
||||
return Ok(Expr::from(ast::ExprStringLiteral {
|
||||
value: ast::StringLiteralValue::concatenated(values),
|
||||
range,
|
||||
}));
|
||||
}
|
||||
|
||||
let mut parts = Vec::with_capacity(strings.len());
|
||||
for string in strings {
|
||||
match string {
|
||||
StringType::FString(fstring) => parts.push(ast::FStringPart::FString(fstring)),
|
||||
StringType::Str(string) => parts.push(ast::FStringPart::Literal(string)),
|
||||
StringType::Bytes(_) => unreachable!("Unexpected bytes literal."),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ast::ExprFString {
|
||||
value: ast::FStringValue::concatenated(parts),
|
||||
range,
|
||||
}
|
||||
.into())
|
||||
}
|
||||
|
||||
/// Represents the different types of errors that can occur during parsing of an f-string.
|
||||
#[derive(Copy, Debug, Clone, PartialEq)]
|
||||
pub enum FStringErrorType {
|
||||
/// Expected a right brace after an opened left brace.
|
||||
UnclosedLbrace,
|
||||
/// An invalid conversion flag was encountered.
|
||||
InvalidConversionFlag,
|
||||
/// A single right brace was encountered.
|
||||
SingleRbrace,
|
||||
/// Unterminated string.
|
||||
UnterminatedString,
|
||||
/// Unterminated triple-quoted string.
|
||||
UnterminatedTripleQuotedString,
|
||||
// TODO(dhruvmanila): The parser can't catch all cases of this error, but
|
||||
// wherever it can, we'll display the correct error message.
|
||||
/// A lambda expression without parentheses was encountered.
|
||||
LambdaWithoutParentheses,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for FStringErrorType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
use FStringErrorType::{
|
||||
InvalidConversionFlag, LambdaWithoutParentheses, SingleRbrace, UnclosedLbrace,
|
||||
UnterminatedString, UnterminatedTripleQuotedString,
|
||||
};
|
||||
match self {
|
||||
UnclosedLbrace => write!(f, "expecting '}}'"),
|
||||
InvalidConversionFlag => write!(f, "invalid conversion character"),
|
||||
SingleRbrace => write!(f, "single '}}' is not allowed"),
|
||||
UnterminatedString => write!(f, "unterminated string"),
|
||||
UnterminatedTripleQuotedString => write!(f, "unterminated triple-quoted string"),
|
||||
LambdaWithoutParentheses => {
|
||||
write!(f, "lambda expressions are not allowed without parentheses")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::lexer::LexicalErrorType;
|
||||
use crate::parser::parse_suite;
|
||||
use crate::{ParseErrorType, Suite};
|
||||
|
||||
use super::*;
|
||||
|
||||
const WINDOWS_EOL: &str = "\r\n";
|
||||
const MAC_EOL: &str = "\r";
|
||||
const UNIX_EOL: &str = "\n";
|
||||
|
||||
fn string_parser_escaped_eol(eol: &str) -> Suite {
|
||||
let source = format!(r"'text \{eol}more text'");
|
||||
parse_suite(&source).unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_parser_escaped_unix_eol() {
|
||||
let parse_ast = string_parser_escaped_eol(UNIX_EOL);
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_parser_escaped_mac_eol() {
|
||||
let parse_ast = string_parser_escaped_eol(MAC_EOL);
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_parser_escaped_windows_eol() {
|
||||
let parse_ast = string_parser_escaped_eol(WINDOWS_EOL);
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_fstring() {
|
||||
let source = r#"f"{a}{ b }{{foo}}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_fstring_nested_spec() {
|
||||
let source = r#"f"{foo:{spec}}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_fstring_not_nested_spec() {
|
||||
let source = r#"f"{foo:spec}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_empty_fstring() {
|
||||
insta::assert_debug_snapshot!(parse_suite(r#"f"""#,).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_parse_self_documenting_base() {
|
||||
let source = r#"f"{user=}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_parse_self_documenting_base_more() {
|
||||
let source = r#"f"mix {user=} with text and {second=}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_parse_self_documenting_format() {
|
||||
let source = r#"f"{user=:>10}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
fn parse_fstring_error(source: &str) -> FStringErrorType {
|
||||
parse_suite(source)
|
||||
.map_err(|e| match e.error {
|
||||
ParseErrorType::Lexical(LexicalErrorType::FStringError(e)) => e,
|
||||
e => unreachable!("Expected FStringError: {:?}", e),
|
||||
})
|
||||
.expect_err("Expected error")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_invalid_fstring() {
|
||||
use FStringErrorType::{InvalidConversionFlag, LambdaWithoutParentheses};
|
||||
|
||||
assert_eq!(parse_fstring_error(r#"f"{5!x}""#), InvalidConversionFlag);
|
||||
assert_eq!(
|
||||
parse_fstring_error("f'{lambda x:{x}}'"),
|
||||
LambdaWithoutParentheses
|
||||
);
|
||||
assert_eq!(
|
||||
parse_fstring_error("f'{lambda x: {x}}'"),
|
||||
LambdaWithoutParentheses
|
||||
);
|
||||
assert!(parse_suite(r#"f"{class}""#,).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_fstring_not_equals() {
|
||||
let source = r#"f"{1 != 2}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_fstring_equals() {
|
||||
let source = r#"f"{42 == 42}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_fstring_self_doc_prec_space() {
|
||||
let source = r#"f"{x =}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_fstring_self_doc_trailing_space() {
|
||||
let source = r#"f"{x= }""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_fstring_yield_expr() {
|
||||
let source = r#"f"{yield}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_string_concat() {
|
||||
let source = "'Hello ' 'world'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u_string_concat_1() {
|
||||
let source = "'Hello ' u'world'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u_string_concat_2() {
|
||||
let source = "u'Hello ' 'world'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_f_string_concat_1() {
|
||||
let source = "'Hello ' f'world'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_f_string_concat_2() {
|
||||
let source = "'Hello ' f'world'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_f_string_concat_3() {
|
||||
let source = "'Hello ' f'world{\"!\"}'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_f_string_concat_4() {
|
||||
let source = "'Hello ' f'world{\"!\"}' 'again!'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u_f_string_concat_1() {
|
||||
let source = "u'Hello ' f'world'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u_f_string_concat_2() {
|
||||
let source = "u'Hello ' f'world' '!'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_string_triple_quotes_with_kind() {
|
||||
let source = "u'''Hello, world!'''";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_quoted_byte() {
|
||||
// single quote
|
||||
let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_double_quoted_byte() {
|
||||
// double quote
|
||||
let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_char_in_byte_literal() {
|
||||
// backslash does not escape
|
||||
let source = r#"b"omkmok\Xaa""#; // spell-checker:ignore omkmok
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_raw_byte_literal_1() {
|
||||
let source = r"rb'\x1z'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_raw_byte_literal_2() {
|
||||
let source = r"rb'\\'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_octet() {
|
||||
let source = r"b'\43a\4\1234'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_escaped_newline() {
|
||||
let source = r#"f"\n{x}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_constant_range() {
|
||||
let source = r#"f"aaa{bbb}ccc{ddd}eee""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_unescaped_newline() {
|
||||
let source = r#"f"""
|
||||
{x}""""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_escaped_character() {
|
||||
let source = r#"f"\\{x}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_raw_fstring() {
|
||||
let source = r#"rf"{x}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_triple_quoted_raw_fstring() {
|
||||
let source = r#"rf"""{x}""""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_line_continuation() {
|
||||
let source = r#"rf"\
|
||||
{x}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_fstring_nested_string_spec() {
|
||||
let source = r#"f"{foo:{''}}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_fstring_nested_concatenation_string_spec() {
|
||||
let source = r#"f"{foo:{'' ''}}""#;
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
/// <https://github.com/astral-sh/ruff/issues/8355>
|
||||
#[test]
|
||||
fn test_dont_panic_on_8_in_octal_escape() {
|
||||
let source = r"bold = '\038[1m'";
|
||||
let parse_ast = parse_suite(source).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
macro_rules! test_aliases_parse {
|
||||
($($name:ident: $alias:expr,)*) => {
|
||||
$(
|
||||
#[test]
|
||||
fn $name() {
|
||||
let source = format!(r#""\N{{{0}}}""#, $alias);
|
||||
let parse_ast = parse_suite(&source).unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
test_aliases_parse! {
|
||||
test_backspace_alias: "BACKSPACE",
|
||||
test_bell_alias: "BEL",
|
||||
test_carriage_return_alias: "CARRIAGE RETURN",
|
||||
test_delete_alias: "DELETE",
|
||||
test_escape_alias: "ESCAPE",
|
||||
test_form_feed_alias: "FORM FEED",
|
||||
test_hts_alias: "HTS",
|
||||
test_character_tabulation_with_justification_alias: "CHARACTER TABULATION WITH JUSTIFICATION",
|
||||
}
|
||||
}
|
||||
@@ -1616,7 +1616,7 @@ StringLiteralOrFString: StringType = {
|
||||
StringLiteral: StringType = {
|
||||
<location:@L> <string:string> <end_location:@R> =>? {
|
||||
let (source, kind, triple_quoted) = string;
|
||||
Ok(parse_string_literal(&source, kind, triple_quoted, (location..end_location).into())?)
|
||||
Ok(parse_string_literal(source, kind, triple_quoted, (location..end_location).into())?)
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1633,7 +1633,7 @@ FStringMiddlePattern: ast::FStringElement = {
|
||||
FStringReplacementField,
|
||||
<location:@L> <fstring_middle:fstring_middle> <end_location:@R> =>? {
|
||||
let (source, is_raw, _) = fstring_middle;
|
||||
Ok(parse_fstring_literal_element(&source, is_raw, (location..end_location).into())?)
|
||||
Ok(parse_fstring_literal_element(source, is_raw, (location..end_location).into())?)
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// auto-generated: "lalrpop 0.20.0"
|
||||
// sha3: 02c60b5c591440061dda68775005d87a203b5448c205120bda1566a62fc2147c
|
||||
// sha3: d38cc0f2252a58db42d3bd63a102b537865992b3cf51d402cdb4828f48989c9d
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
|
||||
use crate::{
|
||||
@@ -36369,7 +36369,7 @@ fn __action217<
|
||||
{
|
||||
{
|
||||
let (source, kind, triple_quoted) = string;
|
||||
Ok(parse_string_literal(&source, kind, triple_quoted, (location..end_location).into())?)
|
||||
Ok(parse_string_literal(source, kind, triple_quoted, (location..end_location).into())?)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36419,7 +36419,7 @@ fn __action220<
|
||||
{
|
||||
{
|
||||
let (source, is_raw, _) = fstring_middle;
|
||||
Ok(parse_fstring_literal_element(&source, is_raw, (location..end_location).into())?)
|
||||
Ok(parse_fstring_literal_element(source, is_raw, (location..end_location).into())?)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
//! Parsing of string literals, bytes literals, and implicit string concatenation.
|
||||
|
||||
use bstr::ByteSlice;
|
||||
|
||||
use ruff_python_ast::{self as ast, Expr};
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
|
||||
use crate::lexer::{LexicalError, LexicalErrorType};
|
||||
use crate::token::{StringKind, Tok};
|
||||
|
||||
pub(crate) enum StringType {
|
||||
pub enum StringType {
|
||||
Str(ast::StringLiteral),
|
||||
Bytes(ast::BytesLiteral),
|
||||
FString(ast::FString),
|
||||
@@ -32,34 +34,40 @@ impl From<StringType> for Expr {
|
||||
}
|
||||
}
|
||||
|
||||
struct StringParser<'a> {
|
||||
rest: &'a str,
|
||||
enum EscapedChar {
|
||||
Literal(char),
|
||||
Escape(char),
|
||||
}
|
||||
|
||||
struct StringParser {
|
||||
source: Box<str>,
|
||||
cursor: usize,
|
||||
kind: StringKind,
|
||||
location: TextSize,
|
||||
offset: TextSize,
|
||||
range: TextRange,
|
||||
}
|
||||
|
||||
impl<'a> StringParser<'a> {
|
||||
fn new(source: &'a str, kind: StringKind, start: TextSize, range: TextRange) -> Self {
|
||||
impl StringParser {
|
||||
fn new(source: Box<str>, kind: StringKind, offset: TextSize, range: TextRange) -> Self {
|
||||
Self {
|
||||
rest: source,
|
||||
source,
|
||||
cursor: 0,
|
||||
kind,
|
||||
location: start,
|
||||
offset,
|
||||
range,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn skip_bytes(&mut self, bytes: usize) -> &'a str {
|
||||
let skipped_str = &self.rest[..bytes];
|
||||
self.rest = &self.rest[bytes..];
|
||||
self.location += skipped_str.text_len();
|
||||
fn skip_bytes(&mut self, bytes: usize) -> &str {
|
||||
let skipped_str = &self.source[self.cursor..self.cursor + bytes];
|
||||
self.cursor += bytes;
|
||||
skipped_str
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_pos(&self) -> TextSize {
|
||||
self.location
|
||||
self.offset + TextSize::try_from(self.cursor).unwrap()
|
||||
}
|
||||
|
||||
/// Returns the next byte in the string, if there is one.
|
||||
@@ -69,25 +77,23 @@ impl<'a> StringParser<'a> {
|
||||
/// When the next byte is a part of a multi-byte character.
|
||||
#[inline]
|
||||
fn next_byte(&mut self) -> Option<u8> {
|
||||
self.rest.as_bytes().first().map(|&byte| {
|
||||
self.rest = &self.rest[1..];
|
||||
self.location += TextSize::new(1);
|
||||
self.source[self.cursor..].as_bytes().first().map(|&byte| {
|
||||
self.cursor += 1;
|
||||
byte
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next_char(&mut self) -> Option<char> {
|
||||
self.rest.chars().next().map(|c| {
|
||||
self.rest = &self.rest[c.len_utf8()..];
|
||||
self.location += c.text_len();
|
||||
self.source[self.cursor..].chars().next().map(|c| {
|
||||
self.cursor += c.len_utf8();
|
||||
c
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn peek_byte(&self) -> Option<u8> {
|
||||
self.rest.as_bytes().first().copied()
|
||||
self.source[self.cursor..].as_bytes().first().copied()
|
||||
}
|
||||
|
||||
fn parse_unicode_literal(&mut self, literal_number: usize) -> Result<char, LexicalError> {
|
||||
@@ -135,7 +141,7 @@ impl<'a> StringParser<'a> {
|
||||
};
|
||||
|
||||
let start_pos = self.get_pos();
|
||||
let Some(close_idx) = self.rest.find('}') else {
|
||||
let Some(close_idx) = self.source[self.cursor..].find('}') else {
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::StringError,
|
||||
self.get_pos(),
|
||||
@@ -149,7 +155,8 @@ impl<'a> StringParser<'a> {
|
||||
.ok_or_else(|| LexicalError::new(LexicalErrorType::UnicodeError, start_pos))
|
||||
}
|
||||
|
||||
fn parse_escaped_char(&mut self, string: &mut String) -> Result<(), LexicalError> {
|
||||
/// Parse an escaped character, returning the new character.
|
||||
fn parse_escaped_char(&mut self) -> Result<Option<EscapedChar>, LexicalError> {
|
||||
let Some(first_char) = self.next_char() else {
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::StringError,
|
||||
@@ -174,13 +181,13 @@ impl<'a> StringParser<'a> {
|
||||
'U' if !self.kind.is_any_bytes() => self.parse_unicode_literal(8)?,
|
||||
'N' if !self.kind.is_any_bytes() => self.parse_unicode_name()?,
|
||||
// Special cases where the escape sequence is not a single character
|
||||
'\n' => return Ok(()),
|
||||
'\n' => return Ok(None),
|
||||
'\r' => {
|
||||
if self.peek_byte() == Some(b'\n') {
|
||||
self.next_byte();
|
||||
}
|
||||
|
||||
return Ok(());
|
||||
return Ok(None);
|
||||
}
|
||||
_ => {
|
||||
if self.kind.is_any_bytes() && !first_char.is_ascii() {
|
||||
@@ -194,21 +201,42 @@ impl<'a> StringParser<'a> {
|
||||
));
|
||||
}
|
||||
|
||||
string.push('\\');
|
||||
|
||||
first_char
|
||||
return Ok(Some(EscapedChar::Escape(first_char)));
|
||||
}
|
||||
};
|
||||
|
||||
string.push(new_char);
|
||||
|
||||
Ok(())
|
||||
Ok(Some(EscapedChar::Literal(new_char)))
|
||||
}
|
||||
|
||||
fn parse_fstring_middle(&mut self) -> Result<ast::FStringElement, LexicalError> {
|
||||
let mut value = String::with_capacity(self.rest.len());
|
||||
while let Some(ch) = self.next_char() {
|
||||
match ch {
|
||||
fn parse_fstring_middle(mut self) -> Result<ast::FStringElement, LexicalError> {
|
||||
// Fast-path: if the f-string doesn't contain any escape sequences, return the literal.
|
||||
let Some(mut index) = memchr::memchr3(b'{', b'}', b'\\', self.source.as_bytes()) else {
|
||||
return Ok(ast::FStringElement::Literal(ast::FStringLiteralElement {
|
||||
value: self.source,
|
||||
range: self.range,
|
||||
}));
|
||||
};
|
||||
|
||||
let mut value = String::with_capacity(self.source.len());
|
||||
loop {
|
||||
// Add the characters before the escape sequence (or curly brace) to the string.
|
||||
let before_with_slash_or_brace = self.skip_bytes(index + 1);
|
||||
let before = &before_with_slash_or_brace[..before_with_slash_or_brace.len() - 1];
|
||||
value.push_str(before);
|
||||
|
||||
// Add the escaped character to the string.
|
||||
match &self.source.as_bytes()[self.cursor - 1] {
|
||||
// If there are any curly braces inside a `FStringMiddle` token,
|
||||
// then they were escaped (i.e. `{{` or `}}`). This means that
|
||||
// we need to increase the location by 2 instead of 1.
|
||||
b'{' => {
|
||||
self.offset += TextSize::from(1);
|
||||
value.push('{');
|
||||
}
|
||||
b'}' => {
|
||||
self.offset += TextSize::from(1);
|
||||
value.push('}');
|
||||
}
|
||||
// We can encounter a `\` as the last character in a `FStringMiddle`
|
||||
// token which is valid in this context. For example,
|
||||
//
|
||||
@@ -229,71 +257,152 @@ impl<'a> StringParser<'a> {
|
||||
// This is still an invalid escape sequence, but we don't want to
|
||||
// raise a syntax error as is done by the CPython parser. It might
|
||||
// be supported in the future, refer to point 3: https://peps.python.org/pep-0701/#rejected-ideas
|
||||
'\\' if !self.kind.is_raw() && self.peek_byte().is_some() => {
|
||||
self.parse_escaped_char(&mut value)?;
|
||||
}
|
||||
// If there are any curly braces inside a `FStringMiddle` token,
|
||||
// then they were escaped (i.e. `{{` or `}}`). This means that
|
||||
// we need to increase the location by 2 instead of 1.
|
||||
ch @ ('{' | '}') => {
|
||||
self.location += ch.text_len();
|
||||
value.push(ch);
|
||||
}
|
||||
ch => value.push(ch),
|
||||
}
|
||||
}
|
||||
Ok(ast::FStringElement::Literal(ast::FStringLiteralElement {
|
||||
value,
|
||||
range: self.range,
|
||||
}))
|
||||
}
|
||||
|
||||
fn parse_bytes(&mut self) -> Result<StringType, LexicalError> {
|
||||
let mut content = String::with_capacity(self.rest.len());
|
||||
while let Some(ch) = self.next_char() {
|
||||
match ch {
|
||||
'\\' if !self.kind.is_raw() => {
|
||||
self.parse_escaped_char(&mut content)?;
|
||||
b'\\' if !self.kind.is_raw() && self.peek_byte().is_some() => {
|
||||
match self.parse_escaped_char()? {
|
||||
None => {}
|
||||
Some(EscapedChar::Literal(c)) => value.push(c),
|
||||
Some(EscapedChar::Escape(c)) => {
|
||||
value.push('\\');
|
||||
value.push(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
ch => {
|
||||
if !ch.is_ascii() {
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::OtherError(
|
||||
"bytes can only contain ASCII literal characters"
|
||||
.to_string()
|
||||
.into_boxed_str(),
|
||||
),
|
||||
self.get_pos(),
|
||||
));
|
||||
}
|
||||
content.push(ch);
|
||||
value.push(char::from(*ch));
|
||||
}
|
||||
}
|
||||
|
||||
let Some(next_index) =
|
||||
memchr::memchr3(b'{', b'}', b'\\', self.source[self.cursor..].as_bytes())
|
||||
else {
|
||||
// Add the rest of the string to the value.
|
||||
let rest = &self.source[self.cursor..];
|
||||
value.push_str(rest);
|
||||
break;
|
||||
};
|
||||
|
||||
index = next_index;
|
||||
}
|
||||
Ok(StringType::Bytes(ast::BytesLiteral {
|
||||
value: content.chars().map(|c| c as u8).collect::<Vec<u8>>(),
|
||||
|
||||
Ok(ast::FStringElement::Literal(ast::FStringLiteralElement {
|
||||
value: value.into_boxed_str(),
|
||||
range: self.range,
|
||||
}))
|
||||
}
|
||||
|
||||
fn parse_string(&mut self) -> Result<StringType, LexicalError> {
|
||||
let mut value = String::with_capacity(self.rest.len());
|
||||
if self.kind.is_raw() {
|
||||
value.push_str(self.skip_bytes(self.rest.len()));
|
||||
} else {
|
||||
loop {
|
||||
let Some(escape_idx) = self.rest.find('\\') else {
|
||||
value.push_str(self.skip_bytes(self.rest.len()));
|
||||
break;
|
||||
};
|
||||
|
||||
let before_with_slash = self.skip_bytes(escape_idx + 1);
|
||||
let before = &before_with_slash[..before_with_slash.len() - 1];
|
||||
|
||||
value.push_str(before);
|
||||
self.parse_escaped_char(&mut value)?;
|
||||
}
|
||||
fn parse_bytes(mut self) -> Result<StringType, LexicalError> {
|
||||
if let Some(index) = self.source.as_bytes().find_non_ascii_byte() {
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::OtherError(
|
||||
"bytes can only contain ASCII literal characters"
|
||||
.to_string()
|
||||
.into_boxed_str(),
|
||||
),
|
||||
self.offset + TextSize::try_from(index).unwrap(),
|
||||
));
|
||||
}
|
||||
|
||||
if self.kind.is_raw() {
|
||||
// For raw strings, no escaping is necessary.
|
||||
return Ok(StringType::Bytes(ast::BytesLiteral {
|
||||
value: self.source.into_boxed_bytes(),
|
||||
range: self.range,
|
||||
}));
|
||||
}
|
||||
|
||||
let Some(mut escape) = memchr::memchr(b'\\', self.source.as_bytes()) else {
|
||||
// If the string doesn't contain any escape sequences, return the owned string.
|
||||
return Ok(StringType::Bytes(ast::BytesLiteral {
|
||||
value: self.source.into_boxed_bytes(),
|
||||
range: self.range,
|
||||
}));
|
||||
};
|
||||
|
||||
// If the string contains escape sequences, we need to parse them.
|
||||
let mut value = Vec::with_capacity(self.source.len());
|
||||
loop {
|
||||
// Add the characters before the escape sequence to the string.
|
||||
let before_with_slash = self.skip_bytes(escape + 1);
|
||||
let before = &before_with_slash[..before_with_slash.len() - 1];
|
||||
value.extend_from_slice(before.as_bytes());
|
||||
|
||||
// Add the escaped character to the string.
|
||||
match self.parse_escaped_char()? {
|
||||
None => {}
|
||||
Some(EscapedChar::Literal(c)) => value.push(c as u8),
|
||||
Some(EscapedChar::Escape(c)) => {
|
||||
value.push(b'\\');
|
||||
value.push(c as u8);
|
||||
}
|
||||
}
|
||||
|
||||
let Some(next_escape) = memchr::memchr(b'\\', self.source[self.cursor..].as_bytes())
|
||||
else {
|
||||
// Add the rest of the string to the value.
|
||||
let rest = &self.source[self.cursor..];
|
||||
value.extend_from_slice(rest.as_bytes());
|
||||
break;
|
||||
};
|
||||
|
||||
// Update the position of the next escape sequence.
|
||||
escape = next_escape;
|
||||
}
|
||||
|
||||
Ok(StringType::Bytes(ast::BytesLiteral {
|
||||
value: value.into_boxed_slice(),
|
||||
range: self.range,
|
||||
}))
|
||||
}
|
||||
|
||||
fn parse_string(mut self) -> Result<StringType, LexicalError> {
|
||||
if self.kind.is_raw() {
|
||||
// For raw strings, no escaping is necessary.
|
||||
return Ok(StringType::Str(ast::StringLiteral {
|
||||
value: self.source,
|
||||
unicode: self.kind.is_unicode(),
|
||||
range: self.range,
|
||||
}));
|
||||
}
|
||||
|
||||
let Some(mut escape) = memchr::memchr(b'\\', self.source.as_bytes()) else {
|
||||
// If the string doesn't contain any escape sequences, return the owned string.
|
||||
return Ok(StringType::Str(ast::StringLiteral {
|
||||
value: self.source,
|
||||
unicode: self.kind.is_unicode(),
|
||||
range: self.range,
|
||||
}));
|
||||
};
|
||||
|
||||
// If the string contains escape sequences, we need to parse them.
|
||||
let mut value = String::with_capacity(self.source.len());
|
||||
|
||||
loop {
|
||||
// Add the characters before the escape sequence to the string.
|
||||
let before_with_slash = self.skip_bytes(escape + 1);
|
||||
let before = &before_with_slash[..before_with_slash.len() - 1];
|
||||
value.push_str(before);
|
||||
|
||||
// Add the escaped character to the string.
|
||||
match self.parse_escaped_char()? {
|
||||
None => {}
|
||||
Some(EscapedChar::Literal(c)) => value.push(c),
|
||||
Some(EscapedChar::Escape(c)) => {
|
||||
value.push('\\');
|
||||
value.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
let Some(next_escape) = self.source[self.cursor..].find('\\') else {
|
||||
// Add the rest of the string to the value.
|
||||
let rest = &self.source[self.cursor..];
|
||||
value.push_str(rest);
|
||||
break;
|
||||
};
|
||||
|
||||
// Update the position of the next escape sequence.
|
||||
escape = next_escape;
|
||||
}
|
||||
|
||||
Ok(StringType::Str(ast::StringLiteral {
|
||||
value: value.into_boxed_str(),
|
||||
unicode: self.kind.is_unicode(),
|
||||
@@ -301,7 +410,7 @@ impl<'a> StringParser<'a> {
|
||||
}))
|
||||
}
|
||||
|
||||
fn parse(&mut self) -> Result<StringType, LexicalError> {
|
||||
fn parse(self) -> Result<StringType, LexicalError> {
|
||||
if self.kind.is_any_bytes() {
|
||||
self.parse_bytes()
|
||||
} else {
|
||||
@@ -310,8 +419,8 @@ impl<'a> StringParser<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse_string_literal(
|
||||
source: &str,
|
||||
pub fn parse_string_literal(
|
||||
source: Box<str>,
|
||||
kind: StringKind,
|
||||
triple_quoted: bool,
|
||||
range: TextRange,
|
||||
@@ -326,8 +435,8 @@ pub(crate) fn parse_string_literal(
|
||||
StringParser::new(source, kind, start_location, range).parse()
|
||||
}
|
||||
|
||||
pub(crate) fn parse_fstring_literal_element(
|
||||
source: &str,
|
||||
pub fn parse_fstring_literal_element(
|
||||
source: Box<str>,
|
||||
is_raw: bool,
|
||||
range: TextRange,
|
||||
) -> Result<ast::FStringElement, LexicalError> {
|
||||
@@ -360,7 +469,7 @@ pub(crate) fn concatenated_strings(
|
||||
if has_bytes && byte_literal_count < strings.len() {
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::OtherError(
|
||||
"cannot mix bytes and nonbytes literals"
|
||||
"cannot mix bytes and non-bytes literals"
|
||||
.to_string()
|
||||
.into_boxed_str(),
|
||||
),
|
||||
@@ -415,7 +524,7 @@ pub(crate) fn concatenated_strings(
|
||||
// TODO: consolidate these with ParseError
|
||||
/// An error that occurred during parsing of an f-string.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
struct FStringError {
|
||||
pub(crate) struct FStringError {
|
||||
/// The type of error that occurred.
|
||||
pub(crate) error: FStringErrorType,
|
||||
/// The location of the error.
|
||||
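The rewritten `string.rs` above replaces the old character-by-character loop with `memchr`/`memchr3` scans: it jumps directly to the next `\`, `{`, or `}` and copies everything in between with a single `push_str`. A small self-contained illustration of that scanning pattern, assuming only the `memchr` crate (the escape handling is deliberately a stub):

```rust
use memchr::memchr3;

/// Copy `source` into `out`, jumping between the interesting bytes instead of
/// walking char by char. Escape handling is a stub for illustration only.
fn scan(source: &str, out: &mut String) {
    let mut rest = source;
    while let Some(index) = memchr3(b'\\', b'{', b'}', rest.as_bytes()) {
        // Everything before the interesting byte is copied in one shot.
        out.push_str(&rest[..index]);
        // In the real parser this is where escapes and doubled braces are handled.
        out.push(rest.as_bytes()[index] as char);
        rest = &rest[index + 1..];
    }
    // No more interesting bytes: copy the tail.
    out.push_str(rest);
}

fn main() {
    let mut out = String::new();
    scan(r"Item \t {i}", &mut out);
    assert_eq!(out, r"Item \t {i}");
}
```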