Apply NFKC normalization to unicode identifiers in the lexer (#10412)

2024-03-18 11:56:56 +00:00
parent bb540718c2
commit 92e6026446
9 changed files with 68 additions and 15 deletions
--- a/crates/ruff_python_parser/src/token.rs
+++ b/crates/ruff_python_parser/src/token.rs
@@ -16,6 +16,9 @@ pub enum Tok {
    /// Token value for a name, commonly known as an identifier.
    Name {
        /// The name value.
+        ///
+        /// Unicode names are NFKC-normalized by the lexer,
+        /// matching [the behaviour of Python's lexer](https://docs.python.org/3/reference/lexical_analysis.html#identifiers).
        name: Box<str>,
    },
    /// Token value for an integer.