Apply NFKC normalization to Unicode identifiers in the lexer (#10412)

This commit is contained in:
Alex Waygood
2024-03-18 11:56:56 +00:00
committed by GitHub
parent bb540718c2
commit 92e6026446
9 changed files with 68 additions and 15 deletions

View File

@@ -16,6 +16,9 @@ pub enum Tok {
/// Token value for a name, commonly known as an identifier.
Name {
/// The name value.
///
/// Unicode names are NFKC-normalized by the lexer,
/// matching [the behaviour of Python's lexer](https://docs.python.org/3/reference/lexical_analysis.html#identifiers).
/// Because of this normalization, the stored value can differ from the
/// identifier's exact spelling in the source text.
name: Box<str>,
},
/// Token value for an integer.