## Summary
I had always assumed that `NoHashHasher` would be faster when using
integer keys, but benchmarking shows otherwise:
```
linter/default-rules/numpy/globals.py
time: [66.544 µs 66.606 µs 66.678 µs]
thrpt: [44.253 MiB/s 44.300 MiB/s 44.342 MiB/s]
change:
time: [-0.1843% +0.1087% +0.3718%] (p = 0.46 > 0.05)
thrpt: [-0.3704% -0.1086% +0.1847%]
No change in performance detected.
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high mild
linter/default-rules/pydantic/types.py
time: [1.3787 ms 1.3811 ms 1.3837 ms]
thrpt: [18.431 MiB/s 18.466 MiB/s 18.498 MiB/s]
change:
time: [-0.4827% -0.1074% +0.1927%] (p = 0.56 > 0.05)
thrpt: [-0.1924% +0.1075% +0.4850%]
No change in performance detected.
linter/default-rules/numpy/ctypeslib.py
time: [624.82 µs 625.96 µs 627.17 µs]
thrpt: [26.550 MiB/s 26.601 MiB/s 26.650 MiB/s]
change:
time: [-0.7071% -0.4908% -0.2736%] (p = 0.00 < 0.05)
thrpt: [+0.2744% +0.4932% +0.7122%]
Change within noise threshold.
linter/default-rules/large/dataset.py
time: [3.1585 ms 3.1634 ms 3.1685 ms]
thrpt: [12.840 MiB/s 12.861 MiB/s 12.880 MiB/s]
change:
time: [-1.5338% -1.3463% -1.1476%] (p = 0.00 < 0.05)
thrpt: [+1.1610% +1.3647% +1.5577%]
Performance has improved.
linter/all-rules/numpy/globals.py
time: [140.17 µs 140.37 µs 140.58 µs]
thrpt: [20.989 MiB/s 21.020 MiB/s 21.051 MiB/s]
change:
time: [-0.1066% +0.3140% +0.7479%] (p = 0.14 > 0.05)
thrpt: [-0.7423% -0.3130% +0.1067%]
No change in performance detected.
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) high mild
1 (1.00%) high severe
linter/all-rules/pydantic/types.py
time: [2.7030 ms 2.7069 ms 2.7112 ms]
thrpt: [9.4064 MiB/s 9.4216 MiB/s 9.4351 MiB/s]
change:
time: [-0.6721% -0.4874% -0.2974%] (p = 0.00 < 0.05)
thrpt: [+0.2982% +0.4898% +0.6766%]
Change within noise threshold.
Found 14 outliers among 100 measurements (14.00%)
12 (12.00%) high mild
2 (2.00%) high severe
linter/all-rules/numpy/ctypeslib.py
time: [1.4709 ms 1.4727 ms 1.4749 ms]
thrpt: [11.290 MiB/s 11.306 MiB/s 11.320 MiB/s]
change:
time: [-1.1617% -0.9766% -0.8094%] (p = 0.00 < 0.05)
thrpt: [+0.8160% +0.9862% +1.1754%]
Change within noise threshold.
Found 12 outliers among 100 measurements (12.00%)
9 (9.00%) high mild
3 (3.00%) high severe
linter/all-rules/large/dataset.py
time: [5.8086 ms 5.8163 ms 5.8240 ms]
thrpt: [6.9854 MiB/s 6.9946 MiB/s 7.0038 MiB/s]
change:
time: [-1.5651% -1.3536% -1.1584%] (p = 0.00 < 0.05)
thrpt: [+1.1720% +1.3721% +1.5900%]
Performance has improved.
```
My guess is that `NoHashHasher` underperforms because the keys are not
randomly distributed...
Anyway, it's a ~1% (statistically significant) performance gain on some of the above,
plus we get to remove a dependency.
268 lines
8.4 KiB
Rust
268 lines
8.4 KiB
Rust
use std::ops::{Deref, DerefMut};
|
|
|
|
use bitflags::bitflags;
|
|
use rustc_hash::FxHashMap;
|
|
use rustpython_parser::ast;
|
|
|
|
use ruff_index::{newtype_index, Idx, IndexSlice, IndexVec};
|
|
|
|
use crate::binding::BindingId;
|
|
use crate::globals::GlobalsId;
|
|
use crate::star_import::StarImport;
|
|
|
|
#[derive(Debug)]
|
|
pub struct Scope<'a> {
|
|
/// The kind of scope.
|
|
pub kind: ScopeKind<'a>,
|
|
|
|
/// The parent scope, if any.
|
|
pub parent: Option<ScopeId>,
|
|
|
|
/// A list of star imports in this scope. These represent _module_ imports (e.g., `sys` in
|
|
/// `from sys import *`), rather than individual bindings (e.g., individual members in `sys`).
|
|
star_imports: Vec<StarImport<'a>>,
|
|
|
|
/// A map from bound name to binding ID.
|
|
bindings: FxHashMap<&'a str, BindingId>,
|
|
|
|
/// A map from binding ID to binding ID that it shadows.
|
|
///
|
|
/// For example:
|
|
/// ```python
|
|
/// def f():
|
|
/// x = 1
|
|
/// x = 2
|
|
/// ```
|
|
///
|
|
/// In this case, the binding created by `x = 2` shadows the binding created by `x = 1`.
|
|
shadowed_bindings: FxHashMap<BindingId, BindingId>,
|
|
|
|
/// Index into the globals arena, if the scope contains any globally-declared symbols.
|
|
globals_id: Option<GlobalsId>,
|
|
|
|
/// Flags for the [`Scope`].
|
|
flags: ScopeFlags,
|
|
}
|
|
|
|
impl<'a> Scope<'a> {
|
|
pub fn global() -> Self {
|
|
Scope {
|
|
kind: ScopeKind::Module,
|
|
parent: None,
|
|
star_imports: Vec::default(),
|
|
bindings: FxHashMap::default(),
|
|
shadowed_bindings: FxHashMap::default(),
|
|
globals_id: None,
|
|
flags: ScopeFlags::empty(),
|
|
}
|
|
}
|
|
|
|
pub fn local(kind: ScopeKind<'a>, parent: ScopeId) -> Self {
|
|
Scope {
|
|
kind,
|
|
parent: Some(parent),
|
|
star_imports: Vec::default(),
|
|
bindings: FxHashMap::default(),
|
|
shadowed_bindings: FxHashMap::default(),
|
|
globals_id: None,
|
|
flags: ScopeFlags::empty(),
|
|
}
|
|
}
|
|
|
|
/// Returns the [id](BindingId) of the binding bound to the given name.
|
|
pub fn get(&self, name: &str) -> Option<BindingId> {
|
|
self.bindings.get(name).copied()
|
|
}
|
|
|
|
/// Adds a new binding with the given name to this scope.
|
|
pub fn add(&mut self, name: &'a str, id: BindingId) -> Option<BindingId> {
|
|
if let Some(shadowed) = self.bindings.insert(name, id) {
|
|
self.shadowed_bindings.insert(id, shadowed);
|
|
Some(shadowed)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
/// Returns `true` if this scope has a binding with the given name.
|
|
pub fn has(&self, name: &str) -> bool {
|
|
self.bindings.contains_key(name)
|
|
}
|
|
|
|
/// Returns the IDs of all bindings defined in this scope.
|
|
pub fn binding_ids(&self) -> impl Iterator<Item = BindingId> + '_ {
|
|
self.bindings.values().copied()
|
|
}
|
|
|
|
/// Returns a tuple of the name and ID of all bindings defined in this scope.
|
|
pub fn bindings(&self) -> impl Iterator<Item = (&str, BindingId)> + '_ {
|
|
self.bindings.iter().map(|(&name, &id)| (name, id))
|
|
}
|
|
|
|
/// Like [`Scope::get`], but returns all bindings with the given name, including
|
|
/// those that were shadowed by later bindings.
|
|
pub fn get_all(&self, name: &str) -> impl Iterator<Item = BindingId> + '_ {
|
|
std::iter::successors(self.bindings.get(name).copied(), |id| {
|
|
self.shadowed_bindings.get(id).copied()
|
|
})
|
|
}
|
|
|
|
/// Like [`Scope::binding_ids`], but returns all bindings that were added to the scope,
|
|
/// including those that were shadowed by later bindings.
|
|
pub fn all_binding_ids(&self) -> impl Iterator<Item = BindingId> + '_ {
|
|
self.bindings.values().copied().flat_map(|id| {
|
|
std::iter::successors(Some(id), |id| self.shadowed_bindings.get(id).copied())
|
|
})
|
|
}
|
|
|
|
/// Like [`Scope::bindings`], but returns all bindings added to the scope, including those that
|
|
/// were shadowed by later bindings.
|
|
pub fn all_bindings(&self) -> impl Iterator<Item = (&str, BindingId)> + '_ {
|
|
self.bindings.iter().flat_map(|(&name, &id)| {
|
|
std::iter::successors(Some(id), |id| self.shadowed_bindings.get(id).copied())
|
|
.map(move |id| (name, id))
|
|
})
|
|
}
|
|
|
|
/// Returns the ID of the binding that the given binding shadows, if any.
|
|
pub fn shadowed_binding(&self, id: BindingId) -> Option<BindingId> {
|
|
self.shadowed_bindings.get(&id).copied()
|
|
}
|
|
|
|
/// Adds a reference to a star import (e.g., `from sys import *`) to this scope.
|
|
pub fn add_star_import(&mut self, import: StarImport<'a>) {
|
|
self.star_imports.push(import);
|
|
}
|
|
|
|
/// Returns `true` if this scope contains a star import (e.g., `from sys import *`).
|
|
pub fn uses_star_imports(&self) -> bool {
|
|
!self.star_imports.is_empty()
|
|
}
|
|
|
|
/// Returns an iterator over all star imports (e.g., `from sys import *`) in this scope.
|
|
pub fn star_imports(&self) -> impl Iterator<Item = &StarImport<'a>> {
|
|
self.star_imports.iter()
|
|
}
|
|
|
|
/// Set the globals pointer for this scope.
|
|
pub(crate) fn set_globals_id(&mut self, globals: GlobalsId) {
|
|
self.globals_id = Some(globals);
|
|
}
|
|
|
|
/// Returns the globals pointer for this scope.
|
|
pub(crate) fn globals_id(&self) -> Option<GlobalsId> {
|
|
self.globals_id
|
|
}
|
|
|
|
/// Sets the [`ScopeFlags::USES_LOCALS`] flag.
|
|
pub fn set_uses_locals(&mut self) {
|
|
self.flags.insert(ScopeFlags::USES_LOCALS);
|
|
}
|
|
|
|
/// Returns `true` if this scope uses locals (e.g., `locals()`).
|
|
pub const fn uses_locals(&self) -> bool {
|
|
self.flags.intersects(ScopeFlags::USES_LOCALS)
|
|
}
|
|
}
|
|
|
|
bitflags! {
    /// Boolean properties tracked on a [`Scope`], packed into a single byte.
    #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
    pub struct ScopeFlags: u8 {
        /// Set when the scope uses locals (e.g., calls `locals()`).
        const USES_LOCALS = 0b0000_0001;
    }
}
|
|
|
|
#[derive(Debug, is_macro::Is)]
|
|
pub enum ScopeKind<'a> {
|
|
Class(&'a ast::StmtClassDef),
|
|
Function(&'a ast::StmtFunctionDef),
|
|
AsyncFunction(&'a ast::StmtAsyncFunctionDef),
|
|
Generator,
|
|
Module,
|
|
Lambda(&'a ast::ExprLambda),
|
|
}
|
|
|
|
impl ScopeKind<'_> {
|
|
pub const fn is_any_function(&self) -> bool {
|
|
matches!(self, ScopeKind::Function(_) | ScopeKind::AsyncFunction(_))
|
|
}
|
|
}
|
|
|
|
/// Id uniquely identifying a scope in a program.
|
|
///
|
|
/// Using a `u32` is sufficient because Ruff only supports parsing documents with a size of max `u32::max`
|
|
/// and it is impossible to have more scopes than characters in the file (because defining a function or class
|
|
/// requires more than one character).
|
|
#[newtype_index]
|
|
pub struct ScopeId;
|
|
|
|
impl ScopeId {
|
|
/// Returns the ID for the global scope
|
|
#[inline]
|
|
pub const fn global() -> Self {
|
|
ScopeId::from_u32(0)
|
|
}
|
|
|
|
/// Returns `true` if this is the id of the global scope
|
|
pub const fn is_global(&self) -> bool {
|
|
self.index() == 0
|
|
}
|
|
}
|
|
|
|
/// The scopes of a program indexed by [`ScopeId`]
|
|
#[derive(Debug)]
|
|
pub struct Scopes<'a>(IndexVec<ScopeId, Scope<'a>>);
|
|
|
|
impl<'a> Scopes<'a> {
|
|
/// Returns a reference to the global scope
|
|
pub(crate) fn global(&self) -> &Scope<'a> {
|
|
&self[ScopeId::global()]
|
|
}
|
|
|
|
/// Returns a mutable reference to the global scope
|
|
pub(crate) fn global_mut(&mut self) -> &mut Scope<'a> {
|
|
&mut self[ScopeId::global()]
|
|
}
|
|
|
|
/// Pushes a new scope and returns its unique id
|
|
pub(crate) fn push_scope(&mut self, kind: ScopeKind<'a>, parent: ScopeId) -> ScopeId {
|
|
let next_id = ScopeId::new(self.0.len());
|
|
self.0.push(Scope::local(kind, parent));
|
|
next_id
|
|
}
|
|
|
|
/// Returns an iterator over all [`ScopeId`] ancestors, starting from the given [`ScopeId`].
|
|
pub fn ancestor_ids(&self, scope_id: ScopeId) -> impl Iterator<Item = ScopeId> + '_ {
|
|
std::iter::successors(Some(scope_id), |&scope_id| self[scope_id].parent)
|
|
}
|
|
|
|
/// Returns an iterator over all [`Scope`] ancestors, starting from the given [`ScopeId`].
|
|
pub fn ancestors(&self, scope_id: ScopeId) -> impl Iterator<Item = &Scope> + '_ {
|
|
std::iter::successors(Some(&self[scope_id]), |&scope| {
|
|
scope.parent.map(|scope_id| &self[scope_id])
|
|
})
|
|
}
|
|
}
|
|
|
|
impl Default for Scopes<'_> {
|
|
fn default() -> Self {
|
|
Self(IndexVec::from_raw(vec![Scope::global()]))
|
|
}
|
|
}
|
|
|
|
impl<'a> Deref for Scopes<'a> {
|
|
type Target = IndexSlice<ScopeId, Scope<'a>>;
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
&self.0
|
|
}
|
|
}
|
|
|
|
impl<'a> DerefMut for Scopes<'a> {
|
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
|
&mut self.0
|
|
}
|
|
}
|