Files
ruff/crates/ruff_python_semantic/src/scope.rs
Charlie Marsh ed72c027a3 Replace NoHashHasher usages with FxHashMap (#6049)
## Summary

I had always assumed that `NoHashHasher` would be faster when using
integer keys, but benchmarking shows otherwise:

```
linter/default-rules/numpy/globals.py
                        time:   [66.544 µs 66.606 µs 66.678 µs]
                        thrpt:  [44.253 MiB/s 44.300 MiB/s 44.342 MiB/s]
                 change:
                        time:   [-0.1843% +0.1087% +0.3718%] (p = 0.46 > 0.05)
                        thrpt:  [-0.3704% -0.1086% +0.1847%]
                        No change in performance detected.
Found 1 outliers among 100 measurements (1.00%)
  1 (1.00%) high mild
linter/default-rules/pydantic/types.py
                        time:   [1.3787 ms 1.3811 ms 1.3837 ms]
                        thrpt:  [18.431 MiB/s 18.466 MiB/s 18.498 MiB/s]
                 change:
                        time:   [-0.4827% -0.1074% +0.1927%] (p = 0.56 > 0.05)
                        thrpt:  [-0.1924% +0.1075% +0.4850%]
                        No change in performance detected.
linter/default-rules/numpy/ctypeslib.py
                        time:   [624.82 µs 625.96 µs 627.17 µs]
                        thrpt:  [26.550 MiB/s 26.601 MiB/s 26.650 MiB/s]
                 change:
                        time:   [-0.7071% -0.4908% -0.2736%] (p = 0.00 < 0.05)
                        thrpt:  [+0.2744% +0.4932% +0.7122%]
                        Change within noise threshold.
linter/default-rules/large/dataset.py
                        time:   [3.1585 ms 3.1634 ms 3.1685 ms]
                        thrpt:  [12.840 MiB/s 12.861 MiB/s 12.880 MiB/s]
                 change:
                        time:   [-1.5338% -1.3463% -1.1476%] (p = 0.00 < 0.05)
                        thrpt:  [+1.1610% +1.3647% +1.5577%]
                        Performance has improved.

linter/all-rules/numpy/globals.py
                        time:   [140.17 µs 140.37 µs 140.58 µs]
                        thrpt:  [20.989 MiB/s 21.020 MiB/s 21.051 MiB/s]
                 change:
                        time:   [-0.1066% +0.3140% +0.7479%] (p = 0.14 > 0.05)
                        thrpt:  [-0.7423% -0.3130% +0.1067%]
                        No change in performance detected.
Found 3 outliers among 100 measurements (3.00%)
  2 (2.00%) high mild
  1 (1.00%) high severe
linter/all-rules/pydantic/types.py
                        time:   [2.7030 ms 2.7069 ms 2.7112 ms]
                        thrpt:  [9.4064 MiB/s 9.4216 MiB/s 9.4351 MiB/s]
                 change:
                        time:   [-0.6721% -0.4874% -0.2974%] (p = 0.00 < 0.05)
                        thrpt:  [+0.2982% +0.4898% +0.6766%]
                        Change within noise threshold.
Found 14 outliers among 100 measurements (14.00%)
  12 (12.00%) high mild
  2 (2.00%) high severe
linter/all-rules/numpy/ctypeslib.py
                        time:   [1.4709 ms 1.4727 ms 1.4749 ms]
                        thrpt:  [11.290 MiB/s 11.306 MiB/s 11.320 MiB/s]
                 change:
                        time:   [-1.1617% -0.9766% -0.8094%] (p = 0.00 < 0.05)
                        thrpt:  [+0.8160% +0.9862% +1.1754%]
                        Change within noise threshold.
Found 12 outliers among 100 measurements (12.00%)
  9 (9.00%) high mild
  3 (3.00%) high severe
linter/all-rules/large/dataset.py
                        time:   [5.8086 ms 5.8163 ms 5.8240 ms]
                        thrpt:  [6.9854 MiB/s 6.9946 MiB/s 7.0038 MiB/s]
                 change:
                        time:   [-1.5651% -1.3536% -1.1584%] (p = 0.00 < 0.05)
                        thrpt:  [+1.1720% +1.3721% +1.5900%]
                        Performance has improved.
```

My guess is that `NoHashHasher` underperforms because the keys are not
randomly distributed...

Anyway, it's a ~1% (significant) performance gain on some of the above,
plus we get to remove a dependency.
2023-07-24 23:41:57 +00:00

268 lines
8.4 KiB
Rust

use std::ops::{Deref, DerefMut};
use bitflags::bitflags;
use rustc_hash::FxHashMap;
use rustpython_parser::ast;
use ruff_index::{newtype_index, Idx, IndexSlice, IndexVec};
use crate::binding::BindingId;
use crate::globals::GlobalsId;
use crate::star_import::StarImport;
/// A single scope, together with the bindings, star imports, and flags recorded for it.
///
/// A scope created by [`Scope::global`] has no parent; one created by [`Scope::local`] always
/// refers to its enclosing scope through `parent`.
#[derive(Debug)]
pub struct Scope<'a> {
/// The kind of scope.
pub kind: ScopeKind<'a>,
/// The parent scope, if any.
pub parent: Option<ScopeId>,
/// A list of star imports in this scope. These represent _module_ imports (e.g., `sys` in
/// `from sys import *`), rather than individual bindings (e.g., individual members in `sys`).
star_imports: Vec<StarImport<'a>>,
/// A map from bound name to binding ID.
///
/// Only the most recently added binding for each name is stored here; the bindings it replaced
/// remain reachable through `shadowed_bindings`.
bindings: FxHashMap<&'a str, BindingId>,
/// A map from binding ID to binding ID that it shadows.
///
/// For example:
/// ```python
/// def f():
///     x = 1
///     x = 2
/// ```
///
/// In this case, the binding created by `x = 2` shadows the binding created by `x = 1`.
shadowed_bindings: FxHashMap<BindingId, BindingId>,
/// Index into the globals arena, if the scope contains any globally-declared symbols.
globals_id: Option<GlobalsId>,
/// Flags for the [`Scope`].
flags: ScopeFlags,
}
impl<'a> Scope<'a> {
    /// Builds a scope of the given kind and parent with nothing recorded in it yet.
    fn with_parent(kind: ScopeKind<'a>, parent: Option<ScopeId>) -> Self {
        Self {
            kind,
            parent,
            star_imports: Vec::new(),
            bindings: FxHashMap::default(),
            shadowed_bindings: FxHashMap::default(),
            globals_id: None,
            flags: ScopeFlags::empty(),
        }
    }

    /// Creates the module scope, which has no parent.
    pub fn global() -> Self {
        Self::with_parent(ScopeKind::Module, None)
    }

    /// Creates an empty scope of the given kind, nested inside `parent`.
    pub fn local(kind: ScopeKind<'a>, parent: ScopeId) -> Self {
        Self::with_parent(kind, Some(parent))
    }

    /// Looks up the [id](BindingId) currently bound to `name` in this scope.
    pub fn get(&self, name: &str) -> Option<BindingId> {
        self.bindings.get(name).copied()
    }

    /// Binds `name` to `id` in this scope.
    ///
    /// If `name` was already bound, the previous binding is recorded as shadowed by `id` and
    /// returned; otherwise returns `None`.
    pub fn add(&mut self, name: &'a str, id: BindingId) -> Option<BindingId> {
        let previous = self.bindings.insert(name, id)?;
        self.shadowed_bindings.insert(id, previous);
        Some(previous)
    }

    /// Returns `true` if `name` is bound in this scope.
    pub fn has(&self, name: &str) -> bool {
        self.bindings.contains_key(name)
    }

    /// Iterates over the IDs of the bindings currently bound in this scope (one per name).
    pub fn binding_ids(&self) -> impl Iterator<Item = BindingId> + '_ {
        self.bindings.values().copied()
    }

    /// Iterates over `(name, ID)` pairs for the bindings currently bound in this scope.
    pub fn bindings(&self) -> impl Iterator<Item = (&str, BindingId)> + '_ {
        self.bindings.iter().map(|(name, id)| (*name, *id))
    }

    /// Like [`Scope::get`], but also yields every binding of `name` that was shadowed by a later
    /// one: the current binding comes first, followed by the binding it shadows, and so on.
    pub fn get_all(&self, name: &str) -> impl Iterator<Item = BindingId> + '_ {
        let shadowed = &self.shadowed_bindings;
        std::iter::successors(self.get(name), move |id| shadowed.get(id).copied())
    }

    /// Like [`Scope::binding_ids`], but also yields the bindings that were shadowed by later
    /// bindings of the same name.
    pub fn all_binding_ids(&self) -> impl Iterator<Item = BindingId> + '_ {
        let shadowed = &self.shadowed_bindings;
        self.bindings.values().flat_map(move |&head| {
            std::iter::successors(Some(head), move |id| shadowed.get(id).copied())
        })
    }

    /// Like [`Scope::bindings`], but also yields the bindings that were shadowed by later
    /// bindings; each shadowed ID is paired with the name it was bound to.
    pub fn all_bindings(&self) -> impl Iterator<Item = (&str, BindingId)> + '_ {
        let shadowed = &self.shadowed_bindings;
        self.bindings.iter().flat_map(move |(&name, &head)| {
            std::iter::successors(Some(head), move |id| shadowed.get(id).copied())
                .map(move |id| (name, id))
        })
    }

    /// Returns the ID of the binding directly shadowed by `id`, if there is one.
    pub fn shadowed_binding(&self, id: BindingId) -> Option<BindingId> {
        self.shadowed_bindings.get(&id).copied()
    }

    /// Records a star import (e.g., `from sys import *`) in this scope.
    pub fn add_star_import(&mut self, import: StarImport<'a>) {
        self.star_imports.push(import);
    }

    /// Returns `true` if at least one star import (e.g., `from sys import *`) was recorded.
    pub fn uses_star_imports(&self) -> bool {
        !self.star_imports.is_empty()
    }

    /// Iterates over the star imports (e.g., `from sys import *`) recorded in this scope.
    pub fn star_imports(&self) -> impl Iterator<Item = &StarImport<'a>> {
        self.star_imports.iter()
    }

    /// Stores the globals pointer for this scope, replacing any previous one.
    pub(crate) fn set_globals_id(&mut self, globals: GlobalsId) {
        self.globals_id = Some(globals);
    }

    /// Returns the globals pointer for this scope, if one was set.
    pub(crate) fn globals_id(&self) -> Option<GlobalsId> {
        self.globals_id
    }

    /// Turns on the [`ScopeFlags::USES_LOCALS`] flag.
    pub fn set_uses_locals(&mut self) {
        self.flags |= ScopeFlags::USES_LOCALS;
    }

    /// Returns `true` if the [`ScopeFlags::USES_LOCALS`] flag is set (e.g., the scope calls
    /// `locals()`).
    pub const fn uses_locals(&self) -> bool {
        self.flags.contains(ScopeFlags::USES_LOCALS)
    }
}
bitflags! {
/// Flags on a [`Scope`].
///
/// Stored as a `u8` bit set, with each constant occupying its own bit.
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
pub struct ScopeFlags: u8 {
/// The scope uses locals (e.g., `locals()`).
///
/// Set through [`Scope::set_uses_locals`] and queried through [`Scope::uses_locals`].
const USES_LOCALS = 1 << 0;
}
}
/// The kind of a [`Scope`], carrying a reference to the AST node that introduced the scope for
/// the variants that have one.
// NOTE(review): the `is_macro::Is` derive is expected to generate an `is_*` predicate per
// variant (e.g. `is_class`) — confirm against the `is_macro` documentation.
#[derive(Debug, is_macro::Is)]
pub enum ScopeKind<'a> {
/// A scope introduced by a class definition.
Class(&'a ast::StmtClassDef),
/// A scope introduced by a (non-`async`) function definition.
Function(&'a ast::StmtFunctionDef),
/// A scope introduced by an `async` function definition.
AsyncFunction(&'a ast::StmtAsyncFunctionDef),
// NOTE(review): carries no AST node; presumably used for generator expressions and
// comprehensions — confirm against the callers that push this kind.
Generator,
/// The module scope; this is the kind assigned by [`Scope::global`].
Module,
/// A scope introduced by a lambda expression.
Lambda(&'a ast::ExprLambda),
}
impl ScopeKind<'_> {
pub const fn is_any_function(&self) -> bool {
matches!(self, ScopeKind::Function(_) | ScopeKind::AsyncFunction(_))
}
}
/// Id uniquely identifying a scope in a program.
///
/// Using a `u32` is sufficient because Ruff only supports parsing documents whose size is at most
/// `u32::MAX`, and it is impossible to have more scopes than characters in the file (because
/// defining a function or class requires more than one character).
#[newtype_index]
pub struct ScopeId;
impl ScopeId {
/// Returns the ID for the global scope
#[inline]
pub const fn global() -> Self {
ScopeId::from_u32(0)
}
/// Returns `true` if this is the id of the global scope
pub const fn is_global(&self) -> bool {
self.index() == 0
}
}
/// The scopes of a program indexed by [`ScopeId`].
///
/// The [`Default`] value already contains the global scope at index `0`; further scopes are
/// appended with `push_scope`. The collection dereferences to an `IndexSlice`, so an individual
/// scope is looked up with `scopes[id]`.
#[derive(Debug)]
pub struct Scopes<'a>(IndexVec<ScopeId, Scope<'a>>);
impl<'a> Scopes<'a> {
    /// Borrows the global scope, i.e. the entry stored under [`ScopeId::global`].
    pub(crate) fn global(&self) -> &Scope<'a> {
        let root = ScopeId::global();
        &self[root]
    }

    /// Mutably borrows the global scope, i.e. the entry stored under [`ScopeId::global`].
    pub(crate) fn global_mut(&mut self) -> &mut Scope<'a> {
        let root = ScopeId::global();
        &mut self[root]
    }

    /// Appends a new, empty scope of the given kind under `parent` and returns its ID.
    pub(crate) fn push_scope(&mut self, kind: ScopeKind<'a>, parent: ScopeId) -> ScopeId {
        // The scope is appended at the end, so its ID is the length before the push.
        let id = ScopeId::new(self.0.len());
        let scope = Scope::local(kind, parent);
        self.0.push(scope);
        id
    }

    /// Iterates over the IDs on the parent chain of `scope_id`, beginning with `scope_id` itself
    /// and ending at the first scope that has no parent.
    pub fn ancestor_ids(&self, scope_id: ScopeId) -> impl Iterator<Item = ScopeId> + '_ {
        std::iter::successors(Some(scope_id), move |&current| self[current].parent)
    }

    /// Iterates over the scopes on the parent chain of `scope_id`, beginning with the scope for
    /// `scope_id` itself and ending at the first scope that has no parent.
    pub fn ancestors(&self, scope_id: ScopeId) -> impl Iterator<Item = &Scope> + '_ {
        std::iter::successors(Some(&self[scope_id]), move |scope| {
            let parent_id = scope.parent?;
            Some(&self[parent_id])
        })
    }
}
impl Default for Scopes<'_> {
    /// Creates a collection whose only entry is the global scope, stored at index `0` so that
    /// [`ScopeId::global`] refers to it.
    fn default() -> Self {
        let global = Scope::global();
        Self(IndexVec::from_raw(vec![global]))
    }
}
impl<'a> Deref for Scopes<'a> {
    type Target = IndexSlice<ScopeId, Scope<'a>>;

    /// Exposes the stored scopes as a shared slice indexed by [`ScopeId`].
    fn deref(&self) -> &Self::Target {
        // The annotation makes the deref coercion from the backing `IndexVec` explicit.
        let scopes: &Self::Target = &self.0;
        scopes
    }
}
impl<'a> DerefMut for Scopes<'a> {
    /// Exposes the stored scopes as a mutable slice indexed by [`ScopeId`].
    fn deref_mut(&mut self) -> &mut Self::Target {
        // The annotation makes the deref coercion from the backing `IndexVec` explicit.
        let scopes: &mut Self::Target = &mut self.0;
        scopes
    }
}