From 3560f864508daceb0895ac3370d432229829781e Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Mon, 14 Jul 2025 13:50:58 +0200 Subject: [PATCH] [ty] Use an interval map for scopes by expression (#19025) --- crates/ruff_python_ast/src/node_index.rs | 10 +++ .../ty_python_semantic/src/semantic_index.rs | 59 +++++++++++---- .../src/semantic_index/builder.rs | 71 +++++++++++++++++-- .../ty_python_semantic/src/semantic_model.rs | 18 ++++- 4 files changed, 136 insertions(+), 22 deletions(-) diff --git a/crates/ruff_python_ast/src/node_index.rs b/crates/ruff_python_ast/src/node_index.rs index 094a5460d7..cb8042b04d 100644 --- a/crates/ruff_python_ast/src/node_index.rs +++ b/crates/ruff_python_ast/src/node_index.rs @@ -49,6 +49,16 @@ impl NodeIndex { pub fn as_usize(self) -> usize { self.0 as _ } + + pub fn as_u32(self) -> u32 { + self.0 + } +} + +impl From for NodeIndex { + fn from(value: u32) -> Self { + NodeIndex(value) + } } impl From for AtomicNodeIndex { diff --git a/crates/ty_python_semantic/src/semantic_index.rs b/crates/ty_python_semantic/src/semantic_index.rs index 5cb8b00238..e5219dce43 100644 --- a/crates/ty_python_semantic/src/semantic_index.rs +++ b/crates/ty_python_semantic/src/semantic_index.rs @@ -5,6 +5,7 @@ use ruff_db::files::File; use ruff_db::parsed::parsed_module; use ruff_index::{IndexSlice, IndexVec}; +use ruff_python_ast::NodeIndex; use ruff_python_parser::semantic_errors::SemanticSyntaxError; use rustc_hash::{FxHashMap, FxHashSet}; use salsa::Update; @@ -24,6 +25,7 @@ use crate::semantic_index::place::{ ScopeKind, ScopedPlaceId, }; use crate::semantic_index::use_def::{EagerSnapshotKey, ScopedEagerSnapshotId, UseDefMap}; +use crate::semantic_model::HasTrackedScope; use crate::util::get_size::untracked_arc_size; pub mod ast_ids; @@ -203,7 +205,7 @@ pub(crate) struct SemanticIndex<'db> { scopes: IndexVec, /// Map expressions to their corresponding scope. - scopes_by_expression: FxHashMap, + scopes_by_expression: ExpressionsScopeMap, /// Map from a node creating a definition to its definition. definitions_by_node: FxHashMap>, @@ -268,25 +270,26 @@ impl<'db> SemanticIndex<'db> { /// Returns the ID of the `expression`'s enclosing scope. #[track_caller] - pub(crate) fn expression_scope_id( - &self, - expression: impl Into, - ) -> FileScopeId { - self.scopes_by_expression[&expression.into()] + pub(crate) fn expression_scope_id(&self, expression: &E) -> FileScopeId + where + E: HasTrackedScope, + { + self.try_expression_scope_id(expression) + .expect("Expression to be part of a scope if it is from the same module") } /// Returns the ID of the `expression`'s enclosing scope. - pub(crate) fn try_expression_scope_id( - &self, - expression: impl Into, - ) -> Option { - self.scopes_by_expression.get(&expression.into()).copied() + pub(crate) fn try_expression_scope_id(&self, expression: &E) -> Option + where + E: HasTrackedScope, + { + self.scopes_by_expression.try_get(expression) } /// Returns the [`Scope`] of the `expression`'s enclosing scope. #[allow(unused)] #[track_caller] - pub(crate) fn expression_scope(&self, expression: impl Into) -> &Scope { + pub(crate) fn expression_scope(&self, expression: &impl HasTrackedScope) -> &Scope { &self.scopes[self.expression_scope_id(expression)] } @@ -614,6 +617,38 @@ impl<'a> Iterator for ChildrenIter<'a> { impl FusedIterator for ChildrenIter<'_> {} +/// Interval map that maps a range of expression node ids to their corresponding scopes. +/// +/// Lookups require `O(log n)` time, where `n` is roughly the number of scopes (roughly +/// because sub-scopes can be interleaved with expressions in the outer scope, e.g. function, some statements, a function). +#[derive(Eq, PartialEq, Debug, get_size2::GetSize, Default)] +struct ExpressionsScopeMap(Box<[(std::ops::RangeInclusive, FileScopeId)]>); + +impl ExpressionsScopeMap { + fn try_get(&self, node: &E) -> Option + where + E: HasTrackedScope, + { + let node_index = node.node_index().load(); + + let entry = self + .0 + .binary_search_by_key(&node_index, |(range, _)| *range.start()); + + let index = match entry { + Ok(index) => index, + Err(index) => index.checked_sub(1)?, + }; + + let (range, scope) = &self.0[index]; + if range.contains(&node_index) { + Some(*scope) + } else { + None + } + } +} + #[cfg(test)] mod tests { use ruff_db::files::{File, system_path_to_file}; diff --git a/crates/ty_python_semantic/src/semantic_index/builder.rs b/crates/ty_python_semantic/src/semantic_index/builder.rs index 5d862bfa0f..3b53e9c5a9 100644 --- a/crates/ty_python_semantic/src/semantic_index/builder.rs +++ b/crates/ty_python_semantic/src/semantic_index/builder.rs @@ -10,7 +10,7 @@ use ruff_db::source::{SourceText, source_text}; use ruff_index::IndexVec; use ruff_python_ast::name::Name; use ruff_python_ast::visitor::{Visitor, walk_expr, walk_pattern, walk_stmt}; -use ruff_python_ast::{self as ast, PySourceType, PythonVersion}; +use ruff_python_ast::{self as ast, NodeIndex, PySourceType, PythonVersion}; use ruff_python_parser::semantic_errors::{ SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxErrorKind, }; @@ -45,7 +45,8 @@ use crate::semantic_index::reachability_constraints::{ use crate::semantic_index::use_def::{ EagerSnapshotKey, FlowSnapshot, ScopedEagerSnapshotId, UseDefMapBuilder, }; -use crate::semantic_index::{ArcUseDefMap, SemanticIndex}; +use crate::semantic_index::{ArcUseDefMap, ExpressionsScopeMap, SemanticIndex}; +use crate::semantic_model::HasTrackedScope; use crate::unpack::{Unpack, UnpackKind, UnpackPosition, UnpackValue}; use crate::{Db, Program}; @@ -102,7 +103,7 @@ pub(super) struct SemanticIndexBuilder<'db, 'ast> { ast_ids: IndexVec, use_def_maps: IndexVec>, scopes_by_node: FxHashMap, - scopes_by_expression: FxHashMap, + scopes_by_expression: ExpressionsScopeMapBuilder, definitions_by_node: FxHashMap>, expressions_by_node: FxHashMap>, imported_modules: FxHashSet, @@ -136,7 +137,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> { scope_ids_by_scope: IndexVec::new(), use_def_maps: IndexVec::new(), - scopes_by_expression: FxHashMap::default(), + scopes_by_expression: ExpressionsScopeMapBuilder::new(), scopes_by_node: FxHashMap::default(), definitions_by_node: FxHashMap::default(), expressions_by_node: FxHashMap::default(), @@ -1038,7 +1039,6 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> { place_tables.shrink_to_fit(); use_def_maps.shrink_to_fit(); ast_ids.shrink_to_fit(); - self.scopes_by_expression.shrink_to_fit(); self.definitions_by_node.shrink_to_fit(); self.scope_ids_by_scope.shrink_to_fit(); @@ -1053,7 +1053,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> { expressions_by_node: self.expressions_by_node, scope_ids_by_scope: self.scope_ids_by_scope, ast_ids, - scopes_by_expression: self.scopes_by_expression, + scopes_by_expression: self.scopes_by_expression.build(), scopes_by_node: self.scopes_by_node, use_def_maps, imported_modules: Arc::new(self.imported_modules), @@ -2016,7 +2016,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> { self.with_semantic_checker(|semantic, context| semantic.visit_expr(expr, context)); self.scopes_by_expression - .insert(expr.into(), self.current_scope()); + .record_expression(expr, self.current_scope()); let node_key = NodeKey::from_node(expr); @@ -2654,3 +2654,60 @@ fn dunder_all_extend_argument(value: &ast::Expr) -> Option<&ast::Expr> { (attr == "__all__").then_some(value) } + +/// Builds an interval-map that matches expressions (by their node index) to their enclosing scopes. +/// +/// The interval map is built in a two-step process because the expression ids are assigned in source order, +/// but we visit the expressions in semantic order. Few expressions are registered out of order. +/// +/// 1. build a point vector that maps node indices to their corresponding file scopes. +/// 2. Sort the expressions by their starting id. Then condense the point vector into an interval map +/// by collapsing adjacent node indices with the same scope +/// into a single interval. +struct ExpressionsScopeMapBuilder { + expression_and_scope: Vec<(NodeIndex, FileScopeId)>, +} + +impl ExpressionsScopeMapBuilder { + fn new() -> Self { + Self { + expression_and_scope: vec![], + } + } + + fn record_expression(&mut self, expression: &impl HasTrackedScope, scope: FileScopeId) { + self.expression_and_scope + .push((expression.node_index().load(), scope)); + } + + fn build(mut self) -> ExpressionsScopeMap { + self.expression_and_scope + .sort_unstable_by_key(|(index, _)| *index); + + let mut iter = self.expression_and_scope.into_iter(); + let Some(first) = iter.next() else { + return ExpressionsScopeMap::default(); + }; + + let mut interval_map = Vec::new(); + + let mut current_scope = first.1; + let mut range = first.0..=NodeIndex::from(first.0.as_u32() + 1); + + for (index, scope) in iter { + if scope == current_scope { + range = *range.start()..=index; + continue; + } + + interval_map.push((range, current_scope)); + + current_scope = scope; + range = index..=index; + } + + interval_map.push((range, current_scope)); + + ExpressionsScopeMap(interval_map.into_boxed_slice()) + } +} diff --git a/crates/ty_python_semantic/src/semantic_model.rs b/crates/ty_python_semantic/src/semantic_model.rs index 3f2ef429e3..d15eaff9d9 100644 --- a/crates/ty_python_semantic/src/semantic_model.rs +++ b/crates/ty_python_semantic/src/semantic_model.rs @@ -1,7 +1,7 @@ use ruff_db::files::{File, FilePath}; use ruff_db::source::line_index; use ruff_python_ast as ast; -use ruff_python_ast::{Expr, ExprRef, name::Name}; +use ruff_python_ast::{Expr, ExprRef, HasNodeIndex, name::Name}; use ruff_source_file::LineIndex; use crate::Db; @@ -126,7 +126,7 @@ impl<'db> SemanticModel<'db> { // expression that we're in, then just // fall back to the global scope. None => Some(FileScopeId::global()), - Some(expr) => index.try_expression_scope_id(expr), + Some(expr) => index.try_expression_scope_id(&expr), }, }) else { return vec![]; @@ -297,7 +297,7 @@ pub trait HasType { impl HasType for ast::ExprRef<'_> { fn inferred_type<'db>(&self, model: &SemanticModel<'db>) -> Type<'db> { let index = semantic_index(model.db, model.file); - let file_scope = index.expression_scope_id(*self); + let file_scope = index.expression_scope_id(self); let scope = file_scope.to_scope_id(model.db, model.file); infer_scope_types(model.db, scope).expression_type(*self) @@ -419,6 +419,18 @@ impl HasType for ast::Alias { } } +/// Implemented by types for which the semantic index tracks their scope. +pub(crate) trait HasTrackedScope: HasNodeIndex {} + +impl HasTrackedScope for ast::Expr {} + +impl HasTrackedScope for ast::ExprRef<'_> {} +impl HasTrackedScope for &ast::ExprRef<'_> {} + +// See https://github.com/astral-sh/ty/issues/572 why this implementation exists +// even when we never register identifiers during semantic index building. +impl HasTrackedScope for ast::Identifier {} + #[cfg(test)] mod tests { use ruff_db::files::system_path_to_file;