Compare commits

...

1 Commits

Author SHA1 Message Date
Charlie Marsh
2fdde079a8 Try reserving 2024-07-19 15:48:33 -04:00
10 changed files with 184 additions and 9 deletions

75
analyze.py Normal file
View File

@@ -0,0 +1,75 @@
import sys
source_counts = []
lines_counts = []
nodes_counts = []
scopes_counts = []
bindings_counts = []
definitions_counts = []
resolved_references_counts = []
unresolved_references_counts = []
globals_counts = []
resolved_names_counts = []
shadowed_bindings_counts = []
with open(sys.argv[1], 'r') as fp:
for line in fp:
if line.startswith('source'):
source_counts.append(int(line.split()[1]))
if line.startswith('lines'):
lines_counts.append(int(line.split()[1]))
if line.startswith('nodes'):
nodes_counts.append(int(line.split()[1]))
if line.startswith('scopes'):
scopes_counts.append(int(line.split()[1]))
if line.startswith('bindings'):
bindings_counts.append(int(line.split()[1]))
if line.startswith('definitions'):
definitions_counts.append(int(line.split()[1]))
if line.startswith('resolved_references'):
resolved_references_counts.append(int(line.split()[1]))
if line.startswith('unresolved_references'):
unresolved_references_counts.append(int(line.split()[1]))
if line.startswith('globals'):
globals_counts.append(int(line.split()[1]))
if line.startswith('resolved_names'):
resolved_names_counts.append(int(line.split()[1]))
if line.startswith('shadowed_bindings'):
shadowed_bindings_counts.append(int(line.split()[1]))
# Each line represents a file.
# Let's compute (e.g.) the average number of bindings per line.
print('Average number of nodes per file:', sum(nodes_counts) / len(lines_counts))
print('Average number of scopes per file:', sum(scopes_counts) / len(lines_counts))
print('Average number of bindings per file:', sum(bindings_counts) / len(lines_counts))
print('Average number of definitions per file:', sum(definitions_counts) / len(lines_counts))
print('Average number of resolved references per file:', sum(resolved_references_counts) / len(lines_counts))
print('Average number of unresolved references per file:', sum(unresolved_references_counts) / len(lines_counts))
print('Average number of globals per file:', sum(globals_counts) / len(lines_counts))
print('Average number of resolved names per file:', sum(resolved_names_counts) / len(lines_counts))
print('Average number of shadowed bindings per file:', sum(shadowed_bindings_counts) / len(lines_counts))
print()
print('Max nodes per file:', max(nodes_counts))
print('Max scopes per file:', max(scopes_counts))
print('Max bindings per file:', max(bindings_counts))
print('Max definitions per file:', max(definitions_counts))
print('Max resolved references per file:', max(resolved_references_counts))
print('Max unresolved references per file:', max(unresolved_references_counts))
print('Max globals per file:', max(globals_counts))
print('Max resolved names per file:', max(resolved_names_counts))
print('Max shadowed bindings per file:', max(shadowed_bindings_counts))
print()
# Let's compute (e.g.) the average number of bindings per line.
print('Average number of nodes per byte:', sum(nodes_counts) / sum(source_counts))
print('Average number of scopes per byte:', sum(scopes_counts) / sum(source_counts))
print('Average number of bindings per byte:', sum(bindings_counts) / sum(source_counts))
print('Average number of definitions per byte:', sum(definitions_counts) / sum(source_counts))
print('Average number of resolved references per byte:', sum(resolved_references_counts) / sum(source_counts))
print('Average number of unresolved references per byte:', sum(unresolved_references_counts) / sum(source_counts))
print('Average number of globals per byte:', sum(globals_counts) / sum(source_counts))
print('Average number of resolved names per byte:', sum(resolved_names_counts) / sum(source_counts))
print('Average number of shadowed bindings per byte:', sum(shadowed_bindings_counts) / sum(source_counts))

View File

@@ -22,6 +22,10 @@ impl<I: Idx, T> IndexVec<I, T> {
}
}
pub fn reserve(&mut self, additional: usize) {
self.raw.reserve(additional)
}
#[inline]
pub fn with_capacity(capacity: usize) -> Self {
Self {

View File

@@ -1914,6 +1914,10 @@ impl<'a> Checker<'a> {
binding_id
}
fn reserve(&mut self) {
self.semantic.reserve(self.locator.contents().len());
}
fn bind_builtins(&mut self) {
for builtin in PYTHON_BUILTINS
.iter()
@@ -2411,6 +2415,7 @@ pub(crate) fn check_ast(
cell_offsets,
notebook_index,
);
checker.reserve();
checker.bind_builtins();
// Iterate over the AST.
@@ -2436,5 +2441,29 @@ pub(crate) fn check_ast(
checker.analyze.scopes.push(ScopeId::global());
analyze::deferred_scopes(&mut checker);
// println!("source: {:?}", checker.locator.contents().len());
// println!("nodes: {:?}", checker.semantic().nodes.len());
// println!("lines: {:?}", checker.locator.contents().lines().count());
// println!("scopes: {:?}", checker.semantic.scopes.len());
// println!("bindings: {:?}", checker.semantic.bindings.len());
// println!("definitions: {:?}", checker.semantic.definitions.len());
// println!(
// "resolved_references: {:?}",
// checker.semantic.resolved_references.len()
// );
// println!(
// "unresolved_references: {:?}",
// checker.semantic.unresolved_references.len()
// );
// println!("globals: {:?}", checker.semantic.globals.len());
// println!(
// "resolved_names: {:?}",
// checker.semantic.resolved_names.len()
// );
// println!(
// "shadowed_bindings: {:?}",
// checker.semantic.shadowed_bindings.len()
// );
checker.diagnostics
}

View File

@@ -357,6 +357,10 @@ pub struct BindingId;
pub struct Bindings<'a>(IndexVec<BindingId, Binding<'a>>);
impl<'a> Bindings<'a> {
pub fn reserve(&mut self, additional: usize) {
self.0.reserve(additional)
}
/// Pushes a new [`Binding`] and returns its [`BindingId`].
pub fn push(&mut self, binding: Binding<'a>) -> BindingId {
self.0.push(binding)

View File

@@ -187,6 +187,10 @@ impl<'a> Definition<'a> {
pub struct Definitions<'a>(IndexVec<DefinitionId, Definition<'a>>);
impl<'a> Definitions<'a> {
pub fn reserve(&mut self, additional: usize) {
self.0.reserve(additional)
}
pub fn for_module(definition: Module<'a>) -> Self {
Self(IndexVec::from_raw(vec![Definition::Module(definition)]))
}

View File

@@ -16,13 +16,21 @@ use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor};
pub struct GlobalsId;
#[derive(Debug, Default)]
pub(crate) struct GlobalsArena<'a>(IndexVec<GlobalsId, Globals<'a>>);
pub struct GlobalsArena<'a>(IndexVec<GlobalsId, Globals<'a>>);
impl<'a> GlobalsArena<'a> {
pub fn reserve(&mut self, additional: usize) {
self.0.reserve(additional)
}
/// Inserts a new set of global names into the global names arena and returns its unique id.
pub(crate) fn push(&mut self, globals: Globals<'a>) -> GlobalsId {
self.0.push(globals)
}
pub fn len(&self) -> usize {
self.0.len()
}
}
impl<'a> Index<GlobalsId> for GlobalsArena<'a> {

View File

@@ -33,7 +33,7 @@ pub struct SemanticModel<'a> {
module: Module<'a>,
/// Stack of all AST nodes in the program.
nodes: Nodes<'a>,
pub nodes: Nodes<'a>,
/// The ID of the current AST node.
node_id: Option<NodeId>,
@@ -58,13 +58,13 @@ pub struct SemanticModel<'a> {
pub bindings: Bindings<'a>,
/// Stack of all references created in any scope, at any point in execution.
resolved_references: ResolvedReferences,
pub resolved_references: ResolvedReferences,
/// Stack of all unresolved references created in any scope, at any point in execution.
unresolved_references: UnresolvedReferences,
pub unresolved_references: UnresolvedReferences,
/// Arena of global bindings.
globals: GlobalsArena<'a>,
pub globals: GlobalsArena<'a>,
/// Map from binding ID to binding ID that it shadows (in another scope).
///
@@ -129,7 +129,7 @@ pub struct SemanticModel<'a> {
/// Map from [`ast::ExprName`] node (represented as a [`NameId`]) to the [`Binding`] to which
/// it resolved (represented as a [`BindingId`]).
resolved_names: FxHashMap<NameId, BindingId>,
pub resolved_names: FxHashMap<NameId, BindingId>,
}
impl<'a> SemanticModel<'a> {
@@ -159,6 +159,37 @@ impl<'a> SemanticModel<'a> {
}
}
pub fn reserve(&mut self, capacity: usize) {
const NODES_PER_BYTE: f64 = 0.0735463782722034;
const SCOPES_PER_BYTE: f64 = 0.003789696886327717;
const BINDINGS_PER_BYTE: f64 = 0.033044323498783695;
const DEFINITIONS_PER_BYTE: f64 = 0.001704180043788056;
const RESOLVED_REFERENCES_PER_BYTE: f64 = 0.01967324945679754;
const UNRESOLVED_REFERENCES_PER_BYTE: f64 = 0.0001548020396830927;
const GLOBALS_PER_BYTE: f64 = 3.8755626455402864e-06;
const RESOLVED_NAMES_PER_BYTE: f64 = 0.019565557549871618;
const SHADOWED_BINDINGS_PER_BYTE: f64 = 0.00017560708107067872;
self.nodes
.reserve((NODES_PER_BYTE * capacity as f64) as usize);
self.scopes
.reserve((SCOPES_PER_BYTE * capacity as f64) as usize);
self.bindings
.reserve((BINDINGS_PER_BYTE * capacity as f64) as usize);
self.definitions
.reserve((DEFINITIONS_PER_BYTE * capacity as f64) as usize);
self.resolved_references
.reserve((RESOLVED_REFERENCES_PER_BYTE * capacity as f64) as usize);
self.unresolved_references
.reserve((UNRESOLVED_REFERENCES_PER_BYTE * capacity as f64) as usize);
self.globals
.reserve((GLOBALS_PER_BYTE * capacity as f64) as usize);
self.resolved_names
.reserve((RESOLVED_NAMES_PER_BYTE * capacity as f64) as usize);
self.shadowed_bindings
.reserve((SHADOWED_BINDINGS_PER_BYTE * capacity as f64) as usize);
}
/// Return the [`Binding`] for the given [`BindingId`].
#[inline]
pub fn binding(&self, id: BindingId) -> &Binding<'a> {
@@ -2328,7 +2359,7 @@ impl Ranged for ImportedName {
/// A unique identifier for an [`ast::ExprName`]. No two names can even appear at the same location
/// in the source code, so the starting offset is a cheap and sufficient unique identifier.
#[derive(Debug, Hash, PartialEq, Eq)]
struct NameId(TextSize);
pub struct NameId(TextSize);
impl From<&ast::ExprName> for NameId {
fn from(name: &ast::ExprName) -> Self {

View File

@@ -47,6 +47,14 @@ impl<'a> Nodes<'a> {
})
}
pub fn reserve(&mut self, additional: usize) {
self.nodes.reserve(additional)
}
pub fn len(&self) -> usize {
self.nodes.len()
}
/// Return the [`NodeId`] of the parent node.
#[inline]
pub fn parent_id(&self, node_id: NodeId) -> Option<NodeId> {

View File

@@ -114,9 +114,13 @@ pub struct ResolvedReferenceId;
/// The references of a program indexed by [`ResolvedReferenceId`].
#[derive(Debug, Default)]
pub(crate) struct ResolvedReferences(IndexVec<ResolvedReferenceId, ResolvedReference>);
pub struct ResolvedReferences(IndexVec<ResolvedReferenceId, ResolvedReference>);
impl ResolvedReferences {
pub fn reserve(&mut self, additional: usize) {
self.0.reserve(additional)
}
/// Pushes a new [`ResolvedReference`] and returns its [`ResolvedReferenceId`].
pub(crate) fn push(
&mut self,
@@ -195,9 +199,13 @@ bitflags! {
}
#[derive(Debug, Default)]
pub(crate) struct UnresolvedReferences(Vec<UnresolvedReference>);
pub struct UnresolvedReferences(Vec<UnresolvedReference>);
impl UnresolvedReferences {
pub fn reserve(&mut self, additional: usize) {
self.0.reserve(additional)
}
/// Pushes a new [`UnresolvedReference`].
pub(crate) fn push(
&mut self,

View File

@@ -203,6 +203,10 @@ impl ScopeId {
pub struct Scopes<'a>(IndexVec<ScopeId, Scope<'a>>);
impl<'a> Scopes<'a> {
pub fn reserve(&mut self, additional: usize) {
self.0.reserve(additional)
}
/// Returns a reference to the global scope
pub(crate) fn global(&self) -> &Scope<'a> {
&self[ScopeId::global()]