[ty] AST garbage collection (#18482)

## Summary

Garbage collect ASTs once we are done checking a given file. Queries
with a cross-file dependency on the AST will reparse the file on demand.
This reduces ty's peak memory usage by ~20-30%.

The primary change of this PR is adding a `node_index` field to every
AST node. The parser initializes it with a dummy placeholder, and the
real index is assigned afterwards by the `parsed_module` query (see
below). `ParsedModule` can use this to create a flat index of AST nodes
any time the file is parsed (or reparsed). This allows `AstNodeRef` to
simply index into the current instance of the `ParsedModule`, instead of
storing a pointer directly.

The indices are somewhat hackily (using an atomic integer) assigned by
the `parsed_module` query instead of by the parser directly. Assigning
the indices in source order in the (recursive) parser turns out to be
difficult, and collecting the nodes during semantic indexing is
impossible because `SemanticIndex` does not hold onto a specific
`ParsedModuleRef`, which the pointers in the flat AST are tied to. This
means that we have to do an extra AST traversal to assign the indices
and collect the nodes into a flat index, but the small performance
impact (~3% on cold runs) seems worth it for the memory savings.

Part of https://github.com/astral-sh/ty/issues/214.
This commit is contained in:
Ibraheem Ahmed
2025-06-13 08:40:11 -04:00
committed by GitHub
parent 76d9009a6e
commit c9dff5c7d5
824 changed files with 25243 additions and 804 deletions

View File

@@ -1,5 +1,7 @@
use ruff_python_ast::name::Name;
use ruff_python_ast::{self as ast, Expr, ExprContext, Number, Operator, Pattern, Singleton};
use ruff_python_ast::{
self as ast, AtomicNodeIndex, Expr, ExprContext, Number, Operator, Pattern, Singleton,
};
use ruff_text_size::{Ranged, TextSize};
use crate::ParseErrorType;
@@ -110,6 +112,7 @@ impl Parser<'_> {
lhs = Pattern::MatchOr(ast::PatternMatchOr {
range: self.node_range(start),
patterns,
node_index: AtomicNodeIndex::dummy(),
});
}
@@ -125,6 +128,7 @@ impl Parser<'_> {
range: self.node_range(start),
name: Some(ident),
pattern: Some(Box::new(lhs)),
node_index: AtomicNodeIndex::dummy(),
});
}
@@ -200,18 +204,25 @@ impl Parser<'_> {
} else {
let key = match parser.parse_match_pattern_lhs(AllowStarPattern::No) {
Pattern::MatchValue(ast::PatternMatchValue { value, .. }) => *value,
Pattern::MatchSingleton(ast::PatternMatchSingleton { value, range }) => {
match value {
Singleton::None => Expr::NoneLiteral(ast::ExprNoneLiteral { range }),
Singleton::True => {
Expr::BooleanLiteral(ast::ExprBooleanLiteral { value: true, range })
}
Singleton::False => Expr::BooleanLiteral(ast::ExprBooleanLiteral {
value: false,
range,
}),
Pattern::MatchSingleton(ast::PatternMatchSingleton {
value,
range,
node_index,
}) => match value {
Singleton::None => {
Expr::NoneLiteral(ast::ExprNoneLiteral { range, node_index })
}
}
Singleton::True => Expr::BooleanLiteral(ast::ExprBooleanLiteral {
value: true,
range,
node_index,
}),
Singleton::False => Expr::BooleanLiteral(ast::ExprBooleanLiteral {
value: false,
range,
node_index,
}),
},
pattern => {
parser.add_error(
ParseErrorType::OtherError("Invalid mapping pattern key".to_string()),
@@ -244,6 +255,7 @@ impl Parser<'_> {
keys,
patterns,
rest,
node_index: AtomicNodeIndex::dummy(),
}
}
@@ -267,6 +279,7 @@ impl Parser<'_> {
} else {
Some(ident)
},
node_index: AtomicNodeIndex::dummy(),
}
}
@@ -306,6 +319,7 @@ impl Parser<'_> {
return Pattern::MatchSequence(ast::PatternMatchSequence {
patterns: vec![],
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
});
}
@@ -360,6 +374,7 @@ impl Parser<'_> {
ast::PatternMatchSequence {
range: self.node_range(start),
patterns,
node_index: AtomicNodeIndex::dummy(),
}
}
@@ -374,6 +389,7 @@ impl Parser<'_> {
Pattern::MatchSingleton(ast::PatternMatchSingleton {
value: Singleton::None,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
})
}
TokenKind::True => {
@@ -381,6 +397,7 @@ impl Parser<'_> {
Pattern::MatchSingleton(ast::PatternMatchSingleton {
value: Singleton::True,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
})
}
TokenKind::False => {
@@ -388,6 +405,7 @@ impl Parser<'_> {
Pattern::MatchSingleton(ast::PatternMatchSingleton {
value: Singleton::False,
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
})
}
TokenKind::String | TokenKind::FStringStart | TokenKind::TStringStart => {
@@ -396,6 +414,7 @@ impl Parser<'_> {
Pattern::MatchValue(ast::PatternMatchValue {
value: Box::new(str),
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
})
}
TokenKind::Complex => {
@@ -408,8 +427,10 @@ impl Parser<'_> {
value: Box::new(Expr::NumberLiteral(ast::ExprNumberLiteral {
value: Number::Complex { real, imag },
range,
node_index: AtomicNodeIndex::dummy(),
})),
range,
node_index: AtomicNodeIndex::dummy(),
})
}
TokenKind::Int => {
@@ -422,8 +443,10 @@ impl Parser<'_> {
value: Box::new(Expr::NumberLiteral(ast::ExprNumberLiteral {
value: Number::Int(value),
range,
node_index: AtomicNodeIndex::dummy(),
})),
range,
node_index: AtomicNodeIndex::dummy(),
})
}
TokenKind::Float => {
@@ -436,8 +459,10 @@ impl Parser<'_> {
value: Box::new(Expr::NumberLiteral(ast::ExprNumberLiteral {
value: Number::Float(value),
range,
node_index: AtomicNodeIndex::dummy(),
})),
range,
node_index: AtomicNodeIndex::dummy(),
})
}
kind => {
@@ -464,6 +489,7 @@ impl Parser<'_> {
return Pattern::MatchValue(ast::PatternMatchValue {
value: Box::new(Expr::UnaryOp(unary_expr)),
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
});
}
}
@@ -483,6 +509,7 @@ impl Parser<'_> {
Pattern::MatchValue(ast::PatternMatchValue {
value: Box::new(attribute),
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
})
} else {
// test_ok match_as_pattern_soft_keyword
@@ -503,6 +530,7 @@ impl Parser<'_> {
range: ident.range,
pattern: None,
name: if &ident == "_" { None } else { Some(ident) },
node_index: AtomicNodeIndex::dummy(),
})
}
} else {
@@ -516,10 +544,12 @@ impl Parser<'_> {
range: self.missing_node_range(),
id: Name::empty(),
ctx: ExprContext::Invalid,
node_index: AtomicNodeIndex::dummy(),
});
Pattern::MatchValue(ast::PatternMatchValue {
range: invalid_node.range(),
value: Box::new(invalid_node),
node_index: AtomicNodeIndex::dummy(),
})
}
}
@@ -575,8 +605,10 @@ impl Parser<'_> {
op: operator,
right: rhs_value,
range,
node_index: AtomicNodeIndex::dummy(),
})),
range,
node_index: AtomicNodeIndex::dummy(),
}
}
@@ -616,12 +648,14 @@ impl Parser<'_> {
range: ident.range(),
id: ident.id,
ctx: ExprContext::Load,
node_index: AtomicNodeIndex::dummy(),
}))
} else {
Box::new(Expr::Name(ast::ExprName {
range: ident.range(),
id: Name::empty(),
ctx: ExprContext::Invalid,
node_index: AtomicNodeIndex::dummy(),
}))
}
}
@@ -673,6 +707,7 @@ impl Parser<'_> {
ast::Identifier {
id: Name::empty(),
range: parser.missing_node_range(),
node_index: AtomicNodeIndex::dummy(),
}
};
@@ -682,6 +717,7 @@ impl Parser<'_> {
attr: key,
pattern: value_pattern,
range: parser.node_range(pattern_start),
node_index: AtomicNodeIndex::dummy(),
});
} else {
has_seen_pattern = true;
@@ -707,8 +743,10 @@ impl Parser<'_> {
patterns,
keywords,
range: self.node_range(arguments_start),
node_index: AtomicNodeIndex::dummy(),
},
range: self.node_range(start),
node_index: AtomicNodeIndex::dummy(),
}
}
}