[ty] AST garbage collection (#18482)
## Summary

Garbage collect ASTs once we are done checking a given file. Queries with a cross-file dependency on the AST will reparse the file on demand. This reduces ty's peak memory usage by ~20-30%.

The primary change of this PR is adding a `node_index` field to every AST node, which is assigned by the parser. `ParsedModule` can use this to create a flat index of AST nodes any time the file is parsed (or reparsed). This allows `AstNodeRef` to simply index into the current instance of the `ParsedModule`, instead of storing a pointer directly.

The indices are somewhat hackily (using an atomic integer) assigned by the `parsed_module` query instead of by the parser directly. Assigning the indices in source-order in the (recursive) parser turns out to be difficult, and collecting the nodes during semantic indexing is impossible as `SemanticIndex` does not hold onto a specific `ParsedModuleRef`, which the pointers in the flat AST are tied to. This means that we have to do an extra AST traversal to assign and collect the nodes into a flat index, but the small performance impact (~3% on cold runs) seems worth it for the memory savings.

Part of https://github.com/astral-sh/ty/issues/214.
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
use ruff_python_ast::name::Name;
|
||||
use ruff_python_ast::{self as ast, Expr, ExprContext, Number, Operator, Pattern, Singleton};
|
||||
use ruff_python_ast::{
|
||||
self as ast, AtomicNodeIndex, Expr, ExprContext, Number, Operator, Pattern, Singleton,
|
||||
};
|
||||
use ruff_text_size::{Ranged, TextSize};
|
||||
|
||||
use crate::ParseErrorType;
|
||||
@@ -110,6 +112,7 @@ impl Parser<'_> {
|
||||
lhs = Pattern::MatchOr(ast::PatternMatchOr {
|
||||
range: self.node_range(start),
|
||||
patterns,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -125,6 +128,7 @@ impl Parser<'_> {
|
||||
range: self.node_range(start),
|
||||
name: Some(ident),
|
||||
pattern: Some(Box::new(lhs)),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -200,18 +204,25 @@ impl Parser<'_> {
|
||||
} else {
|
||||
let key = match parser.parse_match_pattern_lhs(AllowStarPattern::No) {
|
||||
Pattern::MatchValue(ast::PatternMatchValue { value, .. }) => *value,
|
||||
Pattern::MatchSingleton(ast::PatternMatchSingleton { value, range }) => {
|
||||
match value {
|
||||
Singleton::None => Expr::NoneLiteral(ast::ExprNoneLiteral { range }),
|
||||
Singleton::True => {
|
||||
Expr::BooleanLiteral(ast::ExprBooleanLiteral { value: true, range })
|
||||
}
|
||||
Singleton::False => Expr::BooleanLiteral(ast::ExprBooleanLiteral {
|
||||
value: false,
|
||||
range,
|
||||
}),
|
||||
Pattern::MatchSingleton(ast::PatternMatchSingleton {
|
||||
value,
|
||||
range,
|
||||
node_index,
|
||||
}) => match value {
|
||||
Singleton::None => {
|
||||
Expr::NoneLiteral(ast::ExprNoneLiteral { range, node_index })
|
||||
}
|
||||
}
|
||||
Singleton::True => Expr::BooleanLiteral(ast::ExprBooleanLiteral {
|
||||
value: true,
|
||||
range,
|
||||
node_index,
|
||||
}),
|
||||
Singleton::False => Expr::BooleanLiteral(ast::ExprBooleanLiteral {
|
||||
value: false,
|
||||
range,
|
||||
node_index,
|
||||
}),
|
||||
},
|
||||
pattern => {
|
||||
parser.add_error(
|
||||
ParseErrorType::OtherError("Invalid mapping pattern key".to_string()),
|
||||
@@ -244,6 +255,7 @@ impl Parser<'_> {
|
||||
keys,
|
||||
patterns,
|
||||
rest,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -267,6 +279,7 @@ impl Parser<'_> {
|
||||
} else {
|
||||
Some(ident)
|
||||
},
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -306,6 +319,7 @@ impl Parser<'_> {
|
||||
return Pattern::MatchSequence(ast::PatternMatchSequence {
|
||||
patterns: vec![],
|
||||
range: self.node_range(start),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -360,6 +374,7 @@ impl Parser<'_> {
|
||||
ast::PatternMatchSequence {
|
||||
range: self.node_range(start),
|
||||
patterns,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -374,6 +389,7 @@ impl Parser<'_> {
|
||||
Pattern::MatchSingleton(ast::PatternMatchSingleton {
|
||||
value: Singleton::None,
|
||||
range: self.node_range(start),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})
|
||||
}
|
||||
TokenKind::True => {
|
||||
@@ -381,6 +397,7 @@ impl Parser<'_> {
|
||||
Pattern::MatchSingleton(ast::PatternMatchSingleton {
|
||||
value: Singleton::True,
|
||||
range: self.node_range(start),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})
|
||||
}
|
||||
TokenKind::False => {
|
||||
@@ -388,6 +405,7 @@ impl Parser<'_> {
|
||||
Pattern::MatchSingleton(ast::PatternMatchSingleton {
|
||||
value: Singleton::False,
|
||||
range: self.node_range(start),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})
|
||||
}
|
||||
TokenKind::String | TokenKind::FStringStart | TokenKind::TStringStart => {
|
||||
@@ -396,6 +414,7 @@ impl Parser<'_> {
|
||||
Pattern::MatchValue(ast::PatternMatchValue {
|
||||
value: Box::new(str),
|
||||
range: self.node_range(start),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})
|
||||
}
|
||||
TokenKind::Complex => {
|
||||
@@ -408,8 +427,10 @@ impl Parser<'_> {
|
||||
value: Box::new(Expr::NumberLiteral(ast::ExprNumberLiteral {
|
||||
value: Number::Complex { real, imag },
|
||||
range,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})),
|
||||
range,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})
|
||||
}
|
||||
TokenKind::Int => {
|
||||
@@ -422,8 +443,10 @@ impl Parser<'_> {
|
||||
value: Box::new(Expr::NumberLiteral(ast::ExprNumberLiteral {
|
||||
value: Number::Int(value),
|
||||
range,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})),
|
||||
range,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})
|
||||
}
|
||||
TokenKind::Float => {
|
||||
@@ -436,8 +459,10 @@ impl Parser<'_> {
|
||||
value: Box::new(Expr::NumberLiteral(ast::ExprNumberLiteral {
|
||||
value: Number::Float(value),
|
||||
range,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})),
|
||||
range,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})
|
||||
}
|
||||
kind => {
|
||||
@@ -464,6 +489,7 @@ impl Parser<'_> {
|
||||
return Pattern::MatchValue(ast::PatternMatchValue {
|
||||
value: Box::new(Expr::UnaryOp(unary_expr)),
|
||||
range: self.node_range(start),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -483,6 +509,7 @@ impl Parser<'_> {
|
||||
Pattern::MatchValue(ast::PatternMatchValue {
|
||||
value: Box::new(attribute),
|
||||
range: self.node_range(start),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})
|
||||
} else {
|
||||
// test_ok match_as_pattern_soft_keyword
|
||||
@@ -503,6 +530,7 @@ impl Parser<'_> {
|
||||
range: ident.range,
|
||||
pattern: None,
|
||||
name: if &ident == "_" { None } else { Some(ident) },
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})
|
||||
}
|
||||
} else {
|
||||
@@ -516,10 +544,12 @@ impl Parser<'_> {
|
||||
range: self.missing_node_range(),
|
||||
id: Name::empty(),
|
||||
ctx: ExprContext::Invalid,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
});
|
||||
Pattern::MatchValue(ast::PatternMatchValue {
|
||||
range: invalid_node.range(),
|
||||
value: Box::new(invalid_node),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -575,8 +605,10 @@ impl Parser<'_> {
|
||||
op: operator,
|
||||
right: rhs_value,
|
||||
range,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
})),
|
||||
range,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -616,12 +648,14 @@ impl Parser<'_> {
|
||||
range: ident.range(),
|
||||
id: ident.id,
|
||||
ctx: ExprContext::Load,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
}))
|
||||
} else {
|
||||
Box::new(Expr::Name(ast::ExprName {
|
||||
range: ident.range(),
|
||||
id: Name::empty(),
|
||||
ctx: ExprContext::Invalid,
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
@@ -673,6 +707,7 @@ impl Parser<'_> {
|
||||
ast::Identifier {
|
||||
id: Name::empty(),
|
||||
range: parser.missing_node_range(),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
}
|
||||
};
|
||||
|
||||
@@ -682,6 +717,7 @@ impl Parser<'_> {
|
||||
attr: key,
|
||||
pattern: value_pattern,
|
||||
range: parser.node_range(pattern_start),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
});
|
||||
} else {
|
||||
has_seen_pattern = true;
|
||||
@@ -707,8 +743,10 @@ impl Parser<'_> {
|
||||
patterns,
|
||||
keywords,
|
||||
range: self.node_range(arguments_start),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
},
|
||||
range: self.node_range(start),
|
||||
node_index: AtomicNodeIndex::dummy(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user