From 6282402a8cb44ac6362c6007fc911c3d75729648 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 16 Oct 2024 14:03:59 +0100 Subject: [PATCH] [red-knot] Add control flow for try/except blocks (#13729) --- .../mdtest/exception/control_flow.md | 641 ++++++++++++++++++ .../src/semantic_index/builder.rs | 122 +++- .../semantic_index/builder/except_handlers.rs | 102 +++ crates/ruff_benchmark/benches/red_knot.rs | 26 +- 4 files changed, 867 insertions(+), 24 deletions(-) create mode 100644 crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md create mode 100644 crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs diff --git a/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md b/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md new file mode 100644 index 0000000000..ead639e490 --- /dev/null +++ b/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md @@ -0,0 +1,641 @@ +# Control flow for exception handlers + +These tests assert that we understand the possible "definition states" (which +symbols might or might not be defined) in the various branches of a +`try`/`except`/`else`/`finally` block. + +For a full writeup on the semantics of exception handlers, +see [this document][1]. + +The tests throughout this Markdown document use functions with names starting +with `could_raise_*` to mark definitions that might or might not succeed +(as the function could raise an exception). A type checker must assume that any +arbitrary function call could raise an exception in Python; this is just a +naming convention used in these tests for clarity, and to future-proof the +tests against possible future improvements whereby certain statements or +expressions could potentially be inferred as being incapable of causing an +exception to be raised. + +## A single bare `except` + +Consider the following `try`/`except` block, with a single bare `except:`. +There are different types for the variable `x` in the two branches of this +block, and we can't determine which branch might have been taken from the +perspective of code following this block. The inferred type after the block's +conclusion is therefore the union of the type at the end of the `try` suite +(`str`) and the type at the end of the `except` suite (`Literal[2]`). + +*Within* the `except` suite, we must infer a union of all possible "definition +states" we could have been in at any point during the `try` suite. This is +because control flow could have jumped to the `except` suite without any of the +`try`-suite definitions successfully completing, with only *some* of the +`try`-suite definitions successfully completing, or indeed with *all* of them +successfully completing. The type of `x` at the beginning of the `except` suite +in this example is therefore `Literal[1] | str`, taking into account that we +might have jumped to the `except` suite before the +`x = could_raise_returns_str()` redefinition, but we *also* could have jumped +to the `except` suite *after* that redefinition. + +```py path=union_type_inferred.py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except: + reveal_type(x) # revealed: Literal[1] | str + x = 2 + reveal_type(x) # revealed: Literal[2] + +reveal_type(x) # revealed: str | Literal[2] +``` + +If `x` has the same type at the end of both branches, however, the branches +unify and `x` is not inferred as having a union type following the +`try`/`except` block: + +```py path=branches_unify_to_non_union_type.py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + x = could_raise_returns_str() +except: + x = could_raise_returns_str() + +reveal_type(x) # revealed: str +``` + +## A non-bare `except` + +For simple `try`/`except` blocks, an `except TypeError:` handler has the same +control flow semantics as an `except:` handler. An `except TypeError:` handler +will not catch *all* exceptions: if this is the only handler, it opens up the +possibility that an exception might occur that would not be handled. However, +as described in [the document on exception-handling semantics][1], that would +lead to termination of the scope. It's therefore irrelevant to consider this +possibility when it comes to control-flow analysis. + +```py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = 2 + reveal_type(x) # revealed: Literal[2] + +reveal_type(x) # revealed: str | Literal[2] +``` + +## Multiple `except` branches + +If the scope reaches the final `reveal_type` call in this example, +either the `try`-block suite of statements was executed in its entirety, +or exactly one `except` suite was executed in its entirety. +The inferred type of `x` at this point is the union of the types at the end of +the three suites: + +- At the end of `try`, `type(x) == str` +- At the end of `except TypeError`, `x == 2` +- At the end of `except ValueError`, `x == 3` + +```py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = 2 + reveal_type(x) # revealed: Literal[2] +except ValueError: + reveal_type(x) # revealed: Literal[1] | str + x = 3 + reveal_type(x) # revealed: Literal[3] + +reveal_type(x) # revealed: str | Literal[2, 3] +``` + +## Exception handlers with `else` branches (but no `finally`) + +If we reach the `reveal_type` call at the end of this scope, +either the `try` and `else` suites were both executed in their entireties, +or the `except` suite was executed in its entirety. The type of `x` at this +point is the union of the type at the end of the `else` suite and the type at +the end of the `except` suite: + +- At the end of `else`, `x == 3` +- At the end of `except`, `x == 2` + +```py path=single_except.py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = 2 + reveal_type(x) # revealed: Literal[2] +else: + reveal_type(x) # revealed: str + x = 3 + reveal_type(x) # revealed: Literal[3] + +reveal_type(x) # revealed: Literal[2, 3] +``` + +For a block that has multiple `except` branches and an `else` branch, the same +principle applies. In order to reach the final `reveal_type` call, +either exactly one of the `except` suites must have been executed in its +entirety, or the `try` suite and the `else` suite must both have been executed +in their entireties: + +```py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = 2 + reveal_type(x) # revealed: Literal[2] +except ValueError: + reveal_type(x) # revealed: Literal[1] | str + x = 3 + reveal_type(x) # revealed: Literal[3] +else: + reveal_type(x) # revealed: str + x = 4 + reveal_type(x) # revealed: Literal[4] + +reveal_type(x) # revealed: Literal[2, 3, 4] +``` + +## Exception handlers with `finally` branches (but no `except` branches) + +A `finally` suite is *always* executed. As such, if we reach the `reveal_type` +call at the end of this example, we know that `x` *must* have been reassigned +to `2` during the `finally` suite. The type of `x` at the end of the example is +therefore `Literal[2]`: + +```py path=redef_in_finally.py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +finally: + x = 2 + reveal_type(x) # revealed: Literal[2] + +reveal_type(x) # revealed: Literal[2] +``` + +If `x` was *not* redefined in the `finally` suite, however, things are somewhat +more complicated. If we reach the final `reveal_type` call, +unlike the state when we're visiting the `finally` suite, +we know that the `try`-block suite ran to completion. +This means that there are fewer possible states at this point than there were +when we were inside the `finally` block. + +(Our current model does *not* correctly infer the types *inside* `finally` +suites, however; this is still a TODO item for us.) + +```py path=no_redef_in_finally.py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +finally: + # TODO: should be Literal[1] | str + reveal_type(x) # revealed: str + +reveal_type(x) # revealed: str +``` + +## Combining an `except` branch with a `finally` branch + +As previously stated, we do not yet have accurate inference for types *inside* +`finally` suites. When we do, however, we will have to take account of the +following possibilities inside `finally` suites: + +- The `try` suite could have run to completion +- Or we could have jumped from halfway through the `try` suite to an `except` + suite, and the `except` suite ran to completion +- Or we could have jumped from halfway through the `try` suite straight to the + `finally` suite due to an unhandled exception +- Or we could have jumped from halfway through the `try` suite to an + `except` suite, only for an exception raised in the `except` suite to cause + us to jump to the `finally` suite before the `except` suite ran to completion + +```py path=redef_in_finally.py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_bool() -> bool: + return True + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + x = could_raise_returns_bool() + reveal_type(x) # revealed: bool +finally: + # TODO: should be `Literal[1] | str | bytes | bool` + reveal_type(x) # revealed: str | bool + x = 2 + reveal_type(x) # revealed: Literal[2] + +reveal_type(x) # revealed: Literal[2] +``` + +Now for an example without a redefinition in the `finally` suite. +As before, there *should* be fewer possibilities after completion of the +`finally` suite than there were during the `finally` suite itself. +(In some control-flow possibilities, some exceptions were merely *suspended* +during the `finally` suite; these lead to the scope's termination following the +conclusion of the `finally` suite.) + +```py path=no_redef_in_finally.py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_bool() -> bool: + return True + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + x = could_raise_returns_bool() + reveal_type(x) # revealed: bool +finally: + # TODO: should be `Literal[1] | str | bytes | bool` + reveal_type(x) # revealed: str | bool + +reveal_type(x) # revealed: str | bool +``` + +An example with multiple `except` branches and a `finally` branch: + +```py path=multiple_except_branches.py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_bool() -> bool: + return True + +def could_raise_returns_memoryview() -> memoryview: + return memoryview(b"") + +def could_raise_returns_float() -> float: + return 3.14 + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + x = could_raise_returns_bool() + reveal_type(x) # revealed: bool +except ValueError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_memoryview() + reveal_type(x) # revealed: memoryview + x = could_raise_returns_float() + reveal_type(x) # revealed: float +finally: + # TODO: should be `Literal[1] | str | bytes | bool | memoryview | float` + reveal_type(x) # revealed: str | bool | float + +reveal_type(x) # revealed: str | bool | float +``` + +## Combining `except`, `else` and `finally` branches + +If the exception handler has an `else` branch, we must also take into account +the possibility that control flow could have jumped to the `finally` suite from +partway through the `else` suite due to an exception raised *there*. + +```py path=single_except_branch.py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_bool() -> bool: + return True + +def could_raise_returns_memoryview() -> memoryview: + return memoryview(b"") + +def could_raise_returns_float() -> float: + return 3.14 + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + x = could_raise_returns_bool() + reveal_type(x) # revealed: bool +else: + reveal_type(x) # revealed: str + x = could_raise_returns_memoryview() + reveal_type(x) # revealed: memoryview + x = could_raise_returns_float() + reveal_type(x) # revealed: float +finally: + # TODO: should be `Literal[1] | str | bytes | bool | memoryview | float` + reveal_type(x) # revealed: bool | float + +reveal_type(x) # revealed: bool | float +``` + +The same again, this time with multiple `except` branches: + +```py path=multiple_except_branches.py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_bool() -> bool: + return True + +def could_raise_returns_memoryview() -> memoryview: + return memoryview(b"") + +def could_raise_returns_float() -> float: + return 3.14 + +def could_raise_returns_range() -> range: + return range(42) + +def could_raise_returns_slice() -> slice: + return slice(None) + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + x = could_raise_returns_bool() + reveal_type(x) # revealed: bool +except ValueError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_memoryview() + reveal_type(x) # revealed: memoryview + x = could_raise_returns_float() + reveal_type(x) # revealed: float +else: + reveal_type(x) # revealed: str + x = could_raise_returns_range() + reveal_type(x) # revealed: range + x = could_raise_returns_slice() + reveal_type(x) # revealed: slice +finally: + # TODO: should be `Literal[1] | str | bytes | bool | memoryview | float | range | slice` + reveal_type(x) # revealed: bool | float | slice + +reveal_type(x) # revealed: bool | float | slice +``` + +## Nested `try`/`except` blocks + +It would take advanced analysis, which we are not yet capable of, to be able +to determine that an exception handler always suppresses all exceptions. This +is partly because it is possible for statements in `except`, `else` and +`finally` suites to raise exceptions as well as statements in `try` suites. +This means that if an exception handler is nested inside the `try` statement of +an enclosing exception handler, it should (at least for now) be treated the +same as any other node: as a suite containing statements that could possibly +raise exceptions, which would lead to control flow jumping out of that suite +prior to the suite running to completion. + +```py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_bool() -> bool: + return True + +def could_raise_returns_memoryview() -> memoryview: + return memoryview(b"") + +def could_raise_returns_float() -> float: + return 3.14 + +def could_raise_returns_range() -> range: + return range(42) + +def could_raise_returns_slice() -> slice: + return slice(None) + +def could_raise_returns_complex() -> complex: + return 3j + +def could_raise_returns_bytearray() -> bytearray: + return bytearray() + +class Foo: ... +class Bar: ... + +def could_raise_returns_Foo() -> Foo: + return Foo() + +def could_raise_returns_Bar() -> Bar: + return Bar() + +x = 1 + +try: + try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str + except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + x = could_raise_returns_bool() + reveal_type(x) # revealed: bool + except ValueError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_memoryview() + reveal_type(x) # revealed: memoryview + x = could_raise_returns_float() + reveal_type(x) # revealed: float + else: + reveal_type(x) # revealed: str + x = could_raise_returns_range() + reveal_type(x) # revealed: range + x = could_raise_returns_slice() + reveal_type(x) # revealed: slice + finally: + # TODO: should be `Literal[1] | str | bytes | bool | memoryview | float | range | slice` + reveal_type(x) # revealed: bool | float | slice + x = 2 + reveal_type(x) # revealed: Literal[2] + reveal_type(x) # revealed: Literal[2] +except: + reveal_type(x) # revealed: Literal[1, 2] | str | bytes | bool | memoryview | float | range | slice + x = could_raise_returns_complex() + reveal_type(x) # revealed: complex + x = could_raise_returns_bytearray() + reveal_type(x) # revealed: bytearray +else: + reveal_type(x) # revealed: Literal[2] + x = could_raise_returns_Foo() + reveal_type(x) # revealed: Foo + x = could_raise_returns_Bar() + reveal_type(x) # revealed: Bar +finally: + # TODO: should be `Literal[1, 2] | str | bytes | bool | memoryview | float | range | slice | complex | bytearray | Foo | Bar` + reveal_type(x) # revealed: bytearray | Bar + +# Either one `except` branch or the `else` +# must have been taken and completed to get here: +reveal_type(x) # revealed: bytearray | Bar +``` + +## Nested scopes inside `try` blocks + +Shadowing a variable in an inner scope has no effect on type inference of the +variable by that name in the outer scope: + +```py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_range() -> range: + return range(42) + +def could_raise_returns_bytearray() -> bytearray: + return bytearray() + +def could_raise_returns_float() -> float: + return 3.14 + +x = 1 + +try: + def foo(param=could_raise_returns_str()): + x = could_raise_returns_str() + + try: + reveal_type(x) # revealed: str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + except: + reveal_type(x) # revealed: str | bytes + x = could_raise_returns_bytearray() + reveal_type(x) # revealed: bytearray + x = could_raise_returns_float() + reveal_type(x) # revealed: float + finally: + # TODO: should be `str | bytes | bytearray | float` + reveal_type(x) # revealed: bytes | float + reveal_type(x) # revealed: bytes | float + + x = foo + reveal_type(x) # revealed: Literal[foo] +except: + reveal_type(x) # revealed: Literal[1] | Literal[foo] + + class Bar: + x = could_raise_returns_range() + reveal_type(x) # revealed: range + + x = Bar + reveal_type(x) # revealed: Literal[Bar] +finally: + # TODO: should be `Literal[1] | Literal[foo] | Literal[Bar]` + reveal_type(x) # revealed: Literal[foo] | Literal[Bar] + +reveal_type(x) # revealed: Literal[foo] | Literal[Bar] +``` + +[1]: https://astral-sh.notion.site/Exception-handler-control-flow-11348797e1ca80bb8ce1e9aedbbe439d diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder.rs b/crates/red_knot_python_semantic/src/semantic_index/builder.rs index 82a865248b..61e810ffb3 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder.rs @@ -1,5 +1,6 @@ use std::sync::Arc; +use except_handlers::TryNodeContextStackManager; use rustc_hash::FxHashMap; use ruff_db::files::File; @@ -32,6 +33,8 @@ use super::definition::{ MatchPatternDefinitionNodeRef, WithItemDefinitionNodeRef, }; +mod except_handlers; + pub(super) struct SemanticIndexBuilder<'db> { // Builder state db: &'db dyn Db, @@ -45,6 +48,8 @@ pub(super) struct SemanticIndexBuilder<'db> { current_match_case: Option>, /// Flow states at each `break` in the current loop. loop_break_states: Vec, + /// Per-scope contexts regarding nested `try`/`except` statements + try_node_context_stack_manager: TryNodeContextStackManager, /// Flags about the file's global scope has_future_annotations: bool, @@ -71,6 +76,7 @@ impl<'db> SemanticIndexBuilder<'db> { current_assignments: vec![], current_match_case: None, loop_break_states: vec![], + try_node_context_stack_manager: TryNodeContextStackManager::default(), has_future_annotations: false, @@ -111,6 +117,7 @@ impl<'db> SemanticIndexBuilder<'db> { kind: node.scope_kind(), descendents: children_start..children_start, }; + self.try_node_context_stack_manager.enter_nested_scope(); let file_scope_id = self.scopes.push(scope); self.symbol_tables.push(SymbolTableBuilder::new()); @@ -140,6 +147,7 @@ impl<'db> SemanticIndexBuilder<'db> { let children_end = self.scopes.next_index(); let scope = &mut self.scopes[id]; scope.descendents = scope.descendents.start..children_end; + self.try_node_context_stack_manager.exit_scope(); id } @@ -228,6 +236,10 @@ impl<'db> SemanticIndexBuilder<'db> { DefinitionCategory::Binding => use_def.record_binding(symbol, definition), } + let mut try_node_stack_manager = std::mem::take(&mut self.try_node_context_stack_manager); + try_node_stack_manager.record_definition(self); + self.try_node_context_stack_manager = try_node_stack_manager; + definition } @@ -781,40 +793,104 @@ where is_star, range: _, }) => { + // Save the state prior to visiting any of the `try` block. + // + // Potentially none of the `try` block could have been executed prior to executing + // the `except` block(s) and/or the `finally` block. + // We will merge this state with all of the intermediate + // states during the `try` block before visiting those suites. + let pre_try_block_state = self.flow_snapshot(); + + self.try_node_context_stack_manager.push_context(); + + // Visit the `try` block! self.visit_body(body); - for except_handler in handlers { - let ast::ExceptHandler::ExceptHandler(except_handler) = except_handler; - let ast::ExceptHandlerExceptHandler { - name: symbol_name, - type_: handled_exceptions, - body: handler_body, - range: _, - } = except_handler; + let mut post_except_states = vec![]; - if let Some(handled_exceptions) = handled_exceptions { - self.visit_expr(handled_exceptions); + // Take a record also of all the intermediate states we encountered + // while visiting the `try` block + let try_block_snapshots = self.try_node_context_stack_manager.pop_context(); + + if !handlers.is_empty() { + // Save the state immediately *after* visiting the `try` block + // but *before* we prepare for visiting the `except` block(s). + // + // We will revert to this state prior to visiting the the `else` block, + // as there necessarily must have been 0 `except` blocks executed + // if we hit the `else` block. + let post_try_block_state = self.flow_snapshot(); + + // Prepare for visiting the `except` block(s) + self.flow_restore(pre_try_block_state); + for state in try_block_snapshots { + self.flow_merge(state); } - // If `handled_exceptions` above was `None`, it's something like `except as e:`, - // which is invalid syntax. However, it's still pretty obvious here that the user - // *wanted* `e` to be bound, so we should still create a definition here nonetheless. - if let Some(symbol_name) = symbol_name { - let symbol = self.add_symbol(symbol_name.id.clone()); + let pre_except_state = self.flow_snapshot(); + let num_handlers = handlers.len(); - self.add_definition( - symbol, - DefinitionNodeRef::ExceptHandler(ExceptHandlerDefinitionNodeRef { - handler: except_handler, - is_star: *is_star, - }), - ); + for (i, except_handler) in handlers.iter().enumerate() { + let ast::ExceptHandler::ExceptHandler(except_handler) = except_handler; + let ast::ExceptHandlerExceptHandler { + name: symbol_name, + type_: handled_exceptions, + body: handler_body, + range: _, + } = except_handler; + + if let Some(handled_exceptions) = handled_exceptions { + self.visit_expr(handled_exceptions); + } + + // If `handled_exceptions` above was `None`, it's something like `except as e:`, + // which is invalid syntax. However, it's still pretty obvious here that the user + // *wanted* `e` to be bound, so we should still create a definition here nonetheless. + if let Some(symbol_name) = symbol_name { + let symbol = self.add_symbol(symbol_name.id.clone()); + + self.add_definition( + symbol, + DefinitionNodeRef::ExceptHandler(ExceptHandlerDefinitionNodeRef { + handler: except_handler, + is_star: *is_star, + }), + ); + } + + self.visit_body(handler_body); + // Each `except` block is mutually exclusive with all other `except` blocks. + post_except_states.push(self.flow_snapshot()); + + // It's unnecessary to do the `self.flow_restore()` call for the final except handler, + // as we'll immediately call `self.flow_restore()` to a different state + // as soon as this loop over the handlers terminates. + if i < (num_handlers - 1) { + self.flow_restore(pre_except_state.clone()); + } } - self.visit_body(handler_body); + // If we get to the `else` block, we know that 0 of the `except` blocks can have been executed, + // and the entire `try` block must have been executed: + self.flow_restore(post_try_block_state); } self.visit_body(orelse); + + for post_except_state in post_except_states { + self.flow_merge(post_except_state); + } + + // TODO: there's lots of complexity here that isn't yet handled by our model. + // In order to accurately model the semantics of `finally` suites, we in fact need to visit + // the suite twice: once under the (current) assumption that either the `try + else` suite + // ran to completion or exactly one `except` branch ran to completion, and then again under + // the assumption that potentially none of the branches ran to completion and we in fact + // jumped from a `try`, `else` or `except` branch straight into the `finally` branch. + // This requires rethinking some fundamental assumptions semantic indexing makes. + // For more details, see: + // - https://astral-sh.notion.site/Exception-handler-control-flow-11348797e1ca80bb8ce1e9aedbbe439d + // - https://github.com/astral-sh/ruff/pull/13633#discussion_r1788626702 self.visit_body(finalbody); } _ => { diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs b/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs new file mode 100644 index 0000000000..6438d1996f --- /dev/null +++ b/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs @@ -0,0 +1,102 @@ +use crate::semantic_index::use_def::FlowSnapshot; + +use super::SemanticIndexBuilder; + +/// An abstraction over the fact that each scope should have its own [`TryNodeContextStack`] +#[derive(Debug, Default)] +pub(super) struct TryNodeContextStackManager(Vec); + +impl TryNodeContextStackManager { + /// Push a new [`TryNodeContextStack`] onto the stack of stacks. + /// + /// Each [`TryNodeContextStack`] is only valid for a single scope + pub(super) fn enter_nested_scope(&mut self) { + self.0.push(TryNodeContextStack::default()); + } + + /// Pop a new [`TryNodeContextStack`] off the stack of stacks. + /// + /// Each [`TryNodeContextStack`] is only valid for a single scope + pub(super) fn exit_scope(&mut self) { + let popped_context = self.0.pop(); + debug_assert!( + popped_context.is_some(), + "exit_scope() should never be called on an empty stack \ +(this indicates an unbalanced `enter_nested_scope()`/`exit_scope()` pair of calls)" + ); + } + + /// Push a [`TryNodeContext`] onto the [`TryNodeContextStack`] + /// at the top of our stack of stacks + pub(super) fn push_context(&mut self) { + self.current_try_context_stack().push_context(); + } + + /// Pop a [`TryNodeContext`] off the [`TryNodeContextStack`] + /// at the top of our stack of stacks. Return the Vec of [`FlowSnapshot`]s + /// recorded while we were visiting the `try` suite. + pub(super) fn pop_context(&mut self) -> Vec { + self.current_try_context_stack().pop_context() + } + + /// Retrieve the stack that is at the top of our stack of stacks. + /// For each `try` block on that stack, push the snapshot onto the `try` block + pub(super) fn record_definition(&mut self, builder: &SemanticIndexBuilder) { + self.current_try_context_stack().record_definition(builder); + } + + /// Retrieve the [`TryNodeContextStack`] that is relevant for the current scope. + fn current_try_context_stack(&mut self) -> &mut TryNodeContextStack { + self.0 + .last_mut() + .expect("There should always be at least one `TryBlockContexts` on the stack") + } +} + +/// The contexts of nested `try`/`except` blocks for a single scope +#[derive(Debug, Default)] +struct TryNodeContextStack(Vec); + +impl TryNodeContextStack { + /// Push a new [`TryNodeContext`] for recording intermediate states + /// while visiting a [`ruff_python_ast::StmtTry`] node that has a `finally` branch. + fn push_context(&mut self) { + self.0.push(TryNodeContext::default()); + } + + /// Pop a [`TryNodeContext`] off the stack. Return the Vec of [`FlowSnapshot`]s + /// recorded while we were visiting the `try` suite. + fn pop_context(&mut self) -> Vec { + let TryNodeContext { + try_suite_snapshots, + } = self + .0 + .pop() + .expect("Cannot pop a `try` block off an empty `TryBlockContexts` stack"); + try_suite_snapshots + } + + /// For each `try` block on the stack, push the snapshot onto the `try` block + fn record_definition(&mut self, builder: &SemanticIndexBuilder) { + for context in &mut self.0 { + context.record_definition(builder.flow_snapshot()); + } + } +} + +/// Context for tracking definitions over the course of a single +/// [`ruff_python_ast::StmtTry`] node +/// +/// It will likely be necessary to add more fields to this struct in the future +/// when we add more advanced handling of `finally` branches. +#[derive(Debug, Default)] +struct TryNodeContext { + try_suite_snapshots: Vec, +} + +impl TryNodeContext { + /// Take a record of what the internal state looked like after a definition + fn record_definition(&mut self, snapshot: FlowSnapshot) { + self.try_suite_snapshots.push(snapshot); + } +} diff --git a/crates/ruff_benchmark/benches/red_knot.rs b/crates/ruff_benchmark/benches/red_knot.rs index eb8b186646..1a002f190b 100644 --- a/crates/ruff_benchmark/benches/red_knot.rs +++ b/crates/ruff_benchmark/benches/red_knot.rs @@ -22,10 +22,15 @@ struct Case { const TOMLLIB_312_URL: &str = "https://raw.githubusercontent.com/python/cpython/8e8a4baf652f6e1cee7acde9d78c4b6154539748/Lib/tomllib"; -// The failed import from 'collections.abc' is due to lack of support for 'import *'. static EXPECTED_DIAGNOSTICS: &[&str] = &[ + // We don't support `ModuleType`-attributes as globals yet: "/src/tomllib/__init__.py:10:30: Name `__name__` used when not defined", + // We don't support `*` imports yet: "/src/tomllib/_parser.py:7:29: Module `collections.abc` has no member `Iterable`", + // We don't support terminal statements in control flow yet: + "/src/tomllib/_parser.py:353:5: Method `__getitem__` of type `Unbound | @Todo` is not callable on object of type `Unbound | @Todo`", + "/src/tomllib/_parser.py:455:9: Method `__getitem__` of type `Unbound | @Todo` is not callable on object of type `Unbound | @Todo`", + // True positives! "Line 69 is too long (89 characters)", "Use double quotes for strings", "Use double quotes for strings", @@ -34,6 +39,25 @@ static EXPECTED_DIAGNOSTICS: &[&str] = &[ "Use double quotes for strings", "Use double quotes for strings", "Use double quotes for strings", + // We don't support terminal statements in control flow yet: + "/src/tomllib/_parser.py:66:18: Name `s` used when possibly not defined", + "/src/tomllib/_parser.py:98:12: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:101:12: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:104:14: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:104:14: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:115:14: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:115:14: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:126:12: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:348:20: Name `nest` used when possibly not defined", + "/src/tomllib/_parser.py:353:5: Name `nest` used when possibly not defined", + "/src/tomllib/_parser.py:453:24: Name `nest` used when possibly not defined", + "/src/tomllib/_parser.py:455:9: Name `nest` used when possibly not defined", + "/src/tomllib/_parser.py:482:16: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:566:12: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:573:12: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:579:12: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:580:63: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:629:38: Name `datetime_obj` used when possibly not defined" ]; fn get_test_file(name: &str) -> TestFile {