lint on the global keyword if there's no explicit definition in the global scope

This commit is contained in:
Jack O'Connor
2025-07-14 15:44:02 -07:00
parent a1edb69ea5
commit e73a8ba571
8 changed files with 280 additions and 78 deletions

View File

@@ -343,6 +343,8 @@ def _():
## Load before `global` declaration
```py
x: int
def f():
x = 1
global x # error: [invalid-syntax] "name `x` is used prior to global declaration"

View File

@@ -1295,13 +1295,16 @@ reveal_type(Nope) # revealed: Unknown
## `global` statements in non-global scopes
A `global` statement in a nested function scope, combined with a definition in the same function
scope of the name that was declared `global`, can add a symbol to the global namespace.
Python allows `global` statements in function bodies to add new variables to the global scope, but
we require a matching global binding or declaration. We lint on unresolved `global` statements, and
we don't include the symbols they might define in `*` imports:
`a.py`:
```py
def f():
# error: [unresolved-global] "Invalid global declaration of `g`: `g` has no declarations or bindings in the global scope"
# error: [unresolved-global] "Invalid global declaration of `h`: `h` has no declarations or bindings in the global scope"
global g, h
g = True
@@ -1316,16 +1319,12 @@ from a import *
reveal_type(f) # revealed: def f() -> Unknown
# TODO: we're undecided about whether we should consider this a false positive or not.
# Mutating the global scope to add a symbol from an inner scope will not *necessarily* result
# in the symbol being bound from the perspective of other modules (the function that creates
# the inner scope, and adds the symbol to the global scope, might never be called!)
# See discussion in https://github.com/astral-sh/ruff/pull/16959
#
# This could be considered a false positive, since this use of `g` isn't a runtime error, but we're
# being conservative.
# error: [unresolved-reference]
reveal_type(g) # revealed: Unknown
# this diagnostic is accurate, though!
# However, this is a true positive: `h` is unbound at runtime.
# error: [unresolved-reference]
reveal_type(h) # revealed: Unknown
```

View File

@@ -103,6 +103,9 @@ x = 2
Using a name prior to its `global` declaration in the same scope is a syntax error.
```py
x = 1
y = 2
def f():
print(x)
global x # error: [invalid-syntax] "name `x` is used prior to global declaration"
@@ -224,3 +227,25 @@ def f():
# TODO: reveal_type(x) # revealed: Unknown | Literal["1"]
```
## Global variables need an explicit definition in the global scope
You're allowed to use the `global` keyword to define new global variables that don't have any
explicit definition in the global scope, but we consider that fishy and prefer to lint on it:
```py
x = 1
y: int
# z is neither bound nor declared in the global scope
def f():
global x, y, z # error: [unresolved-global] "Invalid global declaration of `z`: `z` has no declarations or bindings in the global scope"
```
You don't need a definition for implicit globals, but you do for built-ins:
```py
def f():
global __file__ # allowed, implicit global
global int # error: [unresolved-global] "Invalid global declaration of `int`: `int` has no declarations or bindings in the global scope"
```

View File

@@ -219,8 +219,10 @@ def f1():
But a `global` statement breaks the chain:
```py
x = 1
def f():
x = 1
x = 2
def g():
global x
def h():
@@ -240,8 +242,13 @@ def f():
## A complicated mixture of `nonlocal` chaining, empty scopes, class scopes, and the `global` keyword
```py
# Global definitions of `x`, `y`, and `z`.
x: bool = True
y: bool = True
z: bool = True
def f1():
# The original bindings of `x`, `y`, and `z` with type declarations.
# Local definitions of `x`, `y`, and `z`.
x: int = 1
y: int = 2
z: int = 3
@@ -263,7 +270,6 @@ def f1():
x = 4
y = 5
global z
z = 6
def f4():
# This scope sees `x` from `f1` and `y` from `f3`. It *can't* declare `z`
@@ -272,7 +278,6 @@ def f1():
nonlocal x, y, z # error: [invalid-syntax] "no binding for nonlocal `z` found"
x = "string" # error: [invalid-assignment]
y = "string" # allowed, because `f3`'s `y` is untyped
reveal_type(z) # revealed: Unknown | Literal[6]
```
## TODO: `nonlocal` affects the inferred type in the outer scope

View File

@@ -85,6 +85,7 @@ pub(crate) fn register_lints(registry: &mut LintRegistryBuilder) {
registry.register_lint(&STATIC_ASSERT_ERROR);
registry.register_lint(&INVALID_ATTRIBUTE_ACCESS);
registry.register_lint(&REDUNDANT_CAST);
registry.register_lint(&UNRESOLVED_GLOBAL);
// String annotations
registry.register_lint(&BYTE_STRING_TYPE_ANNOTATION);
@@ -1560,6 +1561,56 @@ declare_lint! {
}
}
// Lint metadata for `global` statements naming a variable that the global scope
// itself never binds or declares.
declare_lint! {
/// ## What it does
/// Detects variables declared as `global` in an inner scope that have no explicit
/// bindings or declarations in the global scope.
///
/// ## Why is this bad?
/// Function bodies with `global` statements can run in any order (or not at all), which makes
/// it hard for static analysis tools to infer the types of globals without
/// explicit definitions or declarations.
///
/// ## Example
/// ```python
/// def f():
///     global x # unresolved global
///     x = 42
///
/// def g():
///     print(x) # unresolved reference
/// ```
///
/// Use instead:
/// ```python
/// x: int
///
/// def f():
///     global x
///     x = 42
///
/// def g():
///     print(x)
/// ```
///
/// Or:
/// ```python
/// x: int | None = None
///
/// def f():
///     global x
///     x = 42
///
/// def g():
///     print(x)
/// ```
pub(crate) static UNRESOLVED_GLOBAL = {
summary: "detects `global` statements with no definition in the global scope",
status: LintStatus::preview("1.0.0"),
// Reported as a warning by default; the flagged pattern is accepted by CPython at runtime.
default_level: Level::Warn,
}
}
/// A collection of type check diagnostics.
#[derive(Default, Eq, PartialEq, get_size2::GetSize)]
pub struct TypeCheckDiagnostics {

View File

@@ -94,12 +94,13 @@ use crate::types::diagnostic::{
INVALID_DECLARATION, INVALID_GENERIC_CLASS, INVALID_PARAMETER_DEFAULT, INVALID_TYPE_FORM,
INVALID_TYPE_GUARD_CALL, INVALID_TYPE_VARIABLE_CONSTRAINTS, IncompatibleBases,
POSSIBLY_UNBOUND_IMPLICIT_CALL, POSSIBLY_UNBOUND_IMPORT, TypeCheckDiagnostics,
UNDEFINED_REVEAL, UNRESOLVED_ATTRIBUTE, UNRESOLVED_IMPORT, UNRESOLVED_REFERENCE,
UNSUPPORTED_OPERATOR, report_implicit_return_type, report_instance_layout_conflict,
report_invalid_argument_number_to_special_form, report_invalid_arguments_to_annotated,
report_invalid_arguments_to_callable, report_invalid_assignment,
report_invalid_attribute_assignment, report_invalid_generator_function_return_type,
report_invalid_return_type, report_possibly_unbound_attribute,
UNDEFINED_REVEAL, UNRESOLVED_ATTRIBUTE, UNRESOLVED_GLOBAL, UNRESOLVED_IMPORT,
UNRESOLVED_REFERENCE, UNSUPPORTED_OPERATOR, report_implicit_return_type,
report_instance_layout_conflict, report_invalid_argument_number_to_special_form,
report_invalid_arguments_to_annotated, report_invalid_arguments_to_callable,
report_invalid_assignment, report_invalid_attribute_assignment,
report_invalid_generator_function_return_type, report_invalid_return_type,
report_possibly_unbound_attribute,
};
use crate::types::function::{
FunctionDecorators, FunctionLiteral, FunctionType, KnownFunction, OverloadLiteral,
@@ -2255,11 +2256,11 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
ast::Stmt::Return(ret) => self.infer_return_statement(ret),
ast::Stmt::Delete(delete) => self.infer_delete_statement(delete),
ast::Stmt::Nonlocal(nonlocal) => self.infer_nonlocal_statement(nonlocal),
ast::Stmt::Global(global) => self.infer_global_statement(global),
ast::Stmt::Break(_)
| ast::Stmt::Continue(_)
| ast::Stmt::Pass(_)
| ast::Stmt::IpyEscapeCommand(_)
| ast::Stmt::Global(_) => {
| ast::Stmt::IpyEscapeCommand(_) => {
// No-op
}
}
@@ -4653,6 +4654,61 @@ impl<'db, 'ast> TypeInferenceBuilder<'db, 'ast> {
}
}
fn infer_global_statement(&mut self, global: &ast::StmtGlobal) {
// CPython allows examples like this, where a global variable is never explicitly defined
// in the global scope:
//
// ```py
// def f():
// global x
// x = 1
// def g():
// print(x)
// ```
//
// However, allowing this pattern would make it hard for us to guarantee
// accurate analysis about the types and boundness of global-scope symbols,
// so we require the variable to be explicitly defined (either bound or declared)
// in the global scope.
let ast::StmtGlobal {
node_index: _,
range,
names,
} = global;
let global_place_table = self.index.place_table(FileScopeId::global());
for name in names {
if let Some(place_id) = global_place_table.place_id_by_name(name) {
let place = global_place_table.place_expr(place_id);
if place.is_bound() || place.is_declared() {
// This name is explicitly defined in the global scope (not just in function
// bodies that mark it `global`).
continue;
}
}
if !module_type_implicit_global_symbol(self.db(), name)
.place
.is_unbound()
{
// This name is an implicit global like `__file__` (but not a built-in like `int`).
continue;
}
// This variable isn't explicitly defined in the global scope, nor is it an
// implicit global from `types.ModuleType`, so we consider this `global` statement invalid.
let Some(builder) = self.context.report_lint(&UNRESOLVED_GLOBAL, range) else {
return;
};
let mut diag =
builder.into_diagnostic(format_args!("Invalid global declaration of `{name}`"));
diag.set_primary_message(format_args!(
"`{name}` has no declarations or bindings in the global scope"
));
diag.info("This limits ty's ability to make accurate inferences about the boundness and types of global-scope symbols");
diag.info(format_args!(
"Consider adding a declaration to the global scope, e.g. `{name}: int`"
));
}
}
fn infer_nonlocal_statement(&mut self, nonlocal: &ast::StmtNonlocal) {
let ast::StmtNonlocal {
node_index: _,