diff --git a/crates/red_knot_python_semantic/resources/mdtest/expression/len.md b/crates/red_knot_python_semantic/resources/mdtest/expression/len.md new file mode 100644 index 0000000000..04f6efff5f --- /dev/null +++ b/crates/red_knot_python_semantic/resources/mdtest/expression/len.md @@ -0,0 +1,219 @@ +# Length (`len()`) + +## Literal and constructed iterables + +### Strings and bytes literals + +```py +reveal_type(len("no\rmal")) # revealed: Literal[6] +reveal_type(len(r"aw stri\ng")) # revealed: Literal[10] +reveal_type(len(r"conca\t" "ena\tion")) # revealed: Literal[14] +reveal_type(len(b"ytes lite" rb"al")) # revealed: Literal[11] +reveal_type(len("𝒰𝕹🄸©🕲𝕕ℇ")) # revealed: Literal[7] + +reveal_type( # revealed: Literal[7] + len( + """foo +bar""" + ) +) +reveal_type( # revealed: Literal[9] + len( + r"""foo\r +bar""" + ) +) +reveal_type( # revealed: Literal[7] + len( + b"""foo +bar""" + ) +) +reveal_type( # revealed: Literal[9] + len( + rb"""foo\r +bar""" + ) +) +``` + +### Tuples + +```py +reveal_type(len(())) # revealed: Literal[0] +reveal_type(len((1,))) # revealed: Literal[1] +reveal_type(len((1, 2))) # revealed: Literal[2] + +# TODO: Handle constructor calls +reveal_type(len(tuple())) # revealed: int + +# TODO: Handle star unpacks; Should be: Literal[0] +reveal_type(len((*[],))) # revealed: Literal[1] + +# TODO: Handle star unpacks; Should be: Literal[1] +reveal_type( # revealed: Literal[2] + len( + ( + *[], + 1, + ) + ) +) + +# TODO: Handle star unpacks; Should be: Literal[2] +reveal_type(len((*[], 1, 2))) # revealed: Literal[3] + +# TODO: Handle star unpacks; Should be: Literal[0] +reveal_type(len((*[], *{}))) # revealed: Literal[2] +``` + +### Lists, sets and dictionaries + +```py +reveal_type(len([])) # revealed: int +reveal_type(len([1])) # revealed: int +reveal_type(len([1, 2])) # revealed: int +reveal_type(len([*{}, *dict()])) # revealed: int + +reveal_type(len({})) # revealed: int +reveal_type(len({**{}})) # revealed: int 
+reveal_type(len({**{}, **{}})) # revealed: int + +reveal_type(len({1})) # revealed: int +reveal_type(len({1, 2})) # revealed: int +reveal_type(len({*[], 2})) # revealed: int + +reveal_type(len(list())) # revealed: int +reveal_type(len(set())) # revealed: int +reveal_type(len(dict())) # revealed: int +reveal_type(len(frozenset())) # revealed: int +``` + +## `__len__` + +The returned value of `__len__` is implicitly and recursively converted to `int`. + +### Literal integers + +```py +from typing import Literal + +class Zero: + def __len__(self) -> Literal[0]: ... + +class ZeroOrOne: + def __len__(self) -> Literal[0, 1]: ... + +class ZeroOrTrue: + def __len__(self) -> Literal[0, True]: ... + +class OneOrFalse: + def __len__(self) -> Literal[1] | Literal[False]: ... + +class OneOrFoo: + def __len__(self) -> Literal[1, "foo"]: ... + +class ZeroOrStr: + def __len__(self) -> Literal[0] | str: ... + +reveal_type(len(Zero())) # revealed: Literal[0] +reveal_type(len(ZeroOrOne())) # revealed: Literal[0, 1] +reveal_type(len(ZeroOrTrue())) # revealed: Literal[0, 1] +reveal_type(len(OneOrFalse())) # revealed: Literal[0, 1] + +# TODO: Emit a diagnostic +reveal_type(len(OneOrFoo())) # revealed: int + +# TODO: Emit a diagnostic +reveal_type(len(ZeroOrStr())) # revealed: int +``` + +### Literal booleans + +```py +from typing import Literal + +class LiteralTrue: + def __len__(self) -> Literal[True]: ... + +class LiteralFalse: + def __len__(self) -> Literal[False]: ... + +reveal_type(len(LiteralTrue())) # revealed: Literal[1] +reveal_type(len(LiteralFalse())) # revealed: Literal[0] +``` + +### Enums + +```py +from enum import Enum, auto +from typing import Literal + +class SomeEnum(Enum): + AUTO = auto() + INT = 2 + STR = "4" + TUPLE = (8, "16") + INT_2 = 3_2 + +class Auto: + def __len__(self) -> Literal[SomeEnum.AUTO]: ... + +class Int: + def __len__(self) -> Literal[SomeEnum.INT]: ... + +class Str: + def __len__(self) -> Literal[SomeEnum.STR]: ... 
+ +class Tuple: + def __len__(self) -> Literal[SomeEnum.TUPLE]: ... + +class IntUnion: + def __len__(self) -> Literal[SomeEnum.INT, SomeEnum.INT_2]: ... + +reveal_type(len(Auto())) # revealed: int +reveal_type(len(Int())) # revealed: Literal[2] +reveal_type(len(Str())) # revealed: int +reveal_type(len(Tuple())) # revealed: int +reveal_type(len(IntUnion())) # revealed: Literal[2, 32] +``` + +### Negative integers + +```py +from typing import Literal + +class Negative: + def __len__(self) -> Literal[-1]: ... + +# TODO: Emit a diagnostic +reveal_type(len(Negative())) # revealed: int +``` + +### Wrong signature + +```py +from typing import Literal + +class SecondOptionalArgument: + def __len__(self, v: int = 0) -> Literal[0]: ... + +class SecondRequiredArgument: + def __len__(self, v: int) -> Literal[1]: ... + +# TODO: Emit a diagnostic +reveal_type(len(SecondOptionalArgument())) # revealed: Literal[0] + +# TODO: Emit a diagnostic +reveal_type(len(SecondRequiredArgument())) # revealed: Literal[1] +``` + +### No `__len__` + +```py +class NoDunderLen: + pass + +# TODO: Emit a diagnostic +reveal_type(len(NoDunderLen())) # revealed: int +``` diff --git a/crates/red_knot_python_semantic/resources/mdtest/unpacking.md b/crates/red_knot_python_semantic/resources/mdtest/unpacking.md index 0e9c3943a9..1da17a4f27 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/unpacking.md +++ b/crates/red_knot_python_semantic/resources/mdtest/unpacking.md @@ -267,3 +267,42 @@ reveal_type(b) # revealed: LiteralString # TODO: Should be list[int] once support for assigning to starred expression is added reveal_type(c) # revealed: @Todo(starred unpacking) ``` + +### Unicode + +```py +# TODO: Add diagnostic (need more values to unpack) +(a, b) = "é" + +reveal_type(a) # revealed: LiteralString +reveal_type(b) # revealed: Unknown +``` + +### Unicode escape (1) + +```py +# TODO: Add diagnostic (need more values to unpack) +(a, b) = "\u9E6C" + +reveal_type(a) # revealed: LiteralString 
+reveal_type(b) # revealed: Unknown +``` + +### Unicode escape (2) + +```py +# TODO: Add diagnostic (need more values to unpack) +(a, b) = "\U0010FFFF" + +reveal_type(a) # revealed: LiteralString +reveal_type(b) # revealed: Unknown +``` + +### Surrogates + +```py +(a, b) = "\uD800\uDFFF" + +reveal_type(a) # revealed: LiteralString +reveal_type(b) # revealed: LiteralString +``` diff --git a/crates/red_knot_python_semantic/src/types.rs b/crates/red_knot_python_semantic/src/types.rs index 97a79434fc..e1e047db03 100644 --- a/crates/red_knot_python_semantic/src/types.rs +++ b/crates/red_knot_python_semantic/src/types.rs @@ -1417,21 +1417,76 @@ impl<'db> Type<'db> { } } + /// Return the type of `len()` on a type if it is known more precisely than `int`, + /// or `None` otherwise. + /// + /// In the second case, the return type of `len()` in `typeshed` (`int`) + /// is used as a fallback. + fn len(&self, db: &'db dyn Db) -> Option<Type<'db>> { + fn non_negative_int_literal<'db>(db: &'db dyn Db, ty: Type<'db>) -> Option<Type<'db>> { + match ty { + // TODO: Emit diagnostic for non-integers and negative integers + Type::IntLiteral(value) => (value >= 0).then_some(ty), + Type::BooleanLiteral(value) => Some(Type::IntLiteral(value.into())), + Type::Union(union) => { + let mut builder = UnionBuilder::new(db); + for element in union.elements(db) { + builder = builder.add(non_negative_int_literal(db, *element)?); + } + Some(builder.build()) + } + _ => None, + } + } + + let usize_len = match self { + Type::BytesLiteral(bytes) => Some(bytes.python_len(db)), + Type::StringLiteral(string) => Some(string.python_len(db)), + Type::Tuple(tuple) => Some(tuple.len(db)), + _ => None, + }; + + if let Some(usize_len) = usize_len { + return usize_len.try_into().ok().map(Type::IntLiteral); + } + + let return_ty = match self.call_dunder(db, "__len__", &[*self]) { + // TODO: emit a diagnostic + CallDunderResult::MethodNotAvailable => return None, + + CallDunderResult::CallOutcome(outcome) | 
CallDunderResult::PossiblyUnbound(outcome) => { + outcome.return_ty(db)? + } + }; + + non_negative_int_literal(db, return_ty) + } + /// Return the outcome of calling an object of this type. #[must_use] fn call(self, db: &'db dyn Db, arg_types: &[Type<'db>]) -> CallOutcome<'db> { match self { // TODO validate typed call arguments vs callable signature - Type::FunctionLiteral(function_type) => { - if function_type.is_known(db, KnownFunction::RevealType) { - CallOutcome::revealed( - function_type.signature(db).return_ty, - *arg_types.first().unwrap_or(&Type::Unknown), - ) - } else { - CallOutcome::callable(function_type.signature(db).return_ty) + Type::FunctionLiteral(function_type) => match function_type.known(db) { + Some(KnownFunction::RevealType) => CallOutcome::revealed( + function_type.signature(db).return_ty, + *arg_types.first().unwrap_or(&Type::Unknown), + ), + + Some(KnownFunction::Len) => { + let normal_return_ty = function_type.signature(db).return_ty; + + let [only_arg] = arg_types else { + // TODO: Emit a diagnostic + return CallOutcome::callable(normal_return_ty); + }; + let len_ty = only_arg.len(db); + + CallOutcome::callable(len_ty.unwrap_or(normal_return_ty)) } - } + + _ => CallOutcome::callable(function_type.signature(db).return_ty), + }, // TODO annotated return type on `__new__` or metaclass `__call__` Type::ClassLiteral(ClassLiteralType { class }) => { @@ -2597,13 +2652,15 @@ pub enum KnownFunction { ConstraintFunction(KnownConstraintFunction), /// `builtins.reveal_type`, `typing.reveal_type` or `typing_extensions.reveal_type` RevealType, + /// `builtins.len` + Len, } impl KnownFunction { pub fn constraint_function(self) -> Option<KnownConstraintFunction> { match self { Self::ConstraintFunction(f) => Some(f), - Self::RevealType => None, + Self::RevealType | Self::Len => None, } } @@ -2620,6 +2677,7 @@ impl KnownFunction { "issubclass" if definition.is_builtin_definition(db) => Some( KnownFunction::ConstraintFunction(KnownConstraintFunction::IsSubclass), ), + "len" if 
definition.is_builtin_definition(db) => Some(KnownFunction::Len), _ => None, } } @@ -3074,8 +3132,9 @@ pub struct StringLiteralType<'db> { } impl<'db> StringLiteralType<'db> { - pub fn len(&self, db: &'db dyn Db) -> usize { - self.value(db).len() + /// The length of the string, as would be returned by Python's `len()`. + pub fn python_len(&self, db: &'db dyn Db) -> usize { + self.value(db).chars().count() } } @@ -3085,6 +3144,12 @@ pub struct BytesLiteralType<'db> { value: Box<[u8]>, } +impl<'db> BytesLiteralType<'db> { + pub fn python_len(&self, db: &'db dyn Db) -> usize { + self.value(db).len() + } +} + #[salsa::interned] pub struct SliceLiteralType<'db> { start: Option, diff --git a/crates/red_knot_python_semantic/src/types/unpacker.rs b/crates/red_knot_python_semantic/src/types/unpacker.rs index dc96fac252..99d45ecd3e 100644 --- a/crates/red_knot_python_semantic/src/types/unpacker.rs +++ b/crates/red_knot_python_semantic/src/types/unpacker.rs @@ -95,7 +95,8 @@ impl<'db> Unpacker<'db> { // there would be a cost and it's not clear that it's worth it. let value_ty = Type::tuple( self.db, - std::iter::repeat(Type::LiteralString).take(string_literal_ty.len(self.db)), + std::iter::repeat(Type::LiteralString) + .take(string_literal_ty.python_len(self.db)), ); self.unpack(target, value_ty, scope); }