[ty] Improve completion ranking based on origin of symbols

Part of this was already done, but it was half-assed. We now look at the
search path that a symbol came from and centralize a symbol's origin
classification.

The preference ordering here is maybe not the right one, but we can
iterate as users give us feedback. Note also that the preference
ordering based on the origin is pretty low in the relevance sorting.
This means that other, more specific criteria can and will override it.

This results in some nice improvements to our evaluation tasks.
This commit is contained in:
Andrew Gallant
2026-01-08 10:41:06 -05:00
committed by Andrew Gallant
parent 2196ef3a33
commit 64117c1146
4 changed files with 157 additions and 53 deletions

View File

@@ -19,7 +19,7 @@ import-keyword-completion,main.py,0,1
internal-typeshed-hidden,main.py,0,2
local-over-auto-import,main.py,0,1
none-completion,main.py,0,1
numpy-array,main.py,0,161
numpy-array,main.py,0,56
numpy-array,main.py,1,1
object-attr-instance-methods,main.py,0,1
object-attr-instance-methods,main.py,1,1
@@ -28,14 +28,14 @@ raise-uses-base-exception,main.py,0,1
scope-existing-over-new-import,main.py,0,1
scope-prioritize-closer,main.py,0,2
scope-simple-long-identifier,main.py,0,1
third-party-over-stdlib,main.py,0,3
third-party-over-stdlib,main.py,0,1
tighter-over-looser-scope,main.py,0,3
tstring-completions,main.py,0,1
ty-extensions-lower-stdlib,main.py,0,1
type-var-typing-over-ast,main.py,0,1
type-var-typing-over-ast,main.py,1,14
type-var-typing-over-ast,main.py,1,10
typing-gets-priority,main.py,0,1
typing-gets-priority,main.py,1,1
typing-gets-priority,main.py,2,1
typing-gets-priority,main.py,3,3
typing-gets-priority,main.py,4,2
typing-gets-priority,main.py,4,1
1 name file index rank
19 internal-typeshed-hidden main.py 0 2
20 local-over-auto-import main.py 0 1
21 none-completion main.py 0 1
22 numpy-array main.py 0 161 56
23 numpy-array main.py 1 1
24 object-attr-instance-methods main.py 0 1
25 object-attr-instance-methods main.py 1 1
28 scope-existing-over-new-import main.py 0 1
29 scope-prioritize-closer main.py 0 2
30 scope-simple-long-identifier main.py 0 1
31 third-party-over-stdlib main.py 0 3 1
32 tighter-over-looser-scope main.py 0 3
33 tstring-completions main.py 0 1
34 ty-extensions-lower-stdlib main.py 0 1
35 type-var-typing-over-ast main.py 0 1
36 type-var-typing-over-ast main.py 1 14 10
37 typing-gets-priority main.py 0 1
38 typing-gets-priority main.py 1 1
39 typing-gets-priority main.py 2 1
40 typing-gets-priority main.py 3 3
41 typing-gets-priority main.py 4 2 1

View File

@@ -1,3 +1,6 @@
# Many of these came from discussion in:
# <https://github.com/astral-sh/ty/issues/1274>
# We should prefer `typing` over `asyncio` here.
class Foo(Protoco<CURSOR: typing.Protocol>): ...

View File

@@ -11,7 +11,7 @@ use ruff_python_ast::{self as ast, AnyNodeRef};
use ruff_python_codegen::Stylist;
use ruff_text_size::{Ranged, TextRange, TextSize};
use rustc_hash::FxHashSet;
use ty_module_resolver::{KnownModule, ModuleName};
use ty_module_resolver::{KnownModule, Module, ModuleName};
use ty_python_semantic::HasType;
use ty_python_semantic::types::UnionType;
use ty_python_semantic::{
@@ -49,32 +49,36 @@ pub fn completion<'db>(
}
ContextKind::NonImport(ref non_import) => {
let model = SemanticModel::new(db, file);
let (semantic_completions, scoped) = match non_import.target {
match non_import.target {
CompletionTargetAst::ObjectDot { expr } => {
(model.attribute_completions(expr), None)
completions.extend(model.attribute_completions(expr));
}
CompletionTargetAst::Scoped(scoped) => {
(model.scoped_completions(scoped.node), Some(scoped))
}
};
completions.extend(semantic_completions);
if scoped.is_some() {
add_keyword_completions(db, &mut completions);
add_argument_completions(db, &model, &context.cursor, &mut completions);
}
if settings.auto_import {
if let Some(scoped) = scoped {
add_unimported_completions(
db,
file,
&parsed,
scoped,
|module_name: &ModuleName, symbol: &str| {
ImportRequest::import_from(module_name.as_str(), symbol)
},
&mut completions,
);
for semantic_completion in model.scoped_completions(scoped.node) {
let module_dependency_kind = if semantic_completion.builtin {
ModuleDependencyKind::Builtin
} else {
ModuleDependencyKind::Current
};
completions.add(
CompletionBuilder::from_semantic_completion(db, semantic_completion)
.module_dependency_kind(module_dependency_kind),
);
}
add_keyword_completions(db, &mut completions);
add_argument_completions(db, &model, &context.cursor, &mut completions);
if settings.auto_import {
add_unimported_completions(
db,
file,
&parsed,
scoped,
|module_name: &ModuleName, symbol: &str| {
ImportRequest::import_from(module_name.as_str(), symbol)
},
&mut completions,
);
}
}
}
}
@@ -309,6 +313,7 @@ struct CompletionBuilder<'db> {
is_context_specific: bool,
is_type_check_only: bool,
documentation: Option<Docstring>,
module_dependency_kind: Option<ModuleDependencyKind>,
}
impl<'db> CompletionBuilder<'db> {
@@ -330,6 +335,7 @@ impl<'db> CompletionBuilder<'db> {
is_context_specific: false,
is_type_check_only: false,
documentation: None,
module_dependency_kind: None,
}
}
@@ -463,6 +469,11 @@ impl<'db> CompletionBuilder<'db> {
self
}
fn module_dependency_kind(mut self, kind: ModuleDependencyKind) -> CompletionBuilder<'db> {
self.module_dependency_kind = Some(kind);
self
}
/// Returns true when this completion refers to the
/// `NotImplemented` builtin.
fn is_notimplemented(&self, db: &dyn Db) -> bool {
@@ -1019,11 +1030,6 @@ struct Relevance {
/// symbols that we know for sure are usable should get ranked
/// above symbols that we're unsure about.
definitively_usable: Sort,
/// This lets one sort completions based on whether they're in the
/// current module or not. e.g., `Sort::Lower` for symbols outside
/// the module and `Sort::Higher` for symbols inside the module
/// that are already in scope.
current_module: Sort,
/// At time of writing (2025-11-11), keyword completions are
/// classified as builtins, which makes them sort after everything
/// else. But we probably want keyword completions to sort *before*
@@ -1033,17 +1039,23 @@ struct Relevance {
/// completion evaluation framework should be more representative
/// of real world conditions.
keyword: Sort,
/// In some instances, a symbol is from a very commonly used module
/// that we want to boost over other symbols.
special_module: Sort,
/// Sorts based on whether the symbol comes from the `builtins`
/// module. i.e., Python's initial basis. We usually sort these
/// lower to give priority to symbols in a tighter scope.
builtin: Sort,
/// Sorts based on the "kind" of a name. i.e., Its export status.
/// We sort normal names the highest. Then dunder names and finally
/// any other name that starts with a single underscore.
name_kind: NameKind,
/// The "dependency kind" of the module where this symbol
/// originates from.
///
/// This lets us, e.g., prioritize first party project modules
/// over third party dependencies. This applies to both symbols
/// already in scope and unimported symbols, essentially forming a
/// preference ordering for symbols based on where they came from.
///
/// Not all completions have this set. For example, keywords or
/// arguments. We assume that if it's not set, then there is some
/// other sorting criteria being applied or that it is generally
/// more specific than completions where this is set.
module_dependency_kind: Option<ModuleDependencyKind>,
/// Sorts based on whether this symbol is only available during
/// type checking and not at runtime.
type_check_only: Sort,
@@ -1061,24 +1073,13 @@ impl Relevance {
} else {
Sort::Even
},
current_module: c.module_name.map(|_| Sort::Lower).unwrap_or(Sort::Higher),
keyword: if c.kind == Some(CompletionKind::Keyword) {
Sort::Higher
} else {
Sort::Even
},
special_module: c
.module_name
.and_then(|name| {
if name.as_str() == "typing" {
Some(Sort::Higher)
} else {
None
}
})
.unwrap_or(Sort::Even),
builtin: if c.builtin { Sort::Lower } else { Sort::Even },
name_kind: NameKind::classify(&c.name),
module_dependency_kind: c.module_dependency_kind,
type_check_only: if c.is_type_check_only {
Sort::Lower
} else {
@@ -1088,6 +1089,98 @@ impl Relevance {
}
}
/// The dependency "kind" of a module, used to rank completions by
/// where a symbol comes from.
///
/// Every variant declared after `Current` can be assigned to an
/// unimported symbol (see `ModuleDependencyKind::from_module`, which
/// never returns `Current`). Those variants categorize a symbol by
/// where its module is defined. `Builtin` is additionally assigned to
/// builtin symbols that are already in scope.
///
/// We only support three broad categories of module location right
/// now: stdlib, third party and project. Ideally, we would distinguish
/// between _direct_ third party code and _indirect_ third party code,
/// but ty doesn't yet understand how to do this (as of 2026-01-08).
///
/// Note that the declaration order is significant: the derived `Ord`
/// compares variants in the order they are declared here, and variants
/// declared earlier are preferred over variants declared later. e.g.,
/// `Project` is preferred over `ThirdParty`, and `Stdlib` is ranked
/// last. Do not reorder the variants without intending to change
/// completion ranking.
#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)]
enum ModuleDependencyKind {
/// Symbols already in scope in the user's current module.
///
/// Note that this doesn't necessarily mean that the symbol is
/// *defined* in the current module. e.g., `numpy.arra<CURSOR>`
/// will return `array` from `numpy`, and its module dependency
/// kind is considered `Current`.
Current,
/// Reserved for the Python initial basis (the `builtins` module).
/// We want these symbols to appear high since they are used so
/// frequently, but not higher than symbols already in scope in
/// the current module.
Builtin,
/// Symbols defined somewhere in the user's project.
Project,
/// A namespace package somewhat defies classification, since
/// it can exist over multiple search paths. Since the standard
/// library doesn't use namespace packages, we just assume that
/// they are roughly equivalent to third party packages.
///
/// This is an erroneous assumption when the namespace
/// package is within the user's project. Probably we
/// could do better once we know how to navigate namespace
/// packages better. Regardless, we put this between
/// `Project` and `ThirdParty` as a bad compromise for now.
Namespace,
/// Symbols defined somewhere in a dependency, direct or
/// indirect.
ThirdParty,
/// Symbols from "special" standard library modules that
/// are so commonly used---but commonly have names in
/// conflict with other stdlib modules---that we want to
/// prioritize them above other stdlib modules.
///
/// `typing` is a good example of this. It has lots of
/// symbols that also exist in other modules. e.g.,
/// `TypeVar` in `ast`, `cast` in `ctypes` and
/// `Protocol` in `asyncio`.
StdlibSpecial,
/// Symbols from the standard library get ranked last by
/// the logic that they are least specific to the end user's
/// context.
///
/// This is somewhat specious since, while they are the least
/// specific, some stdlib modules are very commonly used.
Stdlib,
}
impl ModuleDependencyKind {
    /// Classifies a symbol by the module in which it is defined.
    ///
    /// The `builtins` module is checked first. After that, the
    /// classification is driven by the search path the module was
    /// found on. A module without a search path is treated as a
    /// namespace package.
    ///
    /// This never produces `ModuleDependencyKind::Current`. Callers
    /// that know a symbol is already in scope must assign that
    /// variant themselves.
    fn from_module(db: &dyn Db, module: Module<'_>) -> ModuleDependencyKind {
        if module.is_known(db, KnownModule::Builtins) {
            return Self::Builtin;
        }
        match module.search_path(db) {
            None => Self::Namespace,
            Some(search_path) if search_path.is_standard_library() => {
                // `typing` gets a boost over the rest of the
                // standard library.
                if module.is_known(db, KnownModule::Typing) {
                    Self::StdlibSpecial
                } else {
                    Self::Stdlib
                }
            }
            Some(search_path) if search_path.is_site_packages() => Self::ThirdParty,
            // Everything else, including "extra" search paths and
            // editable installs, is assumed to be part of the end
            // user's code.
            Some(_) => Self::Project,
        }
    }
}
/// An instruction to indicate an ordering preference.
#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Ord)]
enum Sort {
@@ -1385,7 +1478,8 @@ fn add_unimported_completions<'db>(
.insert(import_action.symbol_text())
.kind(symbol.kind().to_completion_kind())
.module_name(module_name)
.import(import_action.import().cloned()),
.import(import_action.import().cloned())
.module_dependency_kind(ModuleDependencyKind::from_module(db, symbol.module())),
);
}
}

View File

@@ -554,10 +554,17 @@ impl SearchPath {
)
}
/// Returns true when this is a "first party" search path, i.e.,
/// one that holds the end user's own project code.
pub fn is_first_party(&self) -> bool {
    matches!(*self.0, SearchPathInner::FirstParty(_))
}
/// Returns true when this search path is a site-packages directory.
pub fn is_site_packages(&self) -> bool {
    matches!(*self.0, SearchPathInner::SitePackages(_))
}
fn is_valid_extension(&self, extension: &str) -> bool {
if self.is_standard_library() {
extension == "pyi"