Compare commits

..

389 Commits

Author SHA1 Message Date
Charlie Marsh
c23bf395e2 Warn on invalid # noqa rule codes 2024-08-11 19:23:33 -04:00
Charlie Marsh
383676e332 Avoid parsing joint rule codes as distinct codes in # noqa 2024-08-11 19:23:00 -04:00
Yury Fedotov
feba5031dc [Minor typo] Fix article in "an fix" (#12797) 2024-08-10 21:22:00 -04:00
Dylan
0c2b88f224 [flake8-simplify] Further simplify to binary in preview for if-else-block-instead-of-if-exp (SIM108) (#12796)
In most cases we should suggest a ternary operator, but there are three
edge cases where a binary operator is more appropriate.

Given an if-else block of the form

```python
if test:
    target_var = body_value
else:
    target_var = else_value
```
This PR updates the check for SIM108 to the following:

- If `test == body_value` and preview enabled, suggest to replace with
`target_var = test or else_value`
- If `test == not body_value` and preview enabled, suggest to replace
with `target_var = body_value and else_value`
- If `not test == body_value` and preview enabled, suggest to replace
with `target_var = body_value and else_value`
- Otherwise, suggest to replace with `target_var = body_value if test
else else_value`

Closes #12189.
2024-08-10 16:49:25 +00:00
Alex Waygood
cf1a57df5a Remove red_knot_python_semantic::python_version::TargetVersion (#12790) 2024-08-10 14:28:31 +01:00
renovate[bot]
597c5f9124 Update dependency black to v24 (#12728) 2024-08-10 18:04:37 +05:30
Charlie Marsh
69e1c567d4 Treat type(Protocol) et al as metaclass base (#12770)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12736.
2024-08-09 20:10:12 +00:00
Alex Waygood
37b9bac403 [red-knot] Add support for --system-site-packages virtual environments (#12759) 2024-08-09 21:02:16 +01:00
Alex Waygood
83db48d316 RUF031: Ignore unparenthesized tuples in subscripts when the subscript is obviously a type annotation or type alias (#12762) 2024-08-09 20:31:27 +01:00
Alex Waygood
c4e651921b [red-knot] Move, rename and make public the PyVersion type (#12782) 2024-08-09 16:49:17 +01:00
Dylan
b595346213 [ruff] Do not remove parens for tuples with starred expressions in Python <=3.10 RUF031 (#12784) 2024-08-09 17:30:29 +02:00
Ryan Hoban
253474b312 Document that BLE001 supports both BaseException and Exception (#12788) 2024-08-09 17:28:50 +02:00
Micha Reiser
a176679b24 Log warnings when skipping editable installations (#12779) 2024-08-09 16:29:43 +02:00
Charlie Marsh
1f51048fa4 Don't enforce returns and yields in abstract methods (#12771)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12685.
2024-08-09 13:34:14 +00:00
Micha Reiser
2abfab0f9b Move Program and related structs to red_knot_python_semantic (#12777) 2024-08-09 11:50:45 +02:00
Dylan
64f1f3468d [ruff] Skip tuples with slice expressions in incorrectly-parenthesized-tuple-in-subscript (RUF031) (#12768)
## Summary

Adding parentheses to a tuple in a subscript with elements that include
slice expressions causes a syntax error. For example, `d[(1,2,:)]` is a
syntax error.

So, when `lint.ruff.parenthesize-tuple-in-subscript = true` and the
tuple includes a slice expression, we skip this check and fix.

Closes #12766.
2024-08-09 09:22:58 +00:00
Micha Reiser
ffaa35eafe Add test helper to setup tracing (#12741) 2024-08-09 07:04:04 +00:00
Charlie Marsh
c906b0183b Add known problems warning to type-comparison rule (#12769)
## Summary

See: https://github.com/astral-sh/ruff/issues/4560
2024-08-09 01:41:15 +00:00
Carl Meyer
bc5b9b81dd [red-knot] add dev dependency on ruff_db os feature from red_knot_pyt… (#12760) 2024-08-08 18:10:30 +01:00
Dhruv Manilawala
221ea662e0 Bump version to 0.5.7 (#12756) 2024-08-08 20:56:15 +05:30
Alex Waygood
d28c5afd14 [red-knot] Remove mentions of Ruff from the CLI help (#12752)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-08-08 15:35:10 +01:00
Alex Waygood
f1de08c2a0 [red-knot] Merge the semantic and module-resolver crates (#12751) 2024-08-08 15:34:11 +01:00
Christian Clauss
33e9a6a54e SIM110: any() is ~3x slower than the code it replaces (#12746)
> ~Builtins are also more efficient than `for` loops.~

Let's not promise performance because this code transformation does not
deliver.

Benchmark written by @dcbaker

> `any()` seems to be about 1/3 as fast (Python 3.11.9, NixOS):
```python
loop = 'abcdef'.split()
found = 'f'
nfound = 'g'


def test1():
    for x in loop:
        if x == found:
            return True
    return False


def test2():
    return any(x == found for x in loop)


def test3():
    for x in loop:
        if x == nfound:
            return True
    return False


def test4():
    return any(x == nfound for x in loop)


if __name__ == "__main__":
    import timeit

    print('for loop (found)    :', timeit.timeit(test1))
    print('for loop (not found):', timeit.timeit(test3))
    print('any() (found)       :', timeit.timeit(test2))
    print('any() (not found)   :', timeit.timeit(test4))
```
```
for loop (found)    : 0.051076093994197436
for loop (not found): 0.04388196699437685
any() (found)       : 0.15422860698890872
any() (not found)   : 0.15568504799739458
```
I have retested with longer lists and on multiple Python versions with
similar results.
2024-08-08 08:25:43 -04:00
Dylan
f577e03021 [ruff] Ignore empty tuples for incorrectly-parenthesized-tuple-in-subscript (RUF031) (#12749) 2024-08-08 13:18:03 +02:00
Micha Reiser
f53733525c Remove all useEffect usages (#12659) 2024-08-08 13:16:38 +02:00
Micha Reiser
2daa914334 Gracefully handle errors in CLI (#12747) 2024-08-08 11:02:47 +00:00
Steve C
6d9205e346 [ruff_linter] - Use LibCST in adjust_indentation for mixed whitespace (#12740) 2024-08-08 10:49:58 +02:00
Micha Reiser
df7345e118 Exit with an error if there are check failures (#12735) 2024-08-08 07:10:18 +00:00
Micha Reiser
dc6aafecc2 Setup tracing and document tracing usage (#12730) 2024-08-08 06:28:40 +00:00
Charlie Marsh
5107a50ae7 Parenthesize conditions based on precedence when merging if arms (#12737)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12732.
2024-08-07 23:03:24 -04:00
Micha Reiser
a631d600ac Fix cache invalidation for nested pyproject.toml files (#12727) 2024-08-07 21:53:45 +02:00
Alex Waygood
f34b9a77f0 [red-knot] Cleanups to logic resolving site-packages from a venv path (#12731) 2024-08-07 15:48:15 +01:00
Dylan
7997da47f5 [ruff] Implement incorrectly-parenthesized-tuple-in-subscript (RUF031) (#12480)
Implements the new fixable lint rule `RUF031` which checks for the use or omission of parentheses around tuples in subscripts, depending on the setting `lint.ruff.parenthesize-tuple-in-getitem`. By default, the use of parentheses is considered a violation.
2024-08-07 13:11:29 +00:00
Alex Waygood
d380b37a09 Add a new Binding::is_unused method (#12729) 2024-08-07 11:17:56 +01:00
Alex Waygood
b14fee9320 [ruff] Mark RUF023 fix as unsafe if __slots__ is not a set and the binding is used elsewhere (#12692) 2024-08-07 10:41:03 +01:00
Dhruv Manilawala
037e817450 Use struct instead of type alias for workspace settings index (#12726)
## Summary

Follow-up from https://github.com/astral-sh/ruff/pull/12725, this is
just a small refactor to use a wrapper struct instead of type alias for
workspace settings index. This avoids the need to have the
`register_workspace_settings` as a static method on `Index` and instead
is a method on the new struct itself.
2024-08-07 09:26:59 +00:00
Dhruv Manilawala
7fcfedd430 Ignore non-file workspace URL (#12725)
## Summary

This PR updates the server to ignore non-file workspace URL.

This is to avoid crashing the server if the URL scheme is not "file".
We'd still raise an error if the URL to file path conversion fails.

Also, as per the docs of
[`to_file_path`](https://docs.rs/url/2.5.2/url/struct.Url.html#method.to_file_path):

> Note: This does not actually check the URL’s scheme, and may give
nonsensical results for other schemes. It is the user’s responsibility
to check the URL’s scheme before calling this.

resolves: #12660

## Test Plan

I'm not sure how to test this locally but the change is small enough to
validate on its own.
2024-08-07 09:15:55 +00:00
Dhruv Manilawala
50ff5c7544 Include docs requirements for Renovate upgrades (#12724)
## Summary

This PR updates the Renovate config to account for the
`requirements*.txt` files in `docs/` directory.

The `mkdocs-material` upgrade is ignored because we use commit SHA for
the insider version and it should match the corresponding public version
as per the docs:
https://squidfunk.github.io/mkdocs-material/insiders/upgrade/
(`9.x.x-insiders-4.x.x`).

## Test Plan

```console
❯ renovate-config-validator
(node:83193) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
(Use `node --trace-deprecation ...` to show where the warning was created)
 INFO: Validating .github/renovate.json5
 INFO: Config validated successfully
```
2024-08-07 13:11:18 +05:30
Charlie Marsh
90e5bc2bd9 Avoid false-positives for list concatenations in SQL construction (#12720)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12710.
2024-08-06 16:26:03 -04:00
Alex Waygood
aae9619d3d [red-knot] Fix build on Windows (#12719)
## Summary

Tests are failing on `main` because automerge landed
https://github.com/astral-sh/ruff/pull/12716 despite the Windows tests
failing.
2024-08-06 20:21:25 +01:00
Alex Waygood
7fa76a2b2b [red-knot] Derive site-packages from a venv path (#12716) 2024-08-06 18:34:37 +00:00
Dhruv Manilawala
14dd6d980e [red-knot] Keep subcommands optional for the binary (#12715)
## Summary

This PR updates the `red_knot` CLI to make the subcommand optional.

## Test Plan

Run the following commands:
* `cargo run --bin red_knot --
--current-directory=~/playground/ruff/type_inference` (no subcommand
requirement)
* `cargo run --bin red_knot -- server` (should start the server)
2024-08-06 20:24:49 +05:30
Micha Reiser
846f57fd15 Update salsa (#12711) 2024-08-06 13:17:39 +00:00
Micha Reiser
8e6aa78796 Remove 'cli' module from red_knot (#12714) 2024-08-06 12:10:36 +00:00
Dhruv Manilawala
e91a0fe94a [red-knot] Implement basic LSP server (#12624)
## Summary

This PR adds basic LSP implementation for the Red Knot project.

This is basically a fork of the existing `ruff_server` crate into a
`red_knot_server` crate. The following are the main differences:
1. The `Session` stores a map from workspace root to the corresponding
Red Knot database (`RootDatabase`).
2. The database is initialized with the newly implemented `LSPSystem`
(implementation of `System` trait)
3. The `LSPSystem` contains the server index corresponding to each
workspace and an underlying OS system implementation. For certain
methods, the system first checks if there's an open document in LSP
system and returns the information from that. Otherwise, it falls back
to the OS system to get that information. These methods are
`path_metadata`, `read_to_string` and `read_to_notebook`
4. Add `as_any_mut` method for `System`

**Why fork?**

Forking allows us to experiment with the functionalities that are
specific to Red Knot. The architecture is completely different and so
the requirements for an LSP implementation are different as well. For
example, Red Knot only supports a single workspace, so the LSP system
needs to map the multi-workspace support to each Red Knot instance. In
the end, the server code isn't too big, it will be easier to implement
Red Knot specific functionality without worrying about existing server
limitations and it shouldn't be difficult to port the existing server.

## Review

Most of the server files hasn't been changed. I'm going to list down the
files that have been changed along with highlight the specific part of
the file that's changed from the existing server code.

Changed files:
* Red Knot CLI implementation:
https://github.com/astral-sh/ruff/pull/12624/files#diff-579596339a29d3212a641232e674778c339b446de33b890c7fdad905b5eb50e1
* In
https://github.com/astral-sh/ruff/pull/12624/files#diff-b9a9041a8a2bace014bf3687c3ef0512f25e0541f112fad6131b14242f408db6,
server capabilities have been updated, dynamic capability registration
is removed
* In
https://github.com/astral-sh/ruff/pull/12624/files#diff-b9a9041a8a2bace014bf3687c3ef0512f25e0541f112fad6131b14242f408db6,
the API for `clear_diagnostics` now take in a `Url` instead of
`DocumentQuery` as the document version doesn't matter when clearing
diagnostics after a document is closed
*
[`did_close`](https://github.com/astral-sh/ruff/pull/12624/files#diff-9271370102a6f3be8defaca40c82485b0048731942520b491a3bdd2ee0e25493),
[`did_close_notebook`](https://github.com/astral-sh/ruff/pull/12624/files#diff-96fb53ffb12c1694356e17313e4bb37b3f0931e887878b5d7c896c19ff60283b),
[`did_open`](https://github.com/astral-sh/ruff/pull/12624/files#diff-60e852cf1aa771e993131cabf98eb4c467963a8328f10eccdb43b3e8f0f1fb12),
[`did_open_notebook`](https://github.com/astral-sh/ruff/pull/12624/files#diff-ac356eb5e36c3b2c1c135eda9dfbcab5c12574d1cb77c71f7da8dbcfcfb2d2f1)
are updated to open / close file from the corresponding Red Knot
workspace
* The [diagnostic
handler](https://github.com/astral-sh/ruff/pull/12624/files#diff-4475f318fd0290d0292834569a7df5699debdcc0a453b411b8c3d329f1b879d9)
is updated to request diagnostics from Red Knot
* The [`Session::new`] method in
https://github.com/astral-sh/ruff/pull/12624/files#diff-55c96201296200c1cab37c8b0407b6c733381374b94be7ae50563bfe95264e4d
is updated to construct the Red Knot databases for each workspace. It
also contains the `index_mut` and `MutIndexGuard` implementation
* And, `LSPSystem` implementation is in
https://github.com/astral-sh/ruff/pull/12624/files#diff-4ed62bd359c43b0bf1a13f04349dcd954966934bb8d544de7813f974182b489e

## Test Plan

First, configure VS Code to use the `red_knot` binary

1. Build the `red_knot` binary by `cargo build`
2. Update the VS Code extension to specify the path to this binary
```json
{
	"ruff.path": ["/path/to/ruff/target/debug/red_knot"]
}
```
3. Restart VS Code

Now, open a file containing red-knot specific diagnostics, close the
file and validate that diagnostics disappear.
2024-08-06 11:27:30 +00:00
Micha Reiser
d2c627efb3 Use standard allocator for wasm (#12713) 2024-08-06 11:20:47 +00:00
Micha Reiser
10e977d5f5 [red-knot] Add basic WASM API (#12654) 2024-08-06 09:21:42 +02:00
Auguste Lalande
f0318ff889 [pydoclint] Consider DOC201 satisfied if docstring begins with "Returns" (#12675)
<!--
Thank you for contributing to Ruff! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

Resolves #12636

Consider docstrings which begin with the word "Returns" as having
satisfactorily documented they're returns. For example
```python
def f():
    """Returns 1."""
    return 1
```
is valid.

## Test Plan

Added example to test fixture.

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
2024-08-06 06:46:38 +00:00
Dhruv Manilawala
5cc3fed9a8 [red-knot] Infer float and complex literal expressions (#12689)
## Summary

This PR implements type inference for float and complex literal
expressions.

## Test Plan

Add test cases for both types.
2024-08-06 06:24:28 +00:00
Steve C
39dd732e27 [refurb] - fix unused autofix for implicit-cwd (FURB177) (#12708) 2024-08-06 08:09:35 +02:00
Dylan
52630a1d55 [flake8-comprehensions] Set comprehensions not a violation for sum in unnecessary-comprehension-in-call (C419) (#12691)
## Summary

Removes set comprehension as a violation for `sum` when checking `C419`,
because set comprehension may de-duplicate entries in a generator,
thereby modifying the value of the sum.

Closes #12690.
2024-08-06 02:30:58 +00:00
Steve C
7b5fd63ce8 [flake8-pyi] - add autofix for future-annotations-in-stub (PYI044) (#12676)
## Summary

add autofix for `PYI044`

## Test Plan

`cargo test`
2024-08-05 22:27:55 -04:00
Alex Waygood
5499821c67 [red-knot] Rename workspace_root variables in the module resolver to src_root (#12697)
Fixes #12337
2024-08-05 23:07:18 +01:00
Alex Waygood
7ee7c68f36 Add a new script to generate builtin module names (#12696) 2024-08-05 21:33:36 +01:00
Carl Meyer
2393d19f91 [red-knot] infer instance types for builtins (#12695)
Previously we wrongly inferred the type of the builtin type itself (e.g.
`Literal[int]`); we need to infer the instance type instead.
2024-08-05 13:32:42 -07:00
Dhruv Manilawala
a8e2ba508e [red-knot] Infer boolean literal expression (#12688)
## Summary

This PR implements type inference for boolean literal expressions.

## Test Plan

Add test cases for `True` and `False`.
2024-08-05 11:30:53 -07:00
Alex Waygood
0b4d3ce39b TRY002: fixup docs (#12683) 2024-08-05 08:56:12 +00:00
epenet
0a345dc627 [tryceratops] Add BaseException to raise-vanilla-class rule (TRY002) (#12620) 2024-08-05 09:45:49 +01:00
Micha Reiser
ff2aa3ea00 Revert "Remove criterion/codspeed compat layer (#12524)" (#12680) 2024-08-05 07:49:04 +00:00
Micha Reiser
0d3bad877d Fix module resolver symlink test on macOs (#12682) 2024-08-05 07:22:54 +00:00
Micha Reiser
756060d676 Upgrade Salsa to a version with a 32bit compatible concurrent vec (#12679) 2024-08-05 08:50:32 +02:00
Micha Reiser
b647f3fba8 Disable testing ruff_benchmark by default (#12678) 2024-08-05 06:15:52 +00:00
Dhruv Manilawala
82e69ebf23 Update broken links in the documentation (#12677)
## Summary

Running `mkdocs server -f mkdocs.insiders.yml` gave warnings about these
broken links.

## Test plan

I built the docs locally and verified that the updated links work
properly.
2024-08-05 05:35:23 +00:00
renovate[bot]
2c79045342 Update Rust crate pep440_rs to v0.6.6 (#12666) 2024-08-04 22:42:43 -04:00
Charlie Marsh
3497f5257b Add preview note to unnecessary-comprehension-in-call (#12673) 2024-08-05 02:27:00 +00:00
Dylan
25aabec814 [flake8-comprehensions] Account for list and set comprehensions in unnecessary-literal-within-tuple-call (C409) (#12657)
## Summary

Make it a violation of `C409` to call `tuple` with a list or set
comprehension, and
implement the (unsafe) fix of calling the `tuple` with the underlying
generator instead.

Closes #12648.

## Test Plan

Test fixture updated, cargo test, docs checked for updated description.
2024-08-04 22:14:52 -04:00
renovate[bot]
0e71485ea9 Update Rust crate regex to v1.10.6 (#12667) 2024-08-04 22:10:40 -04:00
renovate[bot]
43a9d282f7 Update Rust crate ordermap to v0.5.1 (#12665) 2024-08-04 22:10:32 -04:00
renovate[bot]
6f357b8b45 Update Rust crate tempfile to v3.11.0 (#12671) 2024-08-05 02:08:20 +00:00
renovate[bot]
73d9f11a9c Update pre-commit dependencies (#12670) 2024-08-05 02:08:07 +00:00
renovate[bot]
d6c6db5a44 Update NPM Development dependencies (#12672) 2024-08-04 22:07:55 -04:00
renovate[bot]
56d985a972 Update Rust crate toml to v0.8.19 (#12669) 2024-08-04 22:07:44 -04:00
renovate[bot]
b3e0655cc9 Update Rust crate serde_json to v1.0.122 (#12668) 2024-08-04 22:07:35 -04:00
renovate[bot]
06baffec9e Update Rust crate clap to v4.5.13 (#12664) 2024-08-04 22:07:26 -04:00
Steve C
67a2ae800a [ruff] - add autofix zip-instead-of-pairwise (RUF007) (#12663)
## Summary

Adds autofix for `RUF007`

## Test Plan

`cargo test`, however I get errors for `test resolver::tests::symlink
... FAILED` which seems to not be my fault
2024-08-04 21:57:50 -04:00
InSync
7a2c75e2fc Replace ruff-lsp links in README.md with links to new documentation page (#12618)
Since `ruff-lsp` has been (semi-)deprecated for sometime, it wouldn't
make sense to mention it in the most prominent sections of the `README`.
Instead, they should point to the new <i>[Editor
Integrations](https://docs.astral.sh/ruff/editors/)</i> documentation
page.
2024-08-04 15:31:36 +05:30
DavideRagazzon
9ee44637ca Fix typo in configuration docs (#12655) 2024-08-04 09:43:51 +02:00
Charlie Marsh
733341ab39 Ignore DOC errors for stub functions (#12651)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12650.
2024-08-03 08:13:21 -04:00
Micha Reiser
341a25eec1 Fix file watching on macOS if a module-search path is a symlink (#12634) 2024-08-03 07:24:07 +00:00
Charlie Marsh
38e178e914 Try both 'Raises' section styles when convention is unspecified (#12649)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12647.
2024-08-02 21:04:46 -04:00
Alex Waygood
daccb3f4f3 [pydoclint] Deduplicate collected exceptions after traversing function bodies (#12642) 2024-08-02 23:17:06 +01:00
Charlie Marsh
c858afe03a [flake8-bugbear] Treat return as equivalent to break (B909) (#12646)
Closes https://github.com/astral-sh/ruff/issues/12640.
2024-08-02 18:14:17 -04:00
Alex Waygood
3c1c3199d0 [pydoclint] Teach rules to understand reraised exceptions as being explicitly raised (#12639)
## Summary

Fixes #12630.

DOC501 and DOC502 now understand functions with constructs like this to
be explicitly raising `TypeError` (which should be documented in a
function's docstring):

```py
try:
    foo():
except TypeError:
    ...
    raise
```

I made an exception for `Exception` and `BaseException`, however.
Constructs like this are reasonably common, and I don't think anybody
would say that it's worth putting in the docstring that it raises "some
kind of generic exception":

```py
try:
    foo()
except BaseException:
    do_some_logging()
    raise
```

## Test Plan

`cargo test -p ruff_linter --lib`
2024-08-02 22:47:22 +01:00
Ran Benita
fbfe2cb2f5 [flake8-async] Fix false positives with multiple async with items (ASYNC100) (#12643)
## Summary

Please see
https://github.com/astral-sh/ruff/pull/12605#discussion_r1699957443 for
a description of the issue.

They way I fixed it is to get the *last* timeout item in the `with`, and
if it's an `async with` and there are items after it, then don't trigger
the lint.

## Test Plan

Updated the fixture with some more cases.
2024-08-02 21:25:13 +00:00
Carl Meyer
1c311e4fdb [red-knot] update benchmark to run on tomllib (#12635)
Changes the red-knot benchmark to run on the stdlib "tomllib" library
(which is self-contained, four files, uses type annotations) instead of
on very small bits of handwritten code.

Also remove the `without_parse` benchmark: now that we are running on
real code that uses typeshed, we'd either have to pre-parse all of
typeshed (slow) or find some way to determine which typeshed modules
will be used by the benchmark (not feasible with reasonable complexity.)

## Test Plan

`cargo bench -p ruff_benchmark --bench red_knot`
2024-08-02 11:23:52 -07:00
Micha Reiser
12177a42e3 Set durabilities for low-durability fields on high-durability inputs (#12627) 2024-08-02 19:42:34 +02:00
Micha Reiser
dfb08856eb Fix file watcher stop data race (#12626) 2024-08-02 19:02:49 +02:00
Auguste Lalande
94d817e1a5 [pydoclint] Add docstring-missing-yields amd docstring-extraneous-yields (DOC402, DOC403) (#12538) 2024-08-02 17:55:42 +01:00
ember91
9296bd4e3f Fix a typo (#12633)
Co-authored-by: Emil Berg <emil.berg@ericsson.com>
2024-08-02 16:39:27 +01:00
Micha Reiser
da824ba316 Release Ruff 0.5.6 (#12629)
Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
2024-08-02 17:35:14 +02:00
Micha Reiser
012198a1b0 Enable notebooks by default in preview mode (#12621) 2024-08-02 13:36:53 +00:00
Alex Waygood
fbab04fbe1 [red-knot] Allow multiple site-packages search paths (#12609) 2024-08-02 13:33:19 +00:00
Dhruv Manilawala
9aa43d5f91 Separate red_knot into CLI and red_knot_workspace crates (#12623)
## Summary

This PR separates the current `red_knot` crate into two crates:
1. `red_knot` - This will be similar to the `ruff` crate, it'll act as
the CLI crate
2. `red_knot_workspace` - This includes everything except for the CLI
functionality from the existing `red_knot` crate

Note that the code related to the file watcher is in
`red_knot_workspace` for now but might be required to extract it out in
the future.

The main motivation for this change is so that we can have a `red_knot
server` command. This makes it easier to test the server out without
making any changes in the VS Code extension. All we need is to specify
the `red_knot` executable path in `ruff.path` extension setting.

## Test Plan

- `cargo build`
- `cargo clippy --workspace --all-targets --all-features`
- `cargo shear --fix`
2024-08-02 11:24:36 +00:00
Micha Reiser
966563c79b Add tests for hard and soft links (#12590) 2024-08-02 10:14:28 +00:00
Micha Reiser
27edadec29 Make server panic hook more error resilient (#12610) 2024-08-02 12:10:06 +02:00
InSync
2e2b1b460f Fix a typo in docs/editors/settings.md (#12614)
Diff:

```diff
-- `false: Same as`off\`
+- `false`: Same as `off`
```
2024-08-01 11:23:55 -05:00
Charlie Marsh
a3e67abf4c Add newlines before comments in E305 (#12606)
## Summary

There's still a problem here. Given:

```python
class Class():
    pass

    # comment

    # another comment
a = 1
```

We only add one newline before `a = 1` on the first pass, because
`max_precedling_blank_lines` is 1... We then add the second newline on
the second pass, so it ends up in the right state, but the logic is
clearly wonky.

Closes https://github.com/astral-sh/ruff/issues/11508.
2024-07-31 23:11:00 -04:00
Carl Meyer
ee0518e8f7 [red-knot] implement attribute of union (#12601)
I hit this `todo!` trying to run type inference over some real modules.
Since it's a one-liner to implement it, I just did that rather than
changing to `Type::Unknown`.
2024-07-31 19:45:24 -07:00
Charlie Marsh
d774a3bd48 Avoid unused async when context manager includes TaskGroup (#12605)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12354.
2024-08-01 02:12:43 +00:00
Charlie Marsh
7e6b19048e Don't attach comments with mismatched indents (#12604)
## Summary

Given:

```python
def test_update():
    pass
    # comment
def test_clientmodel():
    pass
```

We don't want `# comment` to be attached to `def test_clientmodel()`.

Closes https://github.com/astral-sh/ruff/issues/12589.
2024-07-31 22:09:05 -04:00
Charlie Marsh
8e383b9587 Respect start index in unnecessary-list-index-lookup (#12603)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12594.
2024-08-01 01:21:15 +00:00
github-actions[bot]
3f49ab126f Sync vendored typeshed stubs (#12602) 2024-08-01 01:44:56 +01:00
Chris Krycho
c1bc7f4dee Remove ecosystem_ci flag from Ruff CLI (#12596)
## Summary

@zanieb noticed while we were discussing #12595 that this flag is now
unnecessary, so remove it and the flags which reference it.

## Test Plan

Question for maintainers: is there a test to add *or* remove here? (I’ve
opened this as a draft PR with that in view!)
2024-07-31 11:40:03 -05:00
Bowen Liang
a44d579f21 Add Dify to Ruff users (#12593)
## Summary

<!-- What's the purpose of the change? What does it do, and why? -->
- Add the popular LLM Ops project Dify to the user list in Readme, as
Dify introduced Ruff for lining since Feb 2024 in
https://github.com/langgenius/dify/pull/2366
2024-07-31 08:56:52 -04:00
Alex Waygood
a3900d2b0b [pyflakes] Fix preview-mode bugs in F401 when attempting to autofix unused first-party submodule imports in an __init__.py file (#12569) 2024-07-31 13:34:30 +01:00
Alex Waygood
83b1c48a93 Make setting and retrieving pydocstyle settings less tedious (#12582) 2024-07-31 10:39:33 +01:00
Micha Reiser
138e70bd5c Upgrade to Rust 1.80 (#12586) 2024-07-30 19:18:08 +00:00
Eero Vaher
ee103ffb25 Fix an argument name in B905 description (#12588)
The description of `zip-without-explicit-strict` erroneously mentions a
non-existing `check` argument for `zip()`.
2024-07-30 14:40:56 -04:00
Micha Reiser
18f87b9497 Flaky file watching tests, add debug assertions (#12587) 2024-07-30 18:09:55 +00:00
Micha Reiser
adc8d4e1e7 File watch events: Add dynamic wait period before writing new changes (#12585) 2024-07-30 19:18:43 +02:00
Alex Waygood
90db361199 Consider more stdlib decorators to be property-like (#12583) 2024-07-30 17:18:23 +00:00
Alex Waygood
4738135801 Improve consistency between linter rules in determining whether a function is property (#12581) 2024-07-30 17:42:04 +01:00
Micha Reiser
264cd750e9 Add delay between updating a file (#12576) 2024-07-30 18:31:29 +02:00
Alex Waygood
7a4419a2a5 Improve handling of metaclasses in various linter rules (#12579) 2024-07-30 14:48:36 +01:00
Alex Waygood
ac1666d6e2 Remove several incorrect uses of map_callable() (#12580) 2024-07-30 14:30:25 +01:00
epenet
459c85ba27 [flake8-return] Exempt cached properties and other property-like decorators from explicit return rule (RET501) (#12563)
Co-authored-by: Alex Waygood <alex.waygood@gmail.com>
2024-07-30 11:06:28 +00:00
Alex Waygood
aaa56eb0bd Fix NFKC normalization bug when removing unused imports (#12571) 2024-07-30 09:54:35 +00:00
Dhruv Manilawala
f3c14a4276 Keep track of deleted cell for reorder change request (#12575)
## Summary

This PR fixes a bug where the server wouldn't retain the cell content in
case of a reorder change request.

As mentioned in
https://github.com/astral-sh/ruff/issues/12573#issuecomment-2257819298,
this change request is modeled as (a) remove these cell URIs and (b) add
these cell URIs. The cell content isn't provided. But, the way we've
modeled the `NotebookCell` (it contains the underlying `TextDocument`),
we need to keep track of the deleted cells to get the content.

This is not an ideal solution and a better long term solution would be
to model it as per the spec but that is a big structural change and will
affect multiple parts of the server. Modeling as per the spec would also
avoid bugs like https://github.com/astral-sh/ruff/pull/11864. For
context, that model would add complexity per
https://github.com/astral-sh/ruff/pull/11206#discussion_r1600165481.

fixes: #12573

## Test Plan

This video shows the before and after the bug is fixed:


https://github.com/user-attachments/assets/2fcad4b5-f9af-4776-8640-4cd1fa16e325
2024-07-30 09:51:26 +00:00
Alex Waygood
3169d408fa [red-knot] Fix typos in the module resolver (#12574) 2024-07-30 09:38:38 +00:00
Micha Reiser
a2286c8e47 Set Durability to 'HIGH' for most inputs and third-party libraries (#12566) 2024-07-30 09:03:59 +00:00
Piotr Osiewicz
fb9f566f56 Use $/logTrace for server trace logs in Zed and VS Code (#12564)
## Summary

This pull request adds support for logging via `$/logTrace` RPC
messages. It also enables that code path for when a client is Zed editor
or VS Code (as there's no way for us to generically tell whether a client prefers
`$/logTrace` over stderr.

Related to: #12523

## Test Plan

I've built Ruff from this branch and tested it manually with Zed.

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
2024-07-30 08:32:20 +05:30
Micha Reiser
381bd1ff4a Delete left over debug statement (#12567) 2024-07-29 16:16:12 +02:00
Micha Reiser
2f54d05d97 Remove salsa::report_untracked_read when finding the dynamic module resolution paths (#12509) 2024-07-29 09:31:29 +00:00
Micha Reiser
e18b4e42d3 [red-knot] Upgrade to the *new* *new* salsa (#12406) 2024-07-29 07:21:24 +00:00
Dhruv Manilawala
9495331a5f Recommend client config for trace setting in Neovim (#12562) 2024-07-29 06:14:34 +00:00
renovate[bot]
e1076db7d0 Update CodSpeedHQ/action action to v3 (#12559) 2024-07-29 07:37:02 +02:00
renovate[bot]
1986c9e8e2 Update NPM Development dependencies (#12556) 2024-07-28 22:17:44 -04:00
renovate[bot]
d7e80dc955 Update pre-commit dependencies (#12555) 2024-07-28 22:17:34 -04:00
renovate[bot]
87d09f77cd Update Rust crate imperative to v1.0.6 (#12552) 2024-07-28 22:17:28 -04:00
renovate[bot]
bd37ef13b8 Update Rust crate bstr to v1.10.0 (#12557) 2024-07-28 22:17:11 -04:00
renovate[bot]
ec23c974db Update Rust crate toml to v0.8.16 (#12554) 2024-07-28 22:17:01 -04:00
renovate[bot]
122e5ab428 Update Rust crate serde_json to v1.0.121 (#12553) 2024-07-28 22:16:55 -04:00
renovate[bot]
2f2149aca8 Update Rust crate env_logger to v0.11.5 (#12550) 2024-07-28 22:16:49 -04:00
renovate[bot]
9d5c31e7da Update Rust crate imara-diff to v0.1.7 (#12551) 2024-07-28 22:16:42 -04:00
renovate[bot]
25f3ad6238 Update Rust crate clap to v4.5.11 (#12549) 2024-07-28 22:16:36 -04:00
renovate[bot]
79926329a4 Update Rust crate argfile to v0.2.1 (#12548) 2024-07-28 22:16:31 -04:00
Aleksei Latyshev
9cdc578dd9 [flake8-builtins] Implement import, lambda, and module shadowing (#12546)
## Summary

<!-- What's the purpose of the change? What does it do, and why? -->
Extend `flake8-builtins` to imports, lambda-arguments, and modules to be
consistent with original checker
[flake8_builtins](https://github.com/gforcada/flake8-builtins/blob/main/flake8_builtins.py).

closes #12540 

## Details

- Implement builtin-import-shadowing (A004)
- Stop tracking imports shadowing in builtin-variable-shadowing (A001)
in preview mode.
- Implement builtin-lambda-argument-shadowing (A005)
- Implement builtin-module-shadowing (A006)
  - Add new option `linter.flake8_builtins.builtins_allowed_modules`

## Test Plan

cargo test
2024-07-29 01:42:42 +00:00
Charlie Marsh
665c75f7ab Add document for executable determination (#12547)
Closes https://github.com/astral-sh/ruff/issues/12505.
2024-07-28 16:23:00 -04:00
Micha Reiser
f37b39d6cc Allow downloading ecosystem results from forks (#12544) 2024-07-27 19:57:19 +02:00
Charlie Marsh
e18c45c310 Avoid marking required imports as unused (#12537)
## Summary

If an import is marked as "required", we should never flag it as unused.
In practice, this is rare, since required imports are typically used for
`__future__` annotations, which are always considered "used".

Closes https://github.com/astral-sh/ruff/issues/12458.
2024-07-26 14:23:43 -04:00
Charlie Marsh
d930052de8 Move required import parsing out of lint rule (#12536)
## Summary

Instead, make it part of the serialization and deserialization itself.
This makes it _much_ easier to reuse when solving
https://github.com/astral-sh/ruff/issues/12458.
2024-07-26 13:35:45 -04:00
Sigurd Spieckermann
7ad4df9e9f Complete FBT002 example with Enum argument (#12525)
## Summary

I've completed `FBT002` rule example with an `Enum` argument to show the
full usage in this case.
2024-07-26 11:50:19 -04:00
Charlie Marsh
425761e960 Use colon rather than dot formatting for integer-only types (#12534)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12421.
2024-07-26 15:48:19 +00:00
Carl Meyer
4b69271809 [red-knot] resolve int/list/dict/set/tuple to builtin type (#12521)
Now that we have builtins available, resolve some simple cases to the
right builtin type.

We should also adjust the display for types to include their module
name; that's not done yet here.
2024-07-26 08:21:31 -07:00
Micha Reiser
bf23d38a21 Remove unnecessary clone in workspace API (#12529) 2024-07-26 17:19:05 +02:00
Charlie Marsh
49f51583fa Always allow explicit multi-line concatenations when implicit are banned (#12532)
## Summary

Closes https://github.com/astral-sh/ruff/issues/11582.
2024-07-26 10:36:35 -04:00
Charlie Marsh
1fe4a5faed Avoid recommending __slots__ for classes that inherit from more than namedtuple (#12531)
## Summary

Closes https://github.com/astral-sh/ruff/issues/11887.
2024-07-26 14:24:40 +00:00
Charlie Marsh
998bfe0847 Avoid recommending no-argument super in slots=True dataclasses (#12530)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12506.
2024-07-26 10:09:51 -04:00
Dhruv Manilawala
6f4db8675b [red-knot] Add support for untitled files (#12492)
## Summary

This PR adds support for untitled files in the Red Knot project.

Refer to the [design
discussion](https://github.com/astral-sh/ruff/discussions/12336) for
more details.

### Changes
* The `parsed_module` always assumes that the `SystemVirtual` path is of
`PySourceType::Python`.
* For the module resolver, as suggested, I went ahead by adding a new
`SystemOrVendoredPath` enum and renamed `FilePathRef` to
`SystemOrVendoredPathRef` (happy to consider better names here).
* The `file_to_module` query would return if it's a
`FilePath::SystemVirtual` variant because a virtual file doesn't belong
to any module.
* The sync implementation for the system virtual path is basically the
same as that of system path except that it uses the
`virtual_path_metadata`. The reason for this is that the system
(language server) would provide the metadata on whether it still exists
or not and if it exists, the corresponding metadata.

For point (1), VS Code would use `Untitled-1` for Python files and
`Untitled-1.ipynb` for Jupyter Notebooks. We could use this distinction
to determine whether the source type is `Python` or `Ipynb`.

## Test Plan

Added test cases in #12526
2024-07-26 18:13:31 +05:30
Micha Reiser
71f7aa4971 Remove criterion/codspeed compat layer (#12524) 2024-07-26 12:22:16 +02:00
Auguste Lalande
9f72f474e6 [pydoclint] Add docstring-missing-returns amd docstring-extraneous-returns (DOC201, DOC202) (#12485)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-26 06:36:00 +00:00
Carl Meyer
10c993e21a [red-knot] remove wrong __init__.py from file-watching tests (#12519) 2024-07-26 07:14:01 +01:00
Carl Meyer
2d3914296d [red-knot] handle all syntax without panic (#12499)
Extend red-knot type inference to cover all syntax, so that inferring
types for a scope gives all expressions a type. This means we can run
the red-knot semantic lint on all Python code without panics. It also
means we can infer types for `builtins.pyi` without panics.

To keep things simple, this PR intentionally doesn't add any new type
inference capabilities: the expanded coverage is all achieved with
`Type::Unknown`. But this puts the skeleton in place for adding better
inference of all these language features.

I also had to add basic Salsa cycle recovery (with just `Type::Unknown`
for now), because some `builtins.pyi` definitions are cyclic.

To test this, I added a comprehensive corpus of test snippets sourced
from Cinder under [MIT
license](https://github.com/facebookincubator/cinder/blob/cinder/3.10/cinderx/LICENSE),
which matches Ruff's license. I also added to this corpus some
additional snippets for newer language features: all the
`27_func_generic_*` and `73_class_generic_*` files, as well as
`20_lambda_default_arg.py`, and added a test which runs semantic-lint
over all these files. (The test doesn't assert the test-corpus files are
lint-free; just that they are able to lint without a panic.)
2024-07-25 17:38:08 -07:00
Charlie Marsh
7571da8778 Preserve trailing inline comments on import-from statements (#12498)
## Summary

Right now, in the isort comment model, there's nowhere for trailing
comments on the _statement_ to go, as in:

```python
from mylib import (
    MyClient,
    MyMgmtClient,
)  # some comment
```

If the comment is on the _alias_, we do preserve it, because we attach
it to the alias, as in:

```python
from mylib import (
    MyClient,
    MyMgmtClient,  # some comment
)
```

Similarly, if the comment is trailing on an import statement
(non-`from`), we again attach it to the alias, because it can't be
parenthesized, as in:

```python
import foo  # some comment
```

This PR adds logic to track and preserve those trailing comments.

We also no longer drop several other comments, like:

```python
from mylib import (
    # some comment
    MyClient
)
```

Closes https://github.com/astral-sh/ruff/issues/12487.
2024-07-25 17:46:58 -04:00
Alex Waygood
2ceac5f868 [red-knot] Rename some methods in the module resolver (#12517) 2024-07-25 19:28:48 +00:00
Alex Waygood
5ce80827d2 [red-knot] Refactor path.rs in the module resolver (#12494) 2024-07-25 19:29:28 +01:00
Dhruv Manilawala
e047b9685a Use docs bot email for docs publish (#12511)
Ref: https://github.com/astral-sh/uv/pull/5369
2024-07-25 21:50:00 +05:30
Dhruv Manilawala
fc16d8d04d Bump version to 0.5.5 (#12510) 2024-07-25 20:17:01 +05:30
Uriya Harpeness
175e5d7b88 Add missing traceback line in f-string-in-exception docstring. (#12508)
## Summary

Add missing traceback line in `f-string-in-exception` docstring.

Solves https://github.com/astral-sh/ruff/issues/12504.
2024-07-25 10:22:05 -04:00
Dhruv Manilawala
c03f257ed7 Add note about the breaking change in nvim-lspconfig (#12507)
Refer https://github.com/astral-sh/ruff/issues/12408
2024-07-25 14:01:16 +00:00
Dhruv Manilawala
6bbb4a28c2 Add setup docs for Zed editor (#12501)
## Summary

This PR adds the setup documentation for using Ruff with the Zed editor.

Closes: #12388
2024-07-25 13:09:17 +00:00
Micha Reiser
2ce3e3ae60 Fix the search path tests on MacOS (#12503) 2024-07-25 08:21:38 +02:00
Charlie Marsh
2a64cccb61 Avoid applying ignore-names to self and cls function names (#12497)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12465.
2024-07-24 18:08:23 -04:00
Alex Waygood
928ffd6650 Ignore NPY201 inside except blocks for compatibility with older numpy versions (#12490) 2024-07-24 20:03:23 +00:00
Alex Waygood
e52be0951a [red-knot] Improve validation for search paths (#12376) 2024-07-24 15:02:25 +01:00
Dylan
889073578e [flake8-bugbear] Allow singleton tuples with starred expressions in B013 (#12484) 2024-07-24 15:19:30 +02:00
Micha Reiser
eac965ecaf [red-knot] Watch search paths (#12407) 2024-07-24 07:38:50 +00:00
Auguste Lalande
8659f2f4ea [pydoclint] Fix documentation for DOC501 (#12483)
## Summary

The doc was written backwards. mb.
2024-07-24 00:08:53 -04:00
Alex Waygood
c1b292a0dc Refactor NPY201 (#12479) 2024-07-23 18:24:20 +01:00
Micha Reiser
3af6ccb720 Fix Ord of cmp_fix (#12471) 2024-07-23 15:14:22 +02:00
Micha Reiser
f0fc6a95fe [red-knot] Lazy package file discovery (#12452)
Co-authored-by: Carl Meyer <carl@astral.sh>
2024-07-23 08:47:15 +00:00
Mateusz Sokół
f96a3c71ff Fix NumPy 2.0 rule for np.alltrue and np.sometrue (#12473)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-23 08:34:43 +00:00
Micha Reiser
b9b7deff17 Implement upcast_mut for new TestDb (#12470) 2024-07-23 07:11:00 +00:00
Micha Reiser
40d9324f5a [red-knot] Improved file watching (#12382) 2024-07-23 08:18:59 +02:00
Pathompong Kwangtong
a9f8bd59b2 Add Eglot setup guide for Emacs editor (#12426)
## Summary

The purpose of this change is to explain how to use ruff as a language
server in Eglot with automatic formatting because I've struggle to use
it with Eglot. I've search it online and found that there are some
people also struggle too. (See [this reddit
post](https://www.reddit.com/r/emacs/comments/118mo6w/eglot_automatic_formatting/)
and
https://github.com/astral-sh/ruff-lsp/issues/19#issuecomment-1435138828)


## Test Plan

I've use this setting myself. And I will continue maintain this part as
long as I use it.

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
2024-07-23 10:50:51 +05:30
Piotr Osiewicz
143e172431 Do not bail code action resolution when a quick fix is requested (#12462)
## Summary

When working on improving Ruff integration with Zed I noticed that it
errors out when we try to resolve a code action of a `QUICKFIX` kind;
apparently, per @dhruvmanila we shouldn't need to resolve it, as the
edit is provided in the initial response for the code action. However,
it's possible for the `resolve` call to fill out other fields (such as
`command`).
AFAICT Helix also tries to resolve the code actions unconditionally (as
in, when either `edit` or `command` is absent); so does VSC. They can
still apply the quickfixes though, as they do not error out on a failed
call to resolve code actions - Zed does. Following suit on Zed's side
does not cut it though, as we still get a log request from Ruff for that
failure (which is surfaced in the UI).
There are also other language servers (such as
[rust-analyzer](c1c9e10f72/crates/rust-analyzer/src/handlers/request.rs (L1257)))
that fill out both `command` and `edit` fields as a part of code action
resolution.

This PR makes the resolve calls for quickfix actions return the input
value.

## Test Plan

N/A
2024-07-23 10:30:03 +05:30
Auguste Lalande
b2d3a05ee4 [flake8-async] Fix references in documentation not displaying (#12467)
## Summary

Fix references in documentation of several `ASYNC` rules not displaying

## Test Plan

Validated documentation now displays correctly
2024-07-22 19:38:13 -04:00
Josh Cannon
ef1ca0dd38 Fix bad markdown in CONTRIBUTING.md (#12466)
See
https://github.com/astral-sh/ruff/blob/main/CONTRIBUTING.md#import-categorization
2024-07-23 00:03:30 +01:00
Carl Meyer
c7b13bb8fc [red-knot] add cycle-free while-loop control flow (#12413)
Add support for while-loop control flow.

This doesn't yet include general support for terminals and reachability;
that is wider than just while loops and belongs in its own PR.

This also doesn't yet add support for cyclic definitions in loops; that
comes with enough of its own complexity in Salsa that I want to handle
it separately.
2024-07-22 14:27:33 -07:00
Carl Meyer
dbbe3526ef [red-knot] add while-loop to benchmark (#12464)
So we can get some signal from the benchmark result on
https://github.com/astral-sh/ruff/pull/12413
2024-07-22 14:16:56 -07:00
Carl Meyer
f22c8ab811 [red-knot] add maybe-undefined lint rule (#12414)
Add a lint rule to detect if a name is definitely or possibly undefined
at a given usage.

If I create the file `undef/main.py` with contents:

```python
x = int
def foo():
    z
    return x
if flag:
    y = x
y
```

And then run `cargo run --bin red_knot -- --current-directory
../ruff-examples/undef`, I get the output:

```
Name 'z' used when not defined.
Name 'flag' used when not defined.
Name 'y' used when possibly not defined.
```

If I modify the file to add `y = 0` at the top, red-knot re-checks it
and I get the new output:

```
Name 'z' used when not defined.
Name 'flag' used when not defined.
```

Note that `int` is not flagged, since it's a builtin, and `return x` in
the function scope is not flagged, since it refers to the global `x`.
2024-07-22 13:53:59 -07:00
Alex Waygood
2a8f95c437 [red-knot] Use a distinct type for module search paths in the module resolver (#12379) 2024-07-22 19:44:27 +00:00
Dhruv Manilawala
ea2d51c2bb Add note to include notebook files for native server (#12449)
## Summary

Similar to https://github.com/astral-sh/ruff-vscode/pull/547 but for the
online docs.

Refer to https://github.com/astral-sh/ruff-vscode/issues/546

## Preview

<img width="1728" alt="Screenshot 2024-07-22 at 14 51 40"
src="https://github.com/user-attachments/assets/39014278-c868-45b0-9058-42858a060fd8">
2024-07-22 21:40:30 +05:30
Micha Reiser
ed238e0c76 Fix incorrect placement of leading function comment with type params (#12447) 2024-07-22 14:17:00 +02:00
Micha Reiser
3ace12943e Ignore more open ai notebooks for now (#12448) 2024-07-22 14:16:48 +02:00
Dhruv Manilawala
978909fcf4 Raise syntax error for unparenthesized generator expr in multi-argument call (#12445)
## Summary

This PR fixes a bug to raise a syntax error when an unparenthesized
generator expression is used as an argument to a call when there are
more than one argument.

For reference, the grammar is:
```
primary:
    | ...
    | primary genexp 
    | primary '(' [arguments] ')' 
    | ...

genexp:
    | '(' ( assignment_expression | expression !':=') for_if_clauses ')' 
```

The `genexp` requires the parenthesis as mentioned in the grammar. So,
the grammar for a call expression is either a name followed by a
generator expression or a name followed by a list of argument. In the
former case, the parenthesis are excluded because the generator
expression provides them while in the later case, the parenthesis are
explicitly provided for a list of arguments which means that the
generator expression requires it's own parenthesis.

This was discovered in https://github.com/astral-sh/ruff/issues/12420.

## Test Plan

Add test cases for valid and invalid syntax.

Make sure that the parser from CPython also raises this at the parsing
step:
```console
$ python3.13 -m ast parser/_.py
  File "parser/_.py", line 1
    total(1, 2, x for x in range(5), 6)
                ^^^^^^^^^^^^^^^^^^^
SyntaxError: Generator expression must be parenthesized

$ python3.13 -m ast parser/_.py
  File "parser/_.py", line 1
    sum(x for x in range(10), 10)
        ^^^^^^^^^^^^^^^^^^^^
SyntaxError: Generator expression must be parenthesized
```
2024-07-22 14:44:20 +05:30
Dhruv Manilawala
f8735e1ee8 Remove unused dependencies, sync existing versions (#12446)
## Summary

This PR removes unused dependencies from `fuzz` crate and syncs the
`similar` crate to the workspace version. This will help in resolve
https://github.com/astral-sh/ruff/pull/12442.

## Test Plan

Build the fuzz crate:

For Mac (it requires the nightly build):
```
cargo +nightly fuzz build
```
2024-07-22 10:49:05 +05:30
renovate[bot]
d70ceb6a56 Update Rust crate uuid to v1.10.0 (#12444) 2024-07-21 21:50:53 -04:00
renovate[bot]
fc7d9e95b8 Update Rust crate tracing-tree to 0.4.0 (#12443) 2024-07-21 21:50:46 -04:00
renovate[bot]
b578fca9cb Update NPM Development dependencies (#12441) 2024-07-21 21:50:32 -04:00
renovate[bot]
8d3146c2b2 Update pre-commit hook astral-sh/ruff-pre-commit to v0.5.4 (#12440) 2024-07-21 21:50:27 -04:00
renovate[bot]
fa5c841154 Update Rust crate thiserror to v1.0.63 (#12437) 2024-07-21 21:49:42 -04:00
renovate[bot]
f8fcbc19d9 Update dependency react-resizable-panels to v2.0.22 (#12439) 2024-07-21 21:49:33 -04:00
renovate[bot]
97fdd48208 Update Rust crate toml to v0.8.15 (#12438) 2024-07-21 21:49:23 -04:00
renovate[bot]
731ed2e40b Update Rust crate syn to v2.0.72 (#12436) 2024-07-21 21:49:16 -04:00
Auguste Lalande
3a742c17f8 [pydoclint] Fix DOC501 panic #12428 (#12435)
## Summary

Fix panic reported in #12428. Where a string would sometimes get split
within a character boundary. This bypasses the need to split the string.

This does not guarantee the correct formatting of the docstring, but
neither did the previous implementation.

Resolves #12428 

## Test Plan

Test case added to fixture
2024-07-21 19:30:06 +00:00
TomerBin
053243635c [fastapi] Implement FAST001 (fastapi-redundant-response-model) and FAST002 (fastapi-non-annotated-dependency) (#11579)
## Summary

Implements ruff specific role for fastapi routes, and its autofix.

## Test Plan

`cargo test` / `cargo insta review`
2024-07-21 18:28:10 +00:00
Ivan Carvalho
82355712c3 Add IBM to Who is Using ruff (#12433)
<!--
Thank you for contributing to Ruff! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

Just updating the README to reflect that IBM has been using ruff for a
year already: https://github.com/Qiskit/qiskit/pull/10116.
2024-07-21 11:17:24 -05:00
Auguste Lalande
4bc73dd87e [pydoclint] Implement docstring-missing-exception and docstring-extraneous-exception (DOC501, DOC502) (#11471)
## Summary

These are the first rules implemented as part of #458, but I plan to
implement more.

Specifically, this implements `docstring-missing-exception` which checks
for raised exceptions not documented in the docstring, and
`docstring-extraneous-exception` which checks for exceptions in the
docstring not present in the body.

## Test Plan

Test fixtures added for both google and numpy style.
2024-07-20 19:41:51 +00:00
T-256
53b84ab054 Cleanup redundant spaces from changelog (#12424) 2024-07-20 17:46:15 +00:00
Charlie Marsh
3664f85f45 Bump version to v0.5.4 (#12423) 2024-07-20 17:28:13 +00:00
Charlie Marsh
2c1926beeb Insert parentheses for multi-argument generators (#12422)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12420.
2024-07-20 16:41:55 +00:00
Charlie Marsh
4bcc96ae51 Avoid shadowing diagnostics for @override methods (#12415)
Closes https://github.com/astral-sh/ruff/issues/12412.
2024-07-19 21:32:33 -04:00
FishAlchemist
c0a2b49bac Fix the Github link error for Neovim in the setup for editors in the docs. (#12410)
## Summary

Fix Github link error for Neovim setup editors .

## Test Plan
Click Neovim Github link with mkdocs on local.
2024-07-19 16:24:12 -04:00
Sashko
ca22248628 Update docs Settings output-format default (#12409)
## Update docs Settings output-format default

Fixes https://github.com/astral-sh/ruff/issues/12350

## Test Plan

Run all automation mentioned here
fe04f2b09d/CONTRIBUTING.md (development)

Manually verified changes in the generated MkDocs site.

Co-authored-by: Oleksandr Zavertniev <oleksandr.zavertniev@yellowbrick.com>
2024-07-19 17:51:46 +00:00
Alex Waygood
d8cf8ac2ef [red-knot] Resolve symbols from builtins.pyi in the stdlib if they cannot be found in other scopes (#12390)
Co-authored-by: Carl Meyer <carl@astral.sh>
2024-07-19 17:44:56 +01:00
Carl Meyer
1c7b84059e [red-knot] fix incremental benchmark (#12400)
We should write `BAR_CODE` to `bar.py`, not to `foo.py`.
2024-07-19 08:32:37 -07:00
Carl Meyer
f82bb67555 [red-knot] trace file when inferring types (#12401)
When poring over traces, the ones that just include a definition or
symbol or expression ID aren't very useful, because you don't know which
file it comes from. This adds that information to the trace.

I guess the downside here is that if calling `.file(db)` on a
scope/definition/expression would execute other traced code, it would be
marked as outside the span? I don't think that's a concern, because I
don't think a simple field access on a tracked struct should ever
execute our code. If I'm wrong and this is a problem, it seems like the
tracing crate has this feature where you can record a field as
`tracing::field::Empty` and then fill in its value later with
`span.record(...)`, but when I tried this it wasn't working for me, not
sure why.

I think there's a lot more we can do to make our tracing output more
useful for debugging (e.g. record an event whenever a
definition/symbol/expression/use id is created with the details of that
definition/symbol/expression/use), this is just dipping my toes in the
water.
2024-07-19 07:13:51 -07:00
Alex Waygood
5f96f69151 [red-knot] Fix bug where module resolution would not be invalidated if an entire package was deleted (#12378) 2024-07-19 13:53:09 +01:00
Micha Reiser
ad19b3fd0e [red-knot] Add verbosity argument to CLI (#12404) 2024-07-19 11:38:24 +00:00
Carl Meyer
a62e2d2000 [red-knot] preparse builtins in without_parse benchmark (#12395) 2024-07-19 05:58:27 +00:00
Dylan
d61747093c [ruff] Rename RUF007 to zip-instead-of-pairwise (#12399)
## Summary

<!-- What's the purpose of the change? What does it do, and why? -->

Renames the rule
[RUF007](https://docs.astral.sh/ruff/rules/pairwise-over-zipped/) from
`pairwise-over-zipped` to `zip-instead-of-pairwise`. This closes #12397.

Specifically, in this PR:

- The file containing the rule was renamed
- The struct was renamed
- The function implementing the rule was renamed

## Testing

<!-- How was it tested? -->

- `cargo test`
- Docs re-built locally and verified that new rule name is displayed.
(Screenshots below).

<img width="939" alt="New rule name in rule summary"
src="https://github.com/user-attachments/assets/bf638bc9-1b7a-4675-99bf-e4de88fec167">

<img width="805" alt="New rule name in rule details"
src="https://github.com/user-attachments/assets/6fffd745-2568-424a-84e5-f94a41351022">
2024-07-18 19:26:27 -04:00
ukyen
0ba7fc63d0 [pydocstyle] Escaped docstring in docstring (D301 ) (#12192)
<!--
Thank you for contributing to Ruff! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

<!-- What's the purpose of the change? What does it do, and why? -->
This PR updates D301 rule to allow inclduing escaped docstring, e.g.
`\"""Foo.\"""` or `\"\"\"Bar.\"\"\"`, within a docstring.

Related issue: #12152 

## Test Plan

Add more test cases to D301.py and update the snapshot file.

<!-- How was it tested? -->
2024-07-18 18:36:05 -04:00
Carl Meyer
fa5b19d4b6 [red-knot] use a simpler builtin in the benchmark (#12393)
In preparation for supporting resolving builtins, simplify the benchmark
so it doesn't look up `str`, which is actually a complex builtin to deal
with because it inherits `Sequence[str]`.

Co-authored-by: Alex Waygood <alex.waygood@gmail.com>
2024-07-18 14:04:33 -07:00
Carl Meyer
181e7b3c0d [red-knot] rename module_global to global (#12385)
Per comments in https://github.com/astral-sh/ruff/pull/12269, "module
global" is kind of long, and arguably redundant.

I tried just using "module" but there were too many cases where I felt
this was ambiguous. I like the way "global" works out better, though it
does require an understanding that in Python "global" generally means
"module global" not "globally global" (though in a sense module globals
are also globally global since modules are singletons).
2024-07-18 13:05:30 -07:00
Carl Meyer
519eca9fe7 [red-knot] support implicit global name lookups (#12374)
Support falling back to a global name lookup if a name isn't defined in
the local scope, in the cases where that is correct according to Python
semantics.

In class scopes, a name lookup checks the local namespace first, and if
the name isn't found there, looks it up in globals.

In function scopes (and type parameter scopes, which are function-like),
if a name has any definitions in the local scope, it is a local, and
accessing it when none of those definitions have executed yet just
results in an `UnboundLocalError`, it does not fall back to a global. If
the name does not have any definitions in the local scope, then it is an
implicit global.

Public symbol type lookups never include such a fall back. For example,
if a name is not defined in a class scope, it is not available as a
member on that class, even if a name lookup within the class scope would
have fallen back to a global lookup.

This PR makes the `@override` lint rule work again.

Not yet included/supported in this PR:

* Support for free variables / closures: a free symbol in a nested
function-like scope referring to a symbol in an outer function-like
scope.
* Support for `global` and `nonlocal` statements, which force a symbol
to be treated as global or nonlocal even if it has definitions in the
local scope.
* Module-global lookups should fall back to builtins if the name isn't
found in the module scope.

I would like to expose nicer APIs for the various kinds of symbols
(explicit global, implicit global, free, etc), but this will also wait
for a later PR, when more kinds of symbols are supported.
2024-07-18 10:50:43 -07:00
Dhruv Manilawala
f0d589d7a3 Provide custom job permissions to cargo-dist (#12386)
We can't just directly update the `release.yml` file because that's
auto-generated using `cargo-dist`. So, update the permissions in
`Cargo.toml` and then use `cargo dist generate` to make sure there's no
diff.
2024-07-18 16:49:38 +00:00
Dhruv Manilawala
512c8b2cc5 Provide contents read permission to wasm publish job (#12384)
The job has asked for the permission:
811f78d94d/.github/workflows/publish-wasm.yml (L25)
2024-07-18 22:02:49 +05:30
Carl Meyer
811f78d94d [red-knot] small efficiency improvements and bugfixes to use-def map building (#12373)
Adds inference tests sufficient to give full test coverage of the
`UseDefMapBuilder::merge` method.

In the process I realized that we could implement visiting of if
statements in `SemanticBuilder` with fewer `snapshot`, `restore`, and
`merge` operations, so I restructured that visit a bit.

I also found one correctness bug in the `merge` method (it failed to
extend the given snapshot with "unbound" for any missing symbols,
meaning we would just lose the fact that the symbol could be unbound in
the merged-in path), and two efficiency bugs (if one of the ranges to
merge is empty, we can just use the other one, no need for copies, and
if the ranges are overlapping -- which can occur with nested branches --
we can still just merge them with no copies), and fixed all three.
2024-07-18 09:24:58 -07:00
Dhruv Manilawala
8f1be31289 Update 0.5.3 changelog caption (#12383)
As suggested in
https://github.com/astral-sh/ruff/pull/12381#discussion_r1683123202
2024-07-18 16:17:07 +00:00
Dhruv Manilawala
8cfbac71a4 Bump version to 0.5.3 (#12381) 2024-07-18 16:07:34 +00:00
Charlie Marsh
9460857932 Migrate to standalone docs repo (#12341)
## Summary

See: https://github.com/astral-sh/uv/pull/5081
2024-07-18 15:35:49 +00:00
Dhruv Manilawala
a028ca22f0 Add VS Code specific extension settings (#12380)
## Summary

This PR adds VS Code specific extension settings in the online
documentation.

The content is basically taken from the `package.json` file in the
`ruff-vscode` repository.
2024-07-18 20:58:14 +05:30
Dhruv Manilawala
7953f6aa79 Update versioning policy for editor integration (#12375)
## Summary

Following the stabilization of the Ruff language server, we need to
update our versioning policy to account for any changes in it. This
could be server settings, capability, etc.

This PR also adds a new section for the VS Code extension which is
adopted from [Biome's versioning
policy](https://biomejs.dev/internals/versioning/#visual-studio-code-extension)
for the same.

---------

Co-authored-by: Zanie Blue <contact@zanie.dev>
2024-07-18 15:17:36 +00:00
Charlie Marsh
764d9ab4ee Allow repeated-equality-comparison for mixed operations (#12369)
## Summary

This PR allows us to fix both expressions in `foo == "a" or foo == "b"
or ("c" != bar and "d" != bar)`, but limits the rule to consecutive
comparisons, following https://github.com/astral-sh/ruff/issues/7797.

I think this logic was _probably_ added because of
https://github.com/astral-sh/ruff/pull/12368 -- the intent being that
we'd replace the _entire_ expression.
2024-07-18 11:16:40 -04:00
Charlie Marsh
9b9d701500 Allow additional arguments for sum and max comprehensions (#12364)
## Summary

These can have other arguments, so it seems wrong to gate on single
argument here.

Closes https://github.com/astral-sh/ruff/issues/12358.
2024-07-18 08:37:28 -04:00
Dhruv Manilawala
648cca199b Add docs for Ruff language server (#12344)
## Summary

This PR adds documentation for the Ruff language server.

It mainly does the following:
1. Combines various READMEs containing instructions for different editor
setup in their respective section on the online docs
2. Provide an enumerated list of server settings. Additionally, it also
provides a section for VS Code specific options.
3. Adds a "Features" section which enumerates all the current
capabilities of the native server

For (2), the settings documentation is done manually but a future
improvement (easier after `ruff-lsp` is deprecated) is to move the docs
in to Rust struct and generate the documentation from the code itself.
And, the VS Code extension specific options can be generated by diffing
against the `package.json` in `ruff-vscode` repository.

### Structure

1. Setup: This section contains the configuration for setting up the
language server for different editors
2. Features: This section contains a list of capabilities provided by
the server along with short GIF to showcase it
3. Settings: This section contains an enumerated list of settings in a
similar format to the one for the linter / formatter
4. Migrating from `ruff-lsp`

> [!NOTE]
>
> The settings page is manually written but could possibly be
auto-generated via a macro similar to `OptionsMetadata` on the
`ClientSettings` struct

resolves: #11217 

## Test Plan

Generate and open the documentation locally using:
1. `python scripts/generate_mkdocs.py`
2. `mkdocs serve -f mkdocs.insiders.yml`
2024-07-18 17:41:43 +05:30
Dhruv Manilawala
2e77b775b0 Consider --preview flag for server subcommand (#12208)
## Summary

This PR removes the requirement of `--preview` flag to run the `ruff
server` and instead considers it to be an indicator to turn on preview
mode for the linter and the formatter.

resolves: #12161 

## Test Plan

Add test cases to assert the `preview` value is updated accordingly.

In an editor context, I used the local `ruff` executable in Neovim with
the `--preview` flag and verified that the preview-only violations are
being highlighted.

Running with:
```lua
require('lspconfig').ruff.setup({
  cmd = {
    '/Users/dhruv/work/astral/ruff/target/debug/ruff',
    'server',
    '--preview',
  },
})
```
The screenshot shows that `E502` is highlighted with the below config in
`pyproject.toml`:

<img width="877" alt="Screenshot 2024-07-17 at 16 43 09"
src="https://github.com/user-attachments/assets/c7016ef3-55b1-4a14-bbd3-a07b1bcdd323">
2024-07-18 11:05:01 +05:30
Dhruv Manilawala
ebe5b06c95 Use fallback settings when indexing the project (#12362)
## Summary

This PR updates the settings index building logic in the language server
to consider the fallback settings for applying ignore filters in
`WalkBuilder` and the exclusion via `exclude` / `extend-exclude`.

This flow matches the one in the `ruff` CLI where the root settings is
built by (1) finding the workspace setting in the ancestor directory (2)
finding the user configuration if that's missing and (3) fallback to
using the default configuration.

Previously, the index building logic was being executed before (2) and
(3). This PR reverses the logic so that the exclusion /
`respect_gitignore` is being considered from the default settings if
there's no workspace / user settings. This has the benefit that the
server no longer enters the `.git` directory or any other excluded
directory when a user opens a file in the home directory.

Related to #11366

## Test plan

Opened a test file from the home directory and confirmed with the debug
trace (removed in #12360) that the server excludes the `.git` directory
when indexing.
2024-07-18 09:16:45 +05:30
Carl Meyer
b2a49d8140 [red-knot] better docs for use-def maps (#12357)
Add better doc comments and comments, as well as one debug assertion, to
use-def map building.
2024-07-17 17:50:58 -07:00
Carl Meyer
985a999234 [red-knot] better docs for type inference (#12356)
Add some docs for how type inference works.

Also a couple minor code changes to rearrange or rename for better
clarity.
2024-07-17 13:36:58 -07:00
cake-monotone
1df51b1fbf [pyupgrade] Implement unnecessary-default-type-args (UP043) (#12371)
## Summary

Add new rule and implement for `unnecessary default type arguments`
under the `UP` category (`UP043`).

```py
// < py313
Generator[int, None, None] 

// >= py313
Generator[int]
```

I think that as Python 3.13 develops, there might be more default type
arguments added besides `Generator` and `AsyncGenerator`. So, I made
this more flexible to accommodate future changes.

related issue: #12286

## Test Plan

snapshot included..!
2024-07-17 19:45:43 +00:00
Charlie Marsh
1435b0f022 Remove discard, remove, and pop allowance for loop-iterator-mutation (#12365)
## Summary

Pretty sure this should still be an error, but also, I think I added
this because of ecosystem CI? So want to see what pops up.

Closes https://github.com/astral-sh/ruff/issues/12164.
2024-07-17 17:42:14 +00:00
Charlie Marsh
e39298dcbc Use UTF-8 as default encoding in unspecified-encoding fix (#12370)
## Summary

This is the _intended_ default that PEP 597 _wants_, but it's not
backwards compatible. The fix is already unsafe, so it's better for us
to recommend the desired and expected behavior.

Closes https://github.com/astral-sh/ruff/issues/12069.
2024-07-17 12:57:27 -04:00
Charlie Marsh
1de8ff3308 Detect enumerate iterations in loop-iterator-mutation (#12366)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12164.
2024-07-17 12:03:36 -04:00
Charlie Marsh
72e02206d6 Avoid dropping extra boolean operations in repeated-equality-comparison (#12368)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12062.
2024-07-17 11:49:27 -04:00
Charlie Marsh
80f0116641 Ignore self and cls when counting arguments (#12367)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12320.
2024-07-17 10:49:38 -04:00
Micha Reiser
79b535587b [red-knot] Reload notebook on file change (#12361) 2024-07-17 12:23:48 +00:00
Dhruv Manilawala
6e0cbe0f35 Remove leftover debug log (#12360)
This was a leftover from #12299
2024-07-17 17:52:44 +05:30
Micha Reiser
91338ae902 [red-knot] Add basic workspace support (#12318) 2024-07-17 11:34:21 +02:00
Micha Reiser
0c72577b5d [red-knot] Add notebook support (#12338) 2024-07-17 08:26:33 +00:00
Matthew Runyon
fe04f2b09d Publish wasm API to npm (#12317) 2024-07-17 08:50:38 +02:00
Carl Meyer
073588b48e [red-knot] improve semantic index tests (#12355)
Improve semantic index tests with better assertions than just `.len()`,
and re-add use-definition test that was commented out in the switch to
Salsa initially.
2024-07-16 23:46:49 -07:00
Alex Waygood
9a2dafb43d [red-knot] Add support for editable installs to the module resolver (#12307)
Co-authored-by: Micha Reiser <micha@reiser.io>
Co-authored-by: Carl Meyer <carl@astral.sh>
2024-07-16 18:17:47 +00:00
Carl Meyer
595b1aa4a1 [red-knot] per-definition inference, use-def maps (#12269)
Implements definition-level type inference, with basic control flow
(only if statements and if expressions so far) in Salsa.

There are a couple key ideas here:

1) We can do type inference queries at any of three region
granularities: an entire scope, a single definition, or a single
expression. These are represented by the `InferenceRegion` enum, and the
entry points are the salsa queries `infer_scope_types`,
`infer_definition_types`, and `infer_expression_types`. Generally
per-scope will be used for scopes that we are directly checking and
per-definition will be used anytime we are looking up symbol types from
another module/scope. Per-expression should be uncommon: used only for
the RHS of an unpacking or multi-target assignment (to avoid
re-inferring the RHS once per symbol defined in the assignment) and for
test nodes in type narrowing (e.g. the `test` of an `If` node). All
three queries return a `TypeInference` with a map of types for all
definitions and expressions within their region. If you do e.g.
scope-level inference, when it hits a definition, or an
independently-inferable expression, it should use the relevant query
(which may already be cached) to get all types within the smaller
region. This avoids double-inferring smaller regions, even though larger
regions encompass smaller ones.

2) Instead of building a control-flow graph and lazily traversing it to
find definitions which reach a use of a name (which is O(n^2) in the
worst case), instead semantic indexing builds a use-def map, where every
use of a name knows which definitions can reach that use. We also no
longer track all definitions of a symbol in the symbol itself; instead
the use-def map also records which defs remain visible at the end of the
scope, and considers these the publicly-visible definitions of the
symbol (see below).

Major items left as TODOs in this PR, to be done in follow-up PRs:

1) Free/global references aren't supported yet (only lookup based on
definitions in current scope), which means the override-check example
doesn't currently work. This is the first thing I'll fix as follow-up to
this PR.

2) Control flow outside of if statements and expressions.

3) Type narrowing.

There are also some smaller relevant changes here:

1) Eliminate `Option` in the return type of member lookups; instead
always return `Type::Unbound` for a name we can't find. Also use
`Type::Unbound` for modules we can't resolve (not 100% sure about this
one yet.)

2) Eliminate the use of the terms "public" and "root" to refer to
module-global scope or symbols. Instead consistently use the term
"module-global". It's longer, but it's the clearest, and the most
consistent with typical Python terminology. In particular I don't like
"public" for this use because it has other implications around author
intent (is an underscore-prefixed module-global symbol "public"?). And
"root" is just not commonly used for this in Python.

3) Eliminate the `PublicSymbol` Salsa ingredient. Many non-module-global
symbols can also be seen from other scopes (e.g. by a free var in a
nested scope, or by class attribute access), and thus need to have a
"public type" (that is, the type not as seen from a particular use in
the control flow of the same scope, but the type as seen from some other
scope.) So all symbols need to have a "public type" (here I want to keep
the use of the term "public", unless someone has a better term to
suggest -- since it's "public type of a symbol" and not "public symbol"
the confusion with e.g. initial underscores is less of an issue.) At
least initially, I would like to try not having special handling for
module-global symbols vs other symbols.

4) Switch to using "definitions that reach end of scope" rather than
"all definitions" in determining the public type of a symbol. I'm
convinced that in general this is the right way to go. We may want to
refine this further in future for some free-variable cases, but it can
be changed purely by making changes to the building of the use-def map
(the `public_definitions` index in it), without affecting any other
code. One consequence of combining this with no control-flow support
(just last-definition-wins) is that some inference tests now give more
wrong-looking results; I left TODO comments on these tests to fix them
when control flow is added.

And some potential areas for consideration in the future:

1) Should `symbol_ty` be a Salsa query? This would require making all
symbols a Salsa ingredient, and tracking even more dependencies. But it
would save some repeated reconstruction of unions, for symbols with
multiple public definitions. For now I'm not making it a query, but open
to changing this in future with actual perf evidence that it's better.
2024-07-16 11:02:30 -07:00
Charlie Marsh
30cef67b45 Remove BindingKind::ComprehensionVar (#12347)
## Summary

This doesn't seem to be used anywhere. Maybe it mattered when we didn't
handle generator scopes properly?
2024-07-16 11:18:04 -04:00
Charlie Marsh
d0c5925672 Consider expression before statement when determining binding kind (#12346)
## Summary

I believe these should always bind more tightly -- e.g., in:

```python
for _ in bar(baz for foo in [1]):
    pass
```

The inner `baz` and `foo` should be considered comprehension variables,
not for loop bindings.

We need to revisit this more holistically. In some of these cases,
`BindingKind` should probably be a flag, not an enum, since the values
aren't mutually exclusive. Separately, we should probably be more
precise in how we set it (e.g., by passing down from the parent rather
than sniffing in `handle_node_store`).

Closes https://github.com/astral-sh/ruff/issues/12339
2024-07-16 14:49:26 +00:00
Micha Reiser
b1487b6b4f Ignore more OpenAI notebooks with syntax errors in the ecosystem check (#12342) 2024-07-16 10:32:00 +02:00
Micha Reiser
85ae02d62e [red-knot] Add walk_directories to System (#12297) 2024-07-16 06:40:10 +00:00
konsti
9a817a2922 Insert empty line between suite and alternative branch after def/class (#12294)
When there is a function or class definition at the end of a suite
followed by the beginning of an alternative block, we have to insert a
single empty line between them.

In the if-else-statement example below, we insert an empty line after
the `foo` in the if-block, but none after the else-block `foo`, since in
the latter case the enclosing suite already adds empty lines.

```python
if sys.version_info >= (3, 10):
    def foo():
        return "new"
else:
    def foo():
        return "old"
class Bar:
    pass
```

To do so, we track whether the current suite is the last one in the
current statement with a new option on the suite kind.

Fixes #12199

---------

Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-15 12:59:33 +02:00
Dhruv Manilawala
ecd4b4d943 Build settings index in parallel for the native server (#12299)
## Summary

This PR updates the server to build the settings index in parallel using
similar logic as `python_files_in_path`.

This should help with https://github.com/astral-sh/ruff/issues/11366 but
ideally we would want to build it lazily.

## Test Plan

`cargo insta test`
2024-07-15 09:57:54 +00:00
github-actions[bot]
b9a8cd390f Sync vendored typeshed stubs (#12325)
Close and reopen this PR to trigger CI

Co-authored-by: typeshedbot <>
2024-07-15 07:46:55 +01:00
renovate[bot]
2348714081 Update pre-commit dependencies (#12330) 2024-07-15 07:27:10 +01:00
renovate[bot]
3817b207cf Update NPM Development dependencies (#12331)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 08:08:10 +02:00
renovate[bot]
b1cf9ea663 Update Rust crate clap_complete_command to 0.6.0 (#12332)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 08:05:07 +02:00
renovate[bot]
8ad10b9307 Update Rust crate compact_str to 0.8.0 (#12333)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-15 06:03:23 +00:00
renovate[bot]
9c5524a9a2 Update Rust crate tikv-jemallocator to 0.6.0 (#12335)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 08:01:43 +02:00
renovate[bot]
1530223311 Update Rust crate serde_with to v3.9.0 (#12334)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 07:58:18 +02:00
renovate[bot]
b9671522c4 Update Rust crate thiserror to v1.0.62 (#12329)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 05:54:00 +00:00
renovate[bot]
9918202422 Update Rust crate syn to v2.0.71 (#12328)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 07:52:09 +02:00
renovate[bot]
42e7147860 Update Rust crate clap to v4.5.9 (#12326)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 07:51:48 +02:00
renovate[bot]
25feab93f8 Update Rust crate matchit to v0.8.4 (#12327)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 07:50:58 +02:00
Charlie Marsh
dc8db1afb0 Make some amendments to the v0.5.2 changelog (#12319) 2024-07-14 14:47:51 +00:00
Charlie Marsh
18c364d5df [flake8-bandit] Support explicit string concatenations in S310 HTTP detection (#12315)
Closes https://github.com/astral-sh/ruff/issues/12314.
2024-07-14 10:44:08 -04:00
Charlie Marsh
7a7c601d5e Bump version to v0.5.2 (#12316) 2024-07-14 10:43:58 -04:00
Charlie Marsh
3bfbbbc78c Avoid allocation when validating HTTP and HTTPS prefixes (#12313) 2024-07-13 17:25:02 -04:00
Tim Chan
1a3ee45b23 [flake8-bandit] Avoid S310 violations for HTTP-safe f-strings (#12305)
this resolves https://github.com/astral-sh/ruff/issues/12245
2024-07-13 20:57:05 +00:00
Charlie Marsh
65848869d5 [refurb] Make list-reverse-copy an unsafe fix (#12303)
## Summary

I don't know that there's more to do here. We could consider not raising
the violation at all for arguments, but that would have some false
negatives and could also be surprising to users.

Closes https://github.com/astral-sh/ruff/issues/12267.
2024-07-13 15:45:35 -04:00
Charlie Marsh
456d6a2fb2 Consider with blocks as single-item branches (#12311)
## Summary

Ensures that, e.g., the following is not considered a
redefinition-without-use:

```python
import contextlib

foo = None
with contextlib.suppress(ImportError):
    from some_module import foo
```

Closes https://github.com/astral-sh/ruff/issues/12309.
2024-07-13 15:22:17 -04:00
Charlie Marsh
940df67823 Omit code frames for fixes with empty ranges (#12304)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12291.

## Test Plan

```shell
❯ cargo run check ../uv/foo --select INP
/Users/crmarsh/workspace/uv/foo/bar/baz.py:1:1: INP001 File `/Users/crmarsh/workspace/uv/foo/bar/baz.py` is part of an implicit namespace package. Add an `__init__.py`.
Found 1 error.
```
2024-07-12 15:21:28 +00:00
Charlie Marsh
e58713e2ac Make cache-write failures non-fatal (#12302)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12284.
2024-07-12 10:33:54 -04:00
Charlie Marsh
aa5c53b38b Remove 'non-obvious' allowance for E721 (#12300)
## Summary

I don't fully understand the purpose of this. In #7905, it was just
copied over from the previous non-preview implementation. But it means
that (e.g.) we don't treat `type(self.foo)` as a type -- which is wrong.

Closes https://github.com/astral-sh/ruff/issues/12290.
2024-07-12 09:21:43 -04:00
Charlie Marsh
4e6ecb2348 Treat not operations as boolean tests (#12301)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12285.
2024-07-12 08:53:37 -04:00
Alex Waygood
6febd96dfe [red-knot] Add a read_directory() method to the ruff_db::system::System trait (#12289) 2024-07-12 12:31:05 +00:00
Matthias
17e84d5f40 [numpy] Update NPY201: add np.NAN to exception (#12292)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-12 12:09:55 +00:00
Victorien
b6545ce5d6 Use indentation consistently (#12293) 2024-07-12 14:08:56 +02:00
Dhruv Manilawala
90e9aae3f4 Consider nested configs for settings reloading (#12253)
## Summary

This PR fixes a bug in the settings reloading logic to consider nested
configuration in a workspace.

fixes: #11766

## Test Plan


https://github.com/astral-sh/ruff/assets/67177269/69704b7b-44b9-4cc7-b5a7-376bf87c6ef4
2024-07-12 05:00:08 +00:00
Micha Reiser
bd01004a42 Use space separator before parenthesiszed expressions in comprehensions with leading comments. (#12282) 2024-07-11 22:38:12 +02:00
Gaétan Lepage
d0298dc26d Explicitly add schemars to ruff_python_ast Cargo.toml (#12275)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-11 06:46:34 +00:00
Jack Desert
bbb9fe1692 [Docs] Clear instruction for single quotes (linter and formatter) (#12015)
## Summary

In order to use single quotes with both the ruff linter and the ruff
formatter,
two different rules must be applied. This was not clear to me when 
internet searching "configure ruff single quotes" and it eventually
I filed this issue:

https://github.com/astral-sh/ruff/issues/12003
2024-07-10 16:29:29 +00:00
Alex Waygood
5b21922420 [red-knot] Add more stress tests for module resolver invalidation (#12272) 2024-07-10 14:34:06 +00:00
Micha Reiser
abcf07c8c5 Change File::touch_path to only take a SystemPath (#12273) 2024-07-10 12:15:14 +00:00
Alex Waygood
e8b5341c97 [red-knot] Rework module resolver tests (#12260) 2024-07-10 10:40:21 +00:00
Auguste Lalande
880c31d164 [flake8-async] Update ASYNC116 to match upstream (#12266)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-10 09:58:33 +02:00
Auguste Lalande
d365f1a648 [flake8-async] Update ASYNC115 to match upstream (#12262)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-10 09:43:11 +02:00
Micha Reiser
4cc7bc9d32 Use more threads when discovering python files (#12258) 2024-07-10 09:29:17 +02:00
Dhruv Manilawala
0bb2fc6eec Conside include, extend-include for the native server (#12252)
## Summary

This PR updates the native server to consider the `include` and
`extend-include` file resolver settings.

fixes: #12242 

## Test Plan

Note: Settings reloading doesn't work for nested configs which is fixed
in #12253 so the preview here only showcases root level config.

https://github.com/astral-sh/ruff/assets/67177269/e8969128-c175-4f98-8114-0d692b906cc8
2024-07-10 04:12:57 +00:00
Auguste Lalande
855d62cdde [flake8-async] Update ASYNC110 to match upstream (#12261)
## Summary

Update the name of `ASYNC110` to match
[upstream](https://flake8-async.readthedocs.io/en/latest/rules.html).

Also update to the functionality to match upstream by adding support for
`asyncio` and `anyio` (gated behind preview).

Part of https://github.com/astral-sh/ruff/issues/12039.

## Test Plan

Added tests for `asyncio` and `anyio`
2024-07-09 17:17:28 -07:00
Auguste Lalande
88abc6aed8 [flake8-async] Update ASYNC100 to match upstream (#12221)
## Summary

Update the name of `ASYNC100` to match
[upstream](https://flake8-async.readthedocs.io/en/latest/rules.html).

Also update to the functionality to match upstream by supporting
additional context managers from asyncio and anyio. Matching this
[list](https://flake8-async.readthedocs.io/en/latest/glossary.html#timeout-context).

Part of #12039.

## Test Plan

Added the new context managers to the fixture.
2024-07-09 17:55:18 +00:00
Alex Waygood
6fa4e32ad3 [red-knot] Use vendored typeshed stubs for stdlib module resolution (#12224) 2024-07-09 09:21:52 +00:00
Alex Waygood
000dabcd88 [red-knot] Allow module-resolution options to be specified via the CLI (#12246) 2024-07-09 09:16:28 +00:00
Micha Reiser
f8ff42a13d [red-knot] Prevent salsa cancellation from aborting the program (#12183) 2024-07-09 08:26:15 +00:00
Micha Reiser
b5834d57af [red-knot] Only store absolute paths in Files (#12215) 2024-07-09 09:52:13 +02:00
renovate[bot]
3d3ff10bb9 Update dependency react-resizable-panels to v2.0.20 (#12231)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-09 07:26:08 +00:00
Micha Reiser
ac04380f36 [red-knot] Rename FileSystem to System (#12214) 2024-07-09 07:20:51 +00:00
Auguste Lalande
16a63c88cf [flake8-async] Update ASYNC109 to match upstream (#12236)
## Summary

Update the name of `ASYNC109` to match
[upstream](https://flake8-async.readthedocs.io/en/latest/rules.html).

Also update to the functionality to match upstream by supporting
additional context managers from `asyncio` and `anyio`. This doesn't
change any of the detection functionality, but recommends additional
context managers from `asyncio` and `anyio` depending on context.

Part of https://github.com/astral-sh/ruff/issues/12039.

## Test Plan

Added fixture for asyncio recommendation
2024-07-09 04:14:27 +00:00
Dani Bodor
10f07d88a2 Update help and documentation for --output-format to reflect "full" default (#12248)
fix #12247 

changed help to list "full" as the default for --output-format and
removed "text" as an option (as this is no longer supported).
2024-07-09 02:45:24 +00:00
Evan Rittenhouse
1e04bd0b73 Restrict fowarding newline argument in open() calls to Python versions >= 3.10 (#12244)
Fixes https://github.com/astral-sh/ruff/issues/12222
2024-07-09 02:43:31 +00:00
epenet
2041b0e5fb [flake8-return] Exempt properties from explicit return rule (RET501) (#12243)
First contribution - apologies if something is missing

Fixes #12197
2024-07-08 19:39:30 -07:00
Micha Reiser
bf3d903939 Warn about D203 formatter incompatibility (#12238) 2024-07-08 15:12:14 +02:00
Micha Reiser
64855c5f06 Remove default-run from 'red_knot' crate (#12241) 2024-07-08 09:31:45 +00:00
renovate[bot]
b5ab4ce293 Update pre-commit dependencies (#12232) 2024-07-08 01:51:24 +00:00
renovate[bot]
c396b9f08b Update cloudflare/wrangler-action action to v3.7.0 (#12235) 2024-07-07 21:41:24 -04:00
renovate[bot]
9ed3893e6d Update Rust crate ureq to v2.10.0 (#12234) 2024-07-07 21:41:18 -04:00
renovate[bot]
30c9604c1d Update NPM Development dependencies (#12233) 2024-07-07 21:41:09 -04:00
renovate[bot]
7e4a1c2b33 Update Rust crate syn to v2.0.69 (#12230) 2024-07-07 21:40:42 -04:00
renovate[bot]
e379160941 Update Rust crate serde_with to v3.8.3 (#12229) 2024-07-07 21:40:35 -04:00
renovate[bot]
38b503ebcc Update Rust crate serde_json to v1.0.120 (#12228) 2024-07-07 21:40:29 -04:00
renovate[bot]
dac476f2c0 Update Rust crate serde to v1.0.204 (#12227) 2024-07-07 21:40:20 -04:00
renovate[bot]
754e5d6a7d Update Rust crate imara-diff to v0.1.6 (#12226) 2024-07-07 21:40:03 -04:00
Alex Waygood
d9c15e7a12 [Ecosystem checks] trio has changed its default branch name to main (#12225)
Fixes CI errors seen in
https://github.com/astral-sh/ruff/pull/12224#issuecomment-2212594024

x-ref
17b3644f64
2024-07-07 23:37:27 +01:00
Trim21
757c75752e [flake8-bandit] fix S113 false positive for httpx without timeout argument (#12213)
## Summary

S113 exists because `requests` doesn't have a default timeout, so
request without timeout may hang indefinitely

> B113: Test for missing requests timeout
This plugin test checks for requests or httpx calls without a timeout
specified.
>
> Nearly all production code should use this parameter in nearly all
requests, **Failure to do so can cause your program to hang
indefinitely.**


But httpx has default timeout 5s, so S113 for httpx request without
`timeout` argument is a false positive, only valid case would be
`timeout=None`.

https://www.python-httpx.org/advanced/timeouts/

> HTTPX is careful to enforce timeouts everywhere by default.
>
> The default behavior is to raise a TimeoutException after 5 seconds of
network inactivity.


## Test Plan

snap updated
2024-07-06 14:08:40 -05:00
Micha Reiser
9d61727289 [red-knot] Exclude drop time in benchmark (#12218) 2024-07-06 17:35:00 +02:00
Alex Waygood
a62a432a48 [red-knot] Respect typeshed's VERSIONS file when resolving stdlib modules (#12141) 2024-07-05 22:43:31 +00:00
Charlie Marsh
8198723201 Move SELinux docs to example (#12211) 2024-07-05 20:42:43 +00:00
Maximilian Kolb
7df10ea3e9 Docs: Respect SELinux with podman for docker mount (#12102)
Tested on Fedora 40 with Podman 5.1.1 and ruff "0.5.0" and "latest".
source: https://unix.stackexchange.com/q/651198


## Error without fix

````
$ podman run --rm -it -v .:/io ghcr.io/astral-sh/ruff:latest check
error: Failed to initialize cache at /io/.ruff_cache: Permission denied (os error 13)
warning: Encountered error: Permission denied (os error 13)
All checks passed!

$ podman run --rm -it -v .:/io ghcr.io/astral-sh/ruff:latest format
error: Failed to initialize cache at /io/.ruff_cache: Permission denied (os error 13)
error: Encountered error: Permission denied (os error 13)
````

## Summary

Running ruff by using a docker container requires `:Z` when mounting the
current directory on Fedora with SELinux and Podman.

## Test Plan

````
$ podman run --rm -it -v .:/io:Z ghcr.io/astral-sh/ruff:latest check
$ podman run --rm -it -v .:/io:Z ghcr.io/astral-sh/ruff:0.5.0 check
````
2024-07-05 15:39:00 -05:00
Carl Meyer
0e44235981 [red-knot] intern types using Salsa (#12061)
Intern types using Salsa interning instead of in the `TypeInference`
result.

This eliminates the need for `TypingContext`, and also paves the way for
finer-grained type inference queries.
2024-07-05 12:16:37 -07:00
Dhruv Manilawala
7b50061b43 Fix eslint errors for playground source code (#12207)
Refer
https://github.com/astral-sh/ruff/actions/runs/9808907924/job/27086333001
2024-07-05 13:43:16 +00:00
Dhruv Manilawala
3a72400202 Rename publish workflow file extension (yaml -> yml) (#12206) 2024-07-05 13:12:49 +00:00
Dhruv Manilawala
1b3bff0330 Bump version to 0.5.1 (#12205) 2024-07-05 18:33:14 +05:30
Alex Waygood
0f6f73ecf3 [red-knot] Require that FileSystem objects implement Debug (#12204) 2024-07-05 12:53:30 +01:00
Dhruv Manilawala
7910beecc4 Consider the content of the new cells during notebook sync (#12203)
## Summary

This PR fixes the bug where the server was not considering the
`cells.structure.didOpen` field to sync up the new content of the newly
added cells.

The parameters corresponding to this request provides two fields to get
the newly added cells:
1. `cells.structure.array.cells`: This is a list of `NotebookCell` which
doesn't contain any cell content. The only useful information from this
array is the cell kind and the cell document URI which we use to
initialize the new cell in the index.
2. `cells.structure.didOpen`: This is a list of `TextDocumentItem` which
corresponds to the newly added cells. This actually contains the text
content and the version.

This wasn't a problem before because we initialize the cell with an
empty string and this isn't a problem when someone just creates an empty
cell. But, when someone copy-pastes a cell, the cell needs to be
initialized with the content.

fixes: #12201 

## Test Plan

First, let's see the panic in action:

1. Press <kbd>Esc</kbd> to allow using the keyboard to perform cell
actions (move around, copy, paste, etc.)
2. Copy the second cell with <kbd>c</kbd> key
3. Delete the second cell with <kbd>dd</kbd> key
4. Paste the copied cell with <kbd>p</kbd> key

You can see that the content isn't synced up because the `unused-import`
for `sys` is still being highlighted but it's being used in the second
cell. And, the hover isn't working either. Then, as I start editing the
second cell, it panics.


https://github.com/astral-sh/ruff/assets/67177269/fc58364c-c8fc-4c11-a917-71b6dd90c1ef

Now, here's the preview of the fixed version:


https://github.com/astral-sh/ruff/assets/67177269/207872dd-dca6-49ee-8b6e-80435c7ef22e
2024-07-05 17:10:00 +05:30
Dhruv Manilawala
f3ccd152e9 Revert "Remove --preview as a required argument for ruff server (#12053)" (#12196)
This reverts commit b28dc9ac14.

We're not ready to stabilize the server yet. There's some pending work
for the VS Code extension and documentation improvements.

This change is to unblock Ruff release.
2024-07-05 11:58:35 +05:30
Javier Kauer
1e07bfa373 [pycodestyle] Whitespace after decorator (E204) (#12140)
## Summary

<!-- What's the purpose of the change? What does it do, and why? -->
This is the implementation for the new rule of `pycodestyle (E204)`. It
follows the guidlines described in the contributing site, and as such it
has a new file named `whitespace_after_decorator.rs`, a new test file
called `E204.py`, and as such invokes the `function` in the `AST
statement checker` for functions and functions in classes. Linking #2402
because it has all the pycodestyle rules.

## Test Plan

<!-- How was it tested? -->
The file E204.py, has a `decorator` defined called wrapper, and this
decorator is used for 2 cases. The first one is when a `function` which
has a `decorator` is called in the file, and the second one is when
there is a `class` and 2 `methods` are defined for the `class` with a
`decorator` attached it.

Test file:

``` python
def foo(fun):
    def wrapper():
        print('before')
        fun()
        print('after')
    return wrapper

# No error
@foo
def bar():
    print('bar')

# E204
@ foo
def baz():
    print('baz')

class Test:
    # No error
    @foo
    def bar(self):
        print('bar')

    # E204
    @ foo
    def baz(self):
        print('baz')
```

I am still new to rust and any suggestion is appreciated. Specially with
the way im using native ruff utilities.

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
2024-07-04 23:31:03 +00:00
Mathieu Kniewallner
5e7ba05612 docs(*): fix a few typos, consistency issues and links (#12193)
## Summary

Fixes a few typos, consistency issues and dead links found across the
documentation.
2024-07-04 19:05:51 -04:00
Mathieu Kniewallner
d12570ea00 docs(options): fix some typos and improve consistency (#12191)
## Summary

Fixes a few typos and consistency issues in the "Settings"
documentation:
- use "Ruff" consistently in the few places where "ruff" is used
- use double quotes in the few places where single quotes are used
- add backticks around rule codes where they are currently missing
- update a few example values where they are the same as the defaults,
for consistency

2nd commit might be controversial, as there are many options mentioned
where we don't currently link to the documentation sections, so maybe
it's done on purpose, as this will also appear in the JSON schema where
it's not desirable? If that's the case, I can easily drop it.

## Test Plan

Local testing.
2024-07-04 19:05:03 -04:00
Mathieu Kniewallner
2f3264e148 fix(rules): skip dummy variables for PLR1704 (#12190)
## Summary

Resolves #12157.

## Test Plan

Snapshot tests.
2024-07-04 20:09:31 +00:00
Micha Reiser
e2e0889a30 [red-knot] Add very basic benchmark (#12182) 2024-07-04 15:29:00 +00:00
Micha Reiser
497fd4c505 Update code owners for red knot (#12187) 2024-07-04 12:14:24 +00:00
Dhruv Manilawala
6cdf3e7af8 Reorder installation section in README (#12177)
See https://github.com/astral-sh/ruff/pull/12163#issuecomment-2207016631
2024-07-04 14:11:54 +05:30
Micha Reiser
4d385b60c8 [red-knot] Migrate CLI to Salsa (#11972) 2024-07-04 07:23:45 +00:00
Micha Reiser
262053f85c [red-knot]: Implement HasTy for Alias (#11971) 2024-07-04 07:17:10 +00:00
Micha Reiser
3ce8b9fcae Make Definition a salsa-ingredient (#12151) 2024-07-04 06:46:08 +00:00
Dhruv Manilawala
e6e09ea93a Avoid syntax error notification for source code actions (#12148)
## Summary

This PR avoids the error notification if a user selects the source code
actions and there's a syntax error in the source.

Before https://github.com/astral-sh/ruff/pull/12134, the change would've
been different. But that PR disables generating fixes if there's a
syntax error. This means that we can return an empty map instead as
there won't be any fixes in the diagnostics returned by the `lint_fix`
function.

For reference, following are the screenshot as on `main` with the error:

**VS Code:**

<img width="1715" alt="Screenshot 2024-07-02 at 16 39 59"
src="https://github.com/astral-sh/ruff/assets/67177269/62f3e99b-0b0c-4608-84a2-26aeabcc6933">

**Neovim:**

<img width="1717" alt="Screenshot 2024-07-02 at 16 38 50"
src="https://github.com/astral-sh/ruff/assets/67177269/5d637c36-d7f8-4a3b-8011-9a89708919a8">

fixes: #11931

## Test Plan

Considering the following code snippet where there are two diagnostics
(syntax error and useless semicolon `E703`):
```py
x;

y =
```

### VS Code


https://github.com/astral-sh/ruff/assets/67177269/943537fc-ed8d-448d-8a36-1e34536c4f3e

### Neovim


https://github.com/astral-sh/ruff/assets/67177269/4e6f3372-6e5b-4380-8919-6221066efd5b
2024-07-04 09:37:16 +05:30
Dhruv Manilawala
d870720841 Fix replacement edit range computation (#12171)
## Summary

This PR fixes various bugs for computing the replacement range between
the original and modified source for the language server.

1. When finding the end offset of the source and modified range, we
should apply `zip` on the reversed iterator. The bug was that it was
reversing the already zipped iterator. The problem here is that the
length of both slices aren't going to be the same unless the source
wasn't modified at all. Refer to the [Rust
playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=44f860d31bd26456f3586b6ab530c22f)
where you can see this in action.
2. Skip the first line when computing the start offset because the first
line start value will always be 0 and the default value of the source /
modified range start is also 0. So, comparing 0 and 0 is not useful
which means we can skip the first value.
3. While iterating in the reverse direction, we should only stop if the
line start is strictly less than the source start i.e., we should use
`<` instead of `<=`.

fixes: #12128 

## Test Plan

Add test cases where the text is being inserted, deleted, and replaced
between the original and new source code, validate the replacement
ranges.
2024-07-04 09:24:07 +05:30
Mathieu Kniewallner
8210c1ed5b [flake8-bandit] Detect httpx for S113 (#12174)
## Summary

Bandit now also reports `B113` on `httpx`
(https://github.com/PyCQA/bandit/pull/1060). This PR implements the same
logic, to detect missing or `None` timeouts for `httpx` alongside
`requests`.

## Test Plan

Snapshot tests.
2024-07-03 19:26:55 -04:00
Charlie Marsh
a184f84f69 Upgrade cargo-dist to v0.18.0 (#12175)
## Summary

This enables us to get rid of `allow-dirty`!
2024-07-03 22:38:29 +00:00
Zanie Blue
c487b99e93 Add standalone installers to Ruff installation in README (#12163)
Note this is already included in our installation page at
`docs/installation.md`

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
2024-07-03 13:39:58 -05:00
Thomas Faivre
24524771f2 Fix typo in CHANGELOG for misplaced-bare-raise URL (#12173)
Hi all!

## Summary

Fix a typo.

## Test Plan

URL was tested with curl.



Not much left to say, except that I originally saw this issue on the
blog post: https://astral.sh/blog/ruff-v0.5.0
Not sure how it is related to the CHANGELOG.md file, so the post might
need fixing as well.

Thanks for this incredible tool!

Signed-off-by: Thomas Faivre <thomas.faivre@6wind.com>
2024-07-03 13:39:33 -05:00
Micha Reiser
b950a6c389 Replace Mutex<RefCell> with Mutex in vendored file system" (#12170) 2024-07-03 15:12:13 +02:00
Zanie Blue
47eb6ee42b Fix cache key collisions for paths with separators (#12159)
Closes https://github.com/astral-sh/ruff/issues/12158

Hashing `Path` does not take into account path separators so `foo/bar`
is the same as `foobar` which is no good for our case. I'm guessing this
is an upstream bug, perhaps introduced by
45082b077b?
I'm investigating that further.
2024-07-03 07:36:46 -05:00
Zanie Blue
b4f7d5b2fb Fix latest version detection during Rooster invocations (#12162)
We no longer use the "v" prefix so Rooster detects the wrong version.
2024-07-03 07:35:34 -05:00
Zanie Blue
c13c60bc47 Update release script to match uv (#11496)
See https://github.com/astral-sh/uv/pull/3764

---------

Co-authored-by: T-256 <132141463+T-256@users.noreply.github.com>
2024-07-03 07:35:28 -05:00
Dhruv Manilawala
ee90017d3f Remove demisto/content from ecosystem checks (#12160)
## Summary

Follow-up to https://github.com/astral-sh/ruff/pull/12129 to remove the
`demisto/content` from ecosystem checks. The previous PR removed it from
the deprecated script which I didn't notice until recently.

## Test Plan

Ecosystem comment
2024-07-03 08:25:29 +00:00
Micha Reiser
adfd78e05a Correct parenthesized long nested-expressions example to match Ruff's output (#12153) 2024-07-03 10:03:08 +02:00
Alex Waygood
7c8112614a Remove use of deprecated E999 from the fuzz-parser script (#12150) 2024-07-02 14:18:25 +01:00
Dhruv Manilawala
8f40928534 Enable token-based rules on source with syntax errors (#11950)
## Summary

This PR updates the linter, specifically the token-based rules, to work
on the tokens that come after a syntax error.

For context, the token-based rules only diagnose the tokens up to the
first lexical error. This PR builds up an error resilience by
introducing a `TokenIterWithContext` which updates the `nesting` level
and tries to reflect it with what the lexer is seeing. This isn't 100%
accurate because if the parser recovered from an unclosed parenthesis in
the middle of the line, the context won't reduce the nesting level until
it sees the newline token at the end of the line.

resolves: #11915

## Test Plan

* Add test cases for a bunch of rules that are affected by this change.
* Run the fuzzer for a long time, making sure to fix any other bugs.
2024-07-02 08:57:46 +00:00
Dhruv Manilawala
88a4cc41f7 Disable auto-fix when source has syntax errors (#12134)
## Summary

This PR updates Ruff to **not** generate auto-fixes if the source code
contains syntax errors as determined by the parser.

The main motivation behind this is to avoid infinite autofix loop when
the token-based rules are run over any source with syntax errors in
#11950.

Although even after this, it's not certain that there won't be an
infinite autofix loop because the logic might be incorrect. For example,
https://github.com/astral-sh/ruff/issues/12094 and
https://github.com/astral-sh/ruff/pull/12136.

This requires updating the test infrastructure to not validate for fix
availability status when the source contained syntax errors. This is
required because otherwise the fuzzer might fail as it uses the test
function to run the linter and validate the source code.

resolves: #11455 

## Test Plan

`cargo insta test`
2024-07-02 14:22:51 +05:30
Micha Reiser
dcb9523b1e Address review feedback from 11963 (#12145) 2024-07-02 09:05:55 +02:00
Micha Reiser
25080acb7a [red-knot] Introduce ExpressionNodeKey to improve typing of expression_map (#12142) 2024-07-01 16:15:53 +02:00
Micha Reiser
228b1c4235 [red-knot] Remove Scope::name (#12137) 2024-07-01 15:55:50 +02:00
Micha Reiser
955138b74a Refactor ast_ids traits to take ScopeId instead of VfsFile plus FileScopeId. (#12139) 2024-07-01 15:50:07 +02:00
Dhruv Manilawala
5677614079 Use char-wise width instead of str-width (#12135)
## Summary

This PR updates various references in the linter to compute the
line-width for summing the width of each `char` in a `str` instead of
computing the width of the `str` itself.

Refer to #12133 for more details.

fixes: #12130 

## Test Plan

Add a file with null (`\0`) character which is zero-width. Run this test
case on `main` to make sure it panics and switch over to this branch to
make sure it doesn't panic now.
2024-07-01 18:56:27 +05:30
Micha Reiser
37f260b5af Introduce HasTy trait and SemanticModel facade (#11963) 2024-07-01 14:48:27 +02:00
Dhruv Manilawala
3f25561511 Avoid E275 if keyword followed by comma (#12136)
## Summary

Use the following to reproduce this:
```console
$ cargo run -- check --select=E275,E203 --preview --no-cache ~/playground/ruff/src/play.py --fix
debug error: Failed to converge after 100 iterations in `/Users/dhruv/playground/ruff/src/play.py` with rule codes E275:---
yield,x

---
/Users/dhruv/playground/ruff/src/play.py:1:1: E275 Missing whitespace after keyword
  |
1 | yield,x
  | ^^^^^ E275
  |
  = help: Added missing whitespace after keyword

Found 101 errors (100 fixed, 1 remaining).
[*] 1 fixable with the `--fix` option.
```

## Test Plan

Add a test case and run `cargo insta test`.
2024-07-01 18:04:23 +05:30
Charlie Marsh
eaf33d85ed Remove demisto/content from ecosystem checks (#12129)
## Summary

Unfortunately `demisto/content` uses an explicit `select` for `E999`, so
it will _always_ fail in preview. And they're on a fairly old version.
I'd like to keep checking it, but seems easiest for now to just disable
it.

In response, I've added a few new repos.

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
2024-07-01 12:20:13 +00:00
renovate[bot]
aaa6cabf3a Update Rust crate dashmap to v6 (#12126)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-01 08:48:26 +00:00
Micha Reiser
9a4d9072c1 Update salsa (#12132) 2024-07-01 08:33:04 +00:00
Micha Reiser
4cb6a09fc0 Use CompactString for ModuleName (#12131) 2024-07-01 10:22:34 +02:00
Micha Reiser
5109b50bb3 Use CompactString for Identifier (#12101) 2024-07-01 10:06:02 +02:00
github-actions[bot]
db6ee74cbe Sync vendored typeshed stubs (#12116) 2024-07-01 07:07:45 +01:00
Tom Kuson
d1aeadc009 [pytest] Reverse PT001 and PT0023 defaults (#12106)
## Summary

This patch inverts the defaults for
[pytest-fixture-incorrect-parentheses-style
(PT001)](https://docs.astral.sh/ruff/rules/pytest-fixture-incorrect-parentheses-style/)
and [pytest-incorrect-mark-parentheses-style
(PT003)](https://docs.astral.sh/ruff/rules/pytest-incorrect-mark-parentheses-style/)
to prefer dropping superfluous parentheses.

Presently, Ruff defaults to adding superfluous parentheses on pytest
mark and fixture decorators for documented purpose of consistency; for
example,

```diff
 import pytest


-@pytest.mark.foo
+@pytest.mark.foo()
 def test_bar(): ...
```

This behaviour is counter to the official pytest recommendation and
diverges from the flake8-pytest-style plugin as of version 2.0.0 (see
https://github.com/m-burst/flake8-pytest-style/issues/272). Seeing as
either default satisfies the documented benefit of consistency across a
codebase, it makes sense to change the behaviour to be consistent with
pytest and the flake8 plugin as well.

This change is breaking, so is gated behind preview (at least under my
understanding of Ruff versioning). The implementation of this gating
feature is a bit hacky, but seemed to be the least disruptive solution
without performing invasive surgery on the `#[option()]` macro.

Related to #8796.

### Caveat

Whilst updating the documentation, I sought to reference the pytest
recommendation to drop superfluous parentheses, but couldn't find any
official instruction beyond it being a revealed preference within the
pytest documentation code examples (as well as the linked issues from a
core pytest developer). Thus, the wording of the preference is
deliberately timid; it's to cohere with pytest rather than follow an
explicit guidance.

## Test Plan

`cargo nextest run`

I also ran

```sh
cargo run -p ruff -- check crates/ruff_linter/resources/test/fixtures/flake8_pytest_style/PT001.py --no-cache --diff --select PT001
```

and compared against it with `--preview` to verify that the default does
change under preview (I also repeated this with `echo
'[tool.ruff]\npreview = true' > pyproject.toml` to verify that it works
with a configuration file).

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
2024-07-01 02:06:11 +00:00
Tom Kuson
d80a9d9ce9 [flake8-bugbear] Implement mutable-contextvar-default (B039) (#12113)
## Summary

Implement mutable-contextvar-default (B039) which was added to
flake8-bugbear in https://github.com/PyCQA/flake8-bugbear/pull/476.

This rule is similar to [mutable-argument-default
(B006)](https://docs.astral.sh/ruff/rules/mutable-argument-default) and
[function-call-in-default-argument
(B008)](https://docs.astral.sh/ruff/rules/function-call-in-default-argument),
except that it checks the `default` keyword argument to
`contextvars.ContextVar`.

```
B039.py:19:26: B039 Do not use mutable data structures for ContextVar defaults
   |
18 | # Bad
19 | ContextVar("cv", default=[])
   |                          ^^ B039
20 | ContextVar("cv", default={})
21 | ContextVar("cv", default=list())
   |
   = help: Replace with `None`; initialize with `.set()` after checking for `None`
```

In the upstream flake8-plugin, this rule is written expressly as a
corollary to B008 and shares much of its logic. Likewise, this
implementation reuses the logic of the Ruff implementation of B008,
namely


f765d19402/crates/ruff_linter/src/rules/flake8_bugbear/rules/function_call_in_argument_default.rs (L104-L106)

and 


f765d19402/crates/ruff_linter/src/rules/flake8_bugbear/rules/mutable_argument_default.rs (L106)

Thus, this rule deliberately replicates B006's and B008's heuristics.
For example, this rule assumes that all functions are mutable unless
otherwise qualified. If improvements are to be made to B039 heuristics,
they should probably be made to B006 and B008 as well (whilst trying to
match the upstream implementation).

This rule does not have an autofix as it is unknown where the ContextVar
next used (and it might not be within the same file).

Closes #12054

## Test Plan

`cargo nextest run`
2024-07-01 01:55:49 +00:00
renovate[bot]
85ede4a88c Update docker/build-push-action action to v6 (#12127) 2024-06-30 21:25:24 -04:00
renovate[bot]
d2fefc8bf3 Update NPM Development dependencies (#12122) 2024-06-30 21:21:20 -04:00
renovate[bot]
5fd3f43de1 Update pre-commit hook astral-sh/ruff-pre-commit to v0.5.0 (#12125) 2024-06-30 21:21:03 -04:00
renovate[bot]
ab372f5f48 Update Rust crate uuid to v1.9.1 (#12124) 2024-06-30 21:20:48 -04:00
renovate[bot]
6a8a7b65e9 Update Rust crate bitflags to v2.6.0 (#12123) 2024-06-30 21:20:17 -04:00
renovate[bot]
211cafc571 Update Rust crate log to v0.4.22 (#12118) 2024-06-30 21:19:50 -04:00
renovate[bot]
0b1b94567a Update Rust crate serde_with to v3.8.2 (#12121) 2024-06-30 21:19:33 -04:00
renovate[bot]
168112d343 Update Rust crate serde_json to v1.0.119 (#12120) 2024-06-30 21:19:10 -04:00
renovate[bot]
a5355084b5 Update Rust crate matchit to v0.8.3 (#12119) 2024-06-30 21:18:51 -04:00
renovate[bot]
deedb29e75 Update Rust crate clap to v4.5.8 (#12117) 2024-06-30 21:18:21 -04:00
Micha Reiser
f765d19402 Mention that Cursor is based on rustc's implementation. (#12109) 2024-06-30 16:53:25 +01:00
Gilles Peiffer
d1079680bb [pylint] Add fix for duplicate-bases (PLE0241) (#12105)
## Summary

This adds a fix for the `duplicate-bases` rule that removes the
duplicate base from the class definition.

## Test Plan

`cargo nextest run duplicate_bases`, `cargo insta review`.
2024-06-29 17:48:24 +00:00
Micha Reiser
da78de0439 Remove allcation in parse_identifier (#12103) 2024-06-29 15:00:24 +02:00
Dhruv Manilawala
47b227394e Avoid E275 if keyword is followed by a semicolon (#12095)
fixes: #12094
2024-06-28 20:51:35 +05:30
Charlie Marsh
c326778652 Make requires-python inference robust to == (#12091)
## Summary

Instead of using a high patch version, attempt to detect the
minimum-supported minor.

Closes #12088.
2024-06-28 09:38:17 -04:00
Dhruv Manilawala
434ce307a7 Revert "Use correct range to highlight line continuation error" (#12089)
This PR reverts https://github.com/astral-sh/ruff/pull/12016 with a
small change where the error location points to the continuation
character only. Earlier, it would also highlight the whitespace that
came before it.

The motivation for this change is to avoid panic in
https://github.com/astral-sh/ruff/pull/11950. For example:

```py
\)
```

Playground: https://play.ruff.rs/87711071-1b54-45a3-b45a-81a336a1ea61

The range of `Unknown` token and `Rpar` is the same. Once #11950 is
enabled, the indexer would panic. It won't panic in the stable version
because we stop at the first `Unknown` token.
2024-06-28 18:10:00 +05:30
Charlie Marsh
6a37d7a1e6 Add bandit rule changes to breaking section (#12090)
Closes https://github.com/astral-sh/ruff/issues/12086.
2024-06-28 11:41:45 +00:00
Dhruv Manilawala
0179ff97da Add standalone installer instruction to docs (#12081)
Adopted from `uv` README
(https://github.com/astral-sh/uv#getting-started), this PR adds a
section of using standalone installers in the installation section of
Ruff docs.
2024-06-28 11:34:46 +00:00
Charlie Marsh
2b54fab02c Publish docs and playground on cargo-dist release (#12079)
## Summary

These are now `post-announce-jobs`. So if they fail, the release itself
will still succeed, which seems ok. (If we make them `publish-jobs`,
then we might end up publishing to PyPI but failing the release itself
if one of these fails.)

The intent is that these are still runnable via `workflow_dispatch` too.

Closes https://github.com/astral-sh/ruff/issues/12074.
2024-06-28 07:29:04 -04:00
Micha Reiser
117ab789c9 Add more NPY201 tests (#12087) 2024-06-28 09:58:39 +02:00
Étienne BERSAC
2336c078e2 Improve Emacs configuration (#12070)
Replace black and combine `ruff check --select=I --fix` and `ruff
format`.
2024-06-28 13:09:29 +05:30
Dhruv Manilawala
9fec384d11 Show syntax errors on the playground (#12083)
## Summary

This PR updates the playground to show syntax errors.

(I forgot to update this and noticed it this morning.)

## Test Plan

Build the playground locally and preview it:

<img width="764" alt="Screenshot 2024-06-28 at 11 03 35"
src="https://github.com/astral-sh/ruff/assets/67177269/1fd48d6c-ae41-4672-bf3c-32a61d9946ef">
2024-06-28 13:06:15 +05:30
Dhruv Manilawala
526efd398a Remove E999 to find diagnostic severity (#12080)
## Summary

This PR removes the need to check for `E999` code to find the diagnostic
severity in the server.

**Note:** This is just removing a redundant check because all
`ParseErrors` are converted to `Diagnostic` with default `Error`
severity by
63c92586a1/crates/ruff_server/src/lint.rs (L309-L346)

## Test Plan

Verify that syntax errors are still shown with error severity as it did
before:

<img width="1313" alt="Screenshot 2024-06-28 at 09 30 20"
src="https://github.com/astral-sh/ruff/assets/67177269/75e389a7-01ea-461c-86a2-0dfc244e515d">
2024-06-28 09:31:35 +05:30
Jane Lewis
b28dc9ac14 Remove --preview as a required argument for ruff server (#12053)
## Summary

`ruff server` has reached a point of stabilization, and `--preview` is
no longer required as a flag.

`--preview` is still supported as a flag, since future features may be
need to gated behind it initially.

## Test Plan

A simple way to test this is to run `ruff server` from the command line.
No error about a missing `--preview` argument should be reported.
2024-06-27 19:27:15 +00:00
Mateusz Sokół
59ea94ce88 [numpy] Update NPY201 to include exception deprecations (#12065)
Hi!

This PR updates `NPY201` rule to address
https://github.com/astral-sh/ruff/issues/12034 and partially
https://github.com/numpy/numpy/issues/26800.
2024-06-27 18:56:56 +00:00
Alex Waygood
5bef2b0361 fix link to the release workflow in CONTRIBUTING.md (#12073) 2024-06-27 16:15:31 +00:00
2109 changed files with 55303 additions and 28101 deletions

3
.github/CODEOWNERS vendored
View File

@@ -17,4 +17,5 @@
/scripts/fuzz-parser/ @AlexWaygood
# red-knot
/crates/red_knot/ @carljm @MichaReiser
/crates/red_knot* @carljm @MichaReiser @AlexWaygood
/crates/ruff_db/ @carljm @MichaReiser @AlexWaygood

View File

@@ -8,7 +8,7 @@
semanticCommits: "disabled",
separateMajorMinor: false,
prHourlyLimit: 10,
enabledManagers: ["github-actions", "pre-commit", "cargo", "pep621", "npm"],
enabledManagers: ["github-actions", "pre-commit", "cargo", "pep621", "pip_requirements", "npm"],
cargo: {
// See https://docs.renovatebot.com/configuration-options/#rangestrategy
rangeStrategy: "update-lockfile",
@@ -16,6 +16,9 @@
pep621: {
fileMatch: ["^(python|scripts)/.*pyproject\\.toml$"],
},
pip_requirements: {
fileMatch: ["^docs/requirements.*\\.txt$"],
},
npm: {
fileMatch: ["^playground/.*package\\.json$"],
},
@@ -48,6 +51,14 @@
matchManagers: ["cargo"],
enabled: false,
},
{
// `mkdocs-material` requires a manual update to keep the version in sync
// with `mkdocs-material-insider`.
// See: https://squidfunk.github.io/mkdocs-material/insiders/upgrade/
matchManagers: ["pip_requirements"],
matchPackagePatterns: ["mkdocs-material"],
enabled: false,
},
{
groupName: "pre-commit dependencies",
matchManagers: ["pre-commit"],

View File

@@ -56,7 +56,7 @@ jobs:
fi
- name: "Build and push Docker image"
uses: docker/build-push-action@v5
uses: docker/build-push-action@v6
with:
context: .
platforms: linux/amd64,linux/arm64

View File

@@ -111,7 +111,7 @@ jobs:
- name: "Clippy"
run: cargo clippy --workspace --all-targets --all-features --locked -- -D warnings
- name: "Clippy (wasm)"
run: cargo clippy -p ruff_wasm --target wasm32-unknown-unknown --all-features --locked -- -D warnings
run: cargo clippy -p ruff_wasm -p red_knot_wasm --target wasm32-unknown-unknown --all-features --locked -- -D warnings
cargo-test-linux:
name: "cargo test (linux)"
@@ -191,10 +191,14 @@ jobs:
cache-dependency-path: playground/package-lock.json
- uses: jetli/wasm-pack-action@v0.4.0
- uses: Swatinem/rust-cache@v2
- name: "Run wasm-pack"
- name: "Test ruff_wasm"
run: |
cd crates/ruff_wasm
wasm-pack test --node
- name: "Test red_knot_wasm"
run: |
cd crates/red_knot_wasm
wasm-pack test --node
cargo-build-release:
name: "cargo build (release)"
@@ -619,7 +623,7 @@ jobs:
run: cargo codspeed build --features codspeed -p ruff_benchmark
- name: "Run benchmarks"
uses: CodSpeedHQ/action@v2
uses: CodSpeedHQ/action@v3
with:
run: cargo codspeed run
token: ${{ secrets.CODSPEED_TOKEN }}

View File

@@ -1,55 +0,0 @@
name: mkdocs
on:
workflow_dispatch:
inputs:
ref:
description: "The commit SHA, tag, or branch to publish. Uses the default branch if not specified."
default: ""
type: string
release:
types: [published]
jobs:
mkdocs:
runs-on: ubuntu-latest
env:
CF_API_TOKEN_EXISTS: ${{ secrets.CF_API_TOKEN != '' }}
MKDOCS_INSIDERS_SSH_KEY_EXISTS: ${{ secrets.MKDOCS_INSIDERS_SSH_KEY != '' }}
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.ref }}
- uses: actions/setup-python@v5
- name: "Add SSH key"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS == 'true' }}
uses: webfactory/ssh-agent@v0.9.0
with:
ssh-private-key: ${{ secrets.MKDOCS_INSIDERS_SSH_KEY }}
- name: "Install Rust toolchain"
run: rustup show
- uses: Swatinem/rust-cache@v2
- name: "Install Insiders dependencies"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS == 'true' }}
run: pip install -r docs/requirements-insiders.txt
- name: "Install dependencies"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS != 'true' }}
run: pip install -r docs/requirements.txt
- name: "Copy README File"
run: |
python scripts/transform_readme.py --target mkdocs
python scripts/generate_mkdocs.py
- name: "Build Insiders docs"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS == 'true' }}
run: mkdocs build --strict -f mkdocs.insiders.yml
- name: "Build docs"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS != 'true' }}
run: mkdocs build --strict -f mkdocs.public.yml
- name: "Deploy to Cloudflare Pages"
if: ${{ env.CF_API_TOKEN_EXISTS == 'true' }}
uses: cloudflare/wrangler-action@v3.6.1
with:
apiToken: ${{ secrets.CF_API_TOKEN }}
accountId: ${{ secrets.CF_ACCOUNT_ID }}
# `github.head_ref` is only set during pull requests and for manual runs or tags we use `main` to deploy to production
command: pages deploy site --project-name=astral-docs --branch ${{ github.head_ref || 'main' }} --commit-hash ${GITHUB_SHA}

View File

@@ -23,6 +23,7 @@ jobs:
name: pr-number
run_id: ${{ github.event.workflow_run.id || github.event.inputs.workflow_run_id }}
if_no_artifact_found: ignore
allow_forks: true
- name: Parse pull request number
id: pr-number
@@ -43,6 +44,7 @@ jobs:
path: pr/ecosystem
workflow_conclusion: completed
if_no_artifact_found: ignore
allow_forks: true
- name: Generate comment content
id: generate-comment

151
.github/workflows/publish-docs.yml vendored Normal file
View File

@@ -0,0 +1,151 @@
# Publish the Ruff documentation.
#
# Assumed to run as a subworkflow of .github/workflows/release.yml; specifically, as a post-announce
# job within `cargo-dist`.
name: mkdocs
on:
workflow_dispatch:
inputs:
ref:
description: "The commit SHA, tag, or branch to publish. Uses the default branch if not specified."
default: ""
type: string
workflow_call:
inputs:
plan:
required: true
type: string
jobs:
mkdocs:
runs-on: ubuntu-latest
env:
MKDOCS_INSIDERS_SSH_KEY_EXISTS: ${{ secrets.MKDOCS_INSIDERS_SSH_KEY != '' }}
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.ref }}
- uses: actions/setup-python@v5
with:
python-version: 3.12
- name: "Set docs version"
run: |
version="${{ (inputs.plan != '' && fromJson(inputs.plan).announcement_tag) || inputs.ref }}"
# if version is missing, exit with error
if [[ -z "$version" ]]; then
echo "Can't build docs without a version."
exit 1
fi
# Use version as display name for now
display_name="$version"
echo "version=$version" >> $GITHUB_ENV
echo "display_name=$display_name" >> $GITHUB_ENV
- name: "Set branch name"
run: |
version="${{ env.version }}"
display_name="${{ env.display_name }}"
timestamp="$(date +%s)"
# create branch_display_name from display_name by replacing all
# characters disallowed in git branch names with hyphens
branch_display_name="$(echo "$display_name" | tr -c '[:alnum:]._' '-' | tr -s '-')"
echo "branch_name=update-docs-$branch_display_name-$timestamp" >> $GITHUB_ENV
echo "timestamp=$timestamp" >> $GITHUB_ENV
- name: "Add SSH key"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS == 'true' }}
uses: webfactory/ssh-agent@v0.9.0
with:
ssh-private-key: ${{ secrets.MKDOCS_INSIDERS_SSH_KEY }}
- name: "Install Rust toolchain"
run: rustup show
- uses: Swatinem/rust-cache@v2
- name: "Install Insiders dependencies"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS == 'true' }}
run: pip install -r docs/requirements-insiders.txt
- name: "Install dependencies"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS != 'true' }}
run: pip install -r docs/requirements.txt
- name: "Copy README File"
run: |
python scripts/transform_readme.py --target mkdocs
python scripts/generate_mkdocs.py
- name: "Build Insiders docs"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS == 'true' }}
run: mkdocs build --strict -f mkdocs.insiders.yml
- name: "Build docs"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS != 'true' }}
run: mkdocs build --strict -f mkdocs.public.yml
- name: "Clone docs repo"
run: |
version="${{ env.version }}"
git clone https://${{ secrets.ASTRAL_DOCS_PAT }}@github.com/astral-sh/docs.git astral-docs
- name: "Copy docs"
run: rm -rf astral-docs/site/ruff && mkdir -p astral-docs/site && cp -r site/ruff astral-docs/site/
- name: "Commit docs"
working-directory: astral-docs
run: |
branch_name="${{ env.branch_name }}"
git config user.name "astral-docs-bot"
git config user.email "176161322+astral-docs-bot@users.noreply.github.com"
git checkout -b $branch_name
git add site/ruff
git commit -m "Update ruff documentation for $version"
- name: "Create Pull Request"
working-directory: astral-docs
env:
GITHUB_TOKEN: ${{ secrets.ASTRAL_DOCS_PAT }}
run: |
version="${{ env.version }}"
display_name="${{ env.display_name }}"
branch_name="${{ env.branch_name }}"
# set the PR title
pull_request_title="Update ruff documentation for $display_name"
# Delete any existing pull requests that are open for this version
# by checking against pull_request_title because the new PR will
# supersede the old one.
gh pr list --state open --json title --jq '.[] | select(.title == "$pull_request_title") | .number' | \
xargs -I {} gh pr close {}
# push the branch to GitHub
git push origin $branch_name
# create the PR
gh pr create --base main --head $branch_name \
--title "$pull_request_title" \
--body "Automated documentation update for $display_name" \
--label "documentation"
- name: "Merge Pull Request"
if: ${{ inputs.plan != '' && !fromJson(inputs.plan).announcement_tag_is_implicit }}
working-directory: astral-docs
env:
GITHUB_TOKEN: ${{ secrets.ASTRAL_DOCS_PAT }}
run: |
branch_name="${{ env.branch_name }}"
# auto-merge the PR if the build was triggered by a release. Manual builds should be reviewed by a human.
# give the PR a few seconds to be created before trying to auto-merge it
sleep 10
gh pr merge --squash $branch_name

View File

@@ -1,9 +1,16 @@
# Publish the Ruff playground.
#
# Assumed to run as a subworkflow of .github/workflows/release.yml; specifically, as a post-announce
# job within `cargo-dist`.
name: "[Playground] Release"
on:
workflow_dispatch:
release:
types: [published]
workflow_call:
inputs:
plan:
required: true
type: string
env:
CARGO_INCREMENTAL: 0
@@ -40,7 +47,7 @@ jobs:
working-directory: playground
- name: "Deploy to Cloudflare Pages"
if: ${{ env.CF_API_TOKEN_EXISTS == 'true' }}
uses: cloudflare/wrangler-action@v3.6.1
uses: cloudflare/wrangler-action@v3.7.0
with:
apiToken: ${{ secrets.CF_API_TOKEN }}
accountId: ${{ secrets.CF_ACCOUNT_ID }}

55
.github/workflows/publish-wasm.yml vendored Normal file
View File

@@ -0,0 +1,55 @@
# Build and publish ruff-api for wasm.
#
# Assumed to run as a subworkflow of .github/workflows/release.yml; specifically, as a publish
# job within `cargo-dist`.
name: "Build and publish wasm"
on:
workflow_dispatch:
workflow_call:
inputs:
plan:
required: true
type: string
env:
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10
CARGO_TERM_COLOR: always
RUSTUP_MAX_RETRIES: 10
jobs:
ruff_wasm:
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write
strategy:
matrix:
target: [web, bundler, nodejs]
fail-fast: false
steps:
- uses: actions/checkout@v4
- name: "Install Rust toolchain"
run: rustup target add wasm32-unknown-unknown
- uses: jetli/wasm-pack-action@v0.4.0
- uses: jetli/wasm-bindgen-action@v0.2.0
- name: "Run wasm-pack build"
run: wasm-pack build --target ${{ matrix.target }} crates/ruff_wasm
- name: "Rename generated package"
run: | # Replace the package name w/ jq
jq '.name="@astral-sh/ruff-wasm-${{ matrix.target }}"' crates/ruff_wasm/pkg/package.json > /tmp/package.json
mv /tmp/package.json crates/ruff_wasm/pkg
- run: cp LICENSE crates/ruff_wasm/pkg # wasm-pack does not put the LICENSE file in the pkg
- uses: actions/setup-node@v4
with:
node-version: 18
registry-url: "https://registry.npmjs.org"
- name: "Publish (dry-run)"
if: ${{ inputs.plan == '' || fromJson(inputs.plan).announcement_tag_is_implicit }}
run: npm publish --dry-run crates/ruff_wasm/pkg
- name: "Publish"
if: ${{ inputs.plan != '' && !fromJson(inputs.plan).announcement_tag_is_implicit }}
run: npm publish --provenance --access public crates/ruff_wasm/pkg
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

View File

@@ -12,9 +12,8 @@
# title/body based on your changelogs.
name: Release
permissions:
contents: write
"contents": "write"
# This task will run whenever you workflow_dispatch with a tag that looks like a version
# like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc.
@@ -49,7 +48,7 @@ on:
jobs:
# Run 'cargo dist plan' (or host) to determine what tasks we need to do
plan:
runs-on: ubuntu-latest
runs-on: "ubuntu-20.04"
outputs:
val: ${{ steps.plan.outputs.manifest }}
tag: ${{ (inputs.tag != 'dry-run' && inputs.tag) || '' }}
@@ -65,7 +64,12 @@ jobs:
# we specify bash to get pipefail; it guards against the `curl` command
# failing. otherwise `sh` won't catch that `curl` returned non-0
shell: bash
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.14.0/cargo-dist-installer.sh | sh"
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.18.0/cargo-dist-installer.sh | sh"
- name: Cache cargo-dist
uses: actions/upload-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/cargo-dist
# sure would be cool if github gave us proper conditionals...
# so here's a doubly-nested ternary-via-truthiness to try to provide the best possible
# functionality based on whether this is a pull_request, and whether it's from a fork.
@@ -101,8 +105,8 @@ jobs:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit
permissions:
packages: write
contents: read
"contents": "read"
"packages": "write"
# Build and package all the platform-agnostic(ish) things
build-global-artifacts:
@@ -118,9 +122,12 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install cargo-dist
shell: bash
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.14.0/cargo-dist-installer.sh | sh"
- name: Install cached cargo-dist
uses: actions/download-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/cargo-dist
# Get all the local artifacts for the global tasks to use (for e.g. checksums)
- name: Fetch local artifacts
uses: actions/download-artifact@v4
@@ -165,8 +172,12 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install cargo-dist
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.14.0/cargo-dist-installer.sh | sh"
- name: Install cached cargo-dist
uses: actions/download-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/cargo-dist
# Fetch artifacts from scratch-storage
- name: Fetch artifacts
uses: actions/download-artifact@v4
@@ -200,8 +211,23 @@ jobs:
secrets: inherit
# publish jobs get escalated permissions
permissions:
id-token: write
packages: write
"id-token": "write"
"packages": "write"
custom-publish-wasm:
needs:
- plan
- host
if: ${{ !fromJson(needs.plan.outputs.val).announcement_is_prerelease || fromJson(needs.plan.outputs.val).publish_prereleases }}
uses: ./.github/workflows/publish-wasm.yml
with:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit
# publish jobs get escalated permissions
permissions:
"contents": "read"
"id-token": "write"
"packages": "write"
# Create a GitHub Release while uploading all files to it
announce:
@@ -209,10 +235,11 @@ jobs:
- plan
- host
- custom-publish-pypi
- custom-publish-wasm
# use "always() && ..." to allow us to wait for all publish jobs while
# still allowing individual publish jobs to skip themselves (for prereleases).
# "host" however must run to completion, no skipping allowed!
if: ${{ always() && needs.host.result == 'success' && (needs.custom-publish-pypi.result == 'skipped' || needs.custom-publish-pypi.result == 'success') }}
if: ${{ always() && needs.host.result == 'success' && (needs.custom-publish-pypi.result == 'skipped' || needs.custom-publish-pypi.result == 'success') && (needs.custom-publish-wasm.result == 'skipped' || needs.custom-publish-wasm.result == 'success') }}
runs-on: "ubuntu-20.04"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -220,6 +247,7 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: recursive
# Create a GitHub Release while uploading all files to it
- name: "Download GitHub Artifacts"
uses: actions/download-artifact@v4
with:
@@ -231,13 +259,16 @@ jobs:
# Remove the granular manifests
rm -f artifacts/*-dist-manifest.json
- name: Create GitHub Release
uses: ncipollo/release-action@v1
with:
tag: ${{ needs.plan.outputs.tag }}
name: ${{ fromJson(needs.host.outputs.val).announcement_title }}
body: ${{ fromJson(needs.host.outputs.val).announcement_github_body }}
prerelease: ${{ fromJson(needs.host.outputs.val).announcement_is_prerelease }}
artifacts: "artifacts/*"
env:
PRERELEASE_FLAG: "${{ fromJson(needs.host.outputs.val).announcement_is_prerelease && '--prerelease' || '' }}"
ANNOUNCEMENT_TITLE: "${{ fromJson(needs.host.outputs.val).announcement_title }}"
ANNOUNCEMENT_BODY: "${{ fromJson(needs.host.outputs.val).announcement_github_body }}"
RELEASE_COMMIT: "${{ github.sha }}"
run: |
# Write and read notes from a file to avoid quoting breaking things
echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt
gh release create "${{ needs.plan.outputs.tag }}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/*
custom-notify-dependents:
needs:
@@ -247,3 +278,21 @@ jobs:
with:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit
custom-publish-docs:
needs:
- plan
- announce
uses: ./.github/workflows/publish-docs.yml
with:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit
custom-publish-playground:
needs:
- plan
- announce
uses: ./.github/workflows/publish-playground.yml
with:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit

View File

@@ -37,13 +37,13 @@ jobs:
- name: Sync typeshed
id: sync
run: |
rm -rf ruff/crates/red_knot_module_resolver/vendor/typeshed
mkdir ruff/crates/red_knot_module_resolver/vendor/typeshed
cp typeshed/README.md ruff/crates/red_knot_module_resolver/vendor/typeshed
cp typeshed/LICENSE ruff/crates/red_knot_module_resolver/vendor/typeshed
cp -r typeshed/stdlib ruff/crates/red_knot_module_resolver/vendor/typeshed/stdlib
rm -rf ruff/crates/red_knot_module_resolver/vendor/typeshed/stdlib/@tests
git -C typeshed rev-parse HEAD > ruff/crates/red_knot_module_resolver/vendor/typeshed/source_commit.txt
rm -rf ruff/crates/red_knot_python_semantic/vendor/typeshed
mkdir ruff/crates/red_knot_python_semantic/vendor/typeshed
cp typeshed/README.md ruff/crates/red_knot_python_semantic/vendor/typeshed
cp typeshed/LICENSE ruff/crates/red_knot_python_semantic/vendor/typeshed
cp -r typeshed/stdlib ruff/crates/red_knot_python_semantic/vendor/typeshed/stdlib
rm -rf ruff/crates/red_knot_python_semantic/vendor/typeshed/stdlib/@tests
git -C typeshed rev-parse HEAD > ruff/crates/red_knot_python_semantic/vendor/typeshed/source_commit.txt
- name: Commit the changes
id: commit
if: ${{ steps.sync.outcome == 'success' }}

8
.gitignore vendored
View File

@@ -21,6 +21,14 @@ flamegraph.svg
# `CARGO_TARGET_DIR=target-llvm-lines RUSTFLAGS="-Csymbol-mangling-version=v0" cargo llvm-lines -p ruff --lib`
/target*
# samply profiles
profile.json
# tracing-flame traces
tracing.folded
tracing-flamechart.svg
tracing-flamegraph.svg
###
# Rust.gitignore
###

View File

@@ -2,7 +2,8 @@ fail_fast: true
exclude: |
(?x)^(
crates/red_knot_module_resolver/vendor/.*|
crates/red_knot_python_semantic/vendor/.*|
crates/red_knot_workspace/resources/.*|
crates/ruff_linter/resources/.*|
crates/ruff_linter/src/rules/.*/snapshots/.*|
crates/ruff/resources/.*|
@@ -42,7 +43,7 @@ repos:
)$
- repo: https://github.com/crate-ci/typos
rev: v1.22.9
rev: v1.23.6
hooks:
- id: typos
@@ -56,18 +57,13 @@ repos:
pass_filenames: false # This makes it a lot faster
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.10
rev: v0.5.6
hooks:
- id: ruff-format
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
types_or: [python, pyi]
require_serial: true
exclude: |
(?x)^(
crates/ruff_linter/resources/.*|
crates/ruff_python_formatter/resources/.*
)$
# Prettier
- repo: https://github.com/pre-commit/mirrors-prettier

View File

@@ -1,5 +1,269 @@
# Changelog
## 0.5.7
### Preview features
- \[`flake8-comprehensions`\] Account for list and set comprehensions in `unnecessary-literal-within-tuple-call` (`C409`) ([#12657](https://github.com/astral-sh/ruff/pull/12657))
- \[`flake8-pyi`\] Add autofix for `future-annotations-in-stub` (`PYI044`) ([#12676](https://github.com/astral-sh/ruff/pull/12676))
- \[`flake8-return`\] Avoid syntax error when auto-fixing `RET505` with mixed indentation (space and tabs) ([#12740](https://github.com/astral-sh/ruff/pull/12740))
- \[`pydoclint`\] Add `docstring-missing-yields` (`DOC402`) and `docstring-extraneous-yields` (`DOC403`) ([#12538](https://github.com/astral-sh/ruff/pull/12538))
- \[`pydoclint`\] Avoid `DOC201` if docstring begins with "Return", "Returns", "Yield", or "Yields" ([#12675](https://github.com/astral-sh/ruff/pull/12675))
- \[`pydoclint`\] Deduplicate collected exceptions after traversing function bodies (`DOC501`) ([#12642](https://github.com/astral-sh/ruff/pull/12642))
- \[`pydoclint`\] Ignore `DOC` errors for stub functions ([#12651](https://github.com/astral-sh/ruff/pull/12651))
- \[`pydoclint`\] Teach rules to understand reraised exceptions as being explicitly raised (`DOC501`, `DOC502`) ([#12639](https://github.com/astral-sh/ruff/pull/12639))
- \[`ruff`\] Implement `incorrectly-parenthesized-tuple-in-subscript` (`RUF031`) ([#12480](https://github.com/astral-sh/ruff/pull/12480))
- \[`ruff`\] Mark `RUF023` fix as unsafe if `__slots__` is not a set and the binding is used elsewhere ([#12692](https://github.com/astral-sh/ruff/pull/12692))
### Rule changes
- \[`refurb`\] Add autofix for `implicit-cwd` (`FURB177`) ([#12708](https://github.com/astral-sh/ruff/pull/12708))
- \[`ruff`\] Add autofix for `zip-instead-of-pairwise` (`RUF007`) ([#12663](https://github.com/astral-sh/ruff/pull/12663))
- \[`tryceratops`\] Add `BaseException` to `raise-vanilla-class` rule (`TRY002`) ([#12620](https://github.com/astral-sh/ruff/pull/12620))
### Server
- Ignore non-file workspace URL; Ruff will display a warning notification in this case ([#12725](https://github.com/astral-sh/ruff/pull/12725))
### CLI
- Fix cache invalidation for nested `pyproject.toml` files ([#12727](https://github.com/astral-sh/ruff/pull/12727))
### Bug fixes
- \[`flake8-async`\] Fix false positives with multiple `async with` items (`ASYNC100`) ([#12643](https://github.com/astral-sh/ruff/pull/12643))
- \[`flake8-bandit`\] Avoid false-positives for list concatenations in SQL construction (`S608`) ([#12720](https://github.com/astral-sh/ruff/pull/12720))
- \[`flake8-bugbear`\] Treat `return` as equivalent to `break` (`B909`) ([#12646](https://github.com/astral-sh/ruff/pull/12646))
- \[`flake8-comprehensions`\] Set comprehensions not a violation for `sum` in `unnecessary-comprehension-in-call` (`C419`) ([#12691](https://github.com/astral-sh/ruff/pull/12691))
- \[`flake8-simplify`\] Parenthesize conditions based on precedence when merging if arms (`SIM114`) ([#12737](https://github.com/astral-sh/ruff/pull/12737))
- \[`pydoclint`\] Try both 'Raises' section styles when convention is unspecified (`DOC501`) ([#12649](https://github.com/astral-sh/ruff/pull/12649))
## 0.5.6
Ruff 0.5.6 automatically enables linting and formatting of notebooks in *preview mode*.
You can opt-out of this behavior by adding `*.ipynb` to the `extend-exclude` setting.
```toml
[tool.ruff]
extend-exclude = ["*.ipynb"]
```
### Preview features
- Enable notebooks by default in preview mode ([#12621](https://github.com/astral-sh/ruff/pull/12621))
- \[`flake8-builtins`\] Implement import, lambda, and module shadowing ([#12546](https://github.com/astral-sh/ruff/pull/12546))
- \[`pydoclint`\] Add `docstring-missing-returns` (`DOC201`) and `docstring-extraneous-returns` (`DOC202`) ([#12485](https://github.com/astral-sh/ruff/pull/12485))
### Rule changes
- \[`flake8-return`\] Exempt cached properties and other property-like decorators from explicit return rule (`RET501`) ([#12563](https://github.com/astral-sh/ruff/pull/12563))
### Server
- Make server panic hook more error resilient ([#12610](https://github.com/astral-sh/ruff/pull/12610))
- Use `$/logTrace` for server trace logs in Zed and VS Code ([#12564](https://github.com/astral-sh/ruff/pull/12564))
- Keep track of deleted cells for reorder change request ([#12575](https://github.com/astral-sh/ruff/pull/12575))
### Configuration
- \[`flake8-implicit-str-concat`\] Always allow explicit multi-line concatenations when implicit concatenations are banned ([#12532](https://github.com/astral-sh/ruff/pull/12532))
### Bug fixes
- \[`flake8-async`\] Avoid flagging `asyncio.timeout`s as unused when the context manager includes `asyncio.TaskGroup` ([#12605](https://github.com/astral-sh/ruff/pull/12605))
- \[`flake8-slots`\] Avoid recommending `__slots__` for classes that inherit from more than `namedtuple` ([#12531](https://github.com/astral-sh/ruff/pull/12531))
- \[`isort`\] Avoid marking required imports as unused ([#12537](https://github.com/astral-sh/ruff/pull/12537))
- \[`isort`\] Preserve trailing inline comments on import-from statements ([#12498](https://github.com/astral-sh/ruff/pull/12498))
- \[`pycodestyle`\] Add newlines before comments (`E305`) ([#12606](https://github.com/astral-sh/ruff/pull/12606))
- \[`pycodestyle`\] Don't attach comments with mismatched indents ([#12604](https://github.com/astral-sh/ruff/pull/12604))
- \[`pyflakes`\] Fix preview-mode bugs in `F401` when attempting to autofix unused first-party submodule imports in an `__init__.py` file ([#12569](https://github.com/astral-sh/ruff/pull/12569))
- \[`pylint`\] Respect start index in `unnecessary-list-index-lookup` ([#12603](https://github.com/astral-sh/ruff/pull/12603))
- \[`pyupgrade`\] Avoid recommending no-argument super in `slots=True` dataclasses ([#12530](https://github.com/astral-sh/ruff/pull/12530))
- \[`pyupgrade`\] Use colon rather than dot formatting for integer-only types ([#12534](https://github.com/astral-sh/ruff/pull/12534))
- Fix NFKC normalization bug when removing unused imports ([#12571](https://github.com/astral-sh/ruff/pull/12571))
### Other changes
- Consider more stdlib decorators to be property-like ([#12583](https://github.com/astral-sh/ruff/pull/12583))
- Improve handling of metaclasses in various linter rules ([#12579](https://github.com/astral-sh/ruff/pull/12579))
- Improve consistency between linter rules in determining whether a function is property ([#12581](https://github.com/astral-sh/ruff/pull/12581))
## 0.5.5
### Preview features
- \[`fastapi`\] Implement `fastapi-redundant-response-model` (`FAST001`) and `fastapi-non-annotated-dependency`(`FAST002`) ([#11579](https://github.com/astral-sh/ruff/pull/11579))
- \[`pydoclint`\] Implement `docstring-missing-exception` (`DOC501`) and `docstring-extraneous-exception` (`DOC502`) ([#11471](https://github.com/astral-sh/ruff/pull/11471))
### Rule changes
- \[`numpy`\] Fix NumPy 2.0 rule for `np.alltrue` and `np.sometrue` ([#12473](https://github.com/astral-sh/ruff/pull/12473))
- \[`numpy`\] Ignore `NPY201` inside `except` blocks for compatibility with older numpy versions ([#12490](https://github.com/astral-sh/ruff/pull/12490))
- \[`pep8-naming`\] Avoid applying `ignore-names` to `self` and `cls` function names (`N804`, `N805`) ([#12497](https://github.com/astral-sh/ruff/pull/12497))
### Formatter
- Fix incorrect placement of leading function comment with type params ([#12447](https://github.com/astral-sh/ruff/pull/12447))
### Server
- Do not bail code action resolution when a quick fix is requested ([#12462](https://github.com/astral-sh/ruff/pull/12462))
### Bug fixes
- Fix `Ord` implementation of `cmp_fix` ([#12471](https://github.com/astral-sh/ruff/pull/12471))
- Raise syntax error for unparenthesized generator expression in multi-argument call ([#12445](https://github.com/astral-sh/ruff/pull/12445))
- \[`pydoclint`\] Fix panic in `DOC501` reported in [#12428](https://github.com/astral-sh/ruff/pull/12428) ([#12435](https://github.com/astral-sh/ruff/pull/12435))
- \[`flake8-bugbear`\] Allow singleton tuples with starred expressions in `B013` ([#12484](https://github.com/astral-sh/ruff/pull/12484))
### Documentation
- Add Eglot setup guide for Emacs editor ([#12426](https://github.com/astral-sh/ruff/pull/12426))
- Add note about the breaking change in `nvim-lspconfig` ([#12507](https://github.com/astral-sh/ruff/pull/12507))
- Add note to include notebook files for native server ([#12449](https://github.com/astral-sh/ruff/pull/12449))
- Add setup docs for Zed editor ([#12501](https://github.com/astral-sh/ruff/pull/12501))
## 0.5.4
### Rule changes
- \[`ruff`\] Rename `RUF007` to `zip-instead-of-pairwise` ([#12399](https://github.com/astral-sh/ruff/pull/12399))
### Bug fixes
- \[`flake8-builtins`\] Avoid shadowing diagnostics for `@override` methods ([#12415](https://github.com/astral-sh/ruff/pull/12415))
- \[`flake8-comprehensions`\] Insert parentheses for multi-argument generators ([#12422](https://github.com/astral-sh/ruff/pull/12422))
- \[`pydocstyle`\] Handle escaped docstrings within docstring (`D301`) ([#12192](https://github.com/astral-sh/ruff/pull/12192))
### Documentation
- Fix GitHub link to Neovim setup ([#12410](https://github.com/astral-sh/ruff/pull/12410))
- Fix `output-format` default in settings reference ([#12409](https://github.com/astral-sh/ruff/pull/12409))
## 0.5.3
**Ruff 0.5.3 marks the stable release of the Ruff language server and introduces revamped
[documentation](https://docs.astral.sh/ruff/editors), including [setup guides for your editor of
choice](https://docs.astral.sh/ruff/editors/setup) and [the language server
itself](https://docs.astral.sh/ruff/editors/settings)**.
### Preview features
- Formatter: Insert empty line between suite and alternative branch after function/class definition ([#12294](https://github.com/astral-sh/ruff/pull/12294))
- \[`pyupgrade`\] Implement `unnecessary-default-type-args` (`UP043`) ([#12371](https://github.com/astral-sh/ruff/pull/12371))
### Rule changes
- \[`flake8-bugbear`\] Detect enumerate iterations in `loop-iterator-mutation` (`B909`) ([#12366](https://github.com/astral-sh/ruff/pull/12366))
- \[`flake8-bugbear`\] Remove `discard`, `remove`, and `pop` allowance for `loop-iterator-mutation` (`B909`) ([#12365](https://github.com/astral-sh/ruff/pull/12365))
- \[`pylint`\] Allow `repeated-equality-comparison` for mixed operations (`PLR1714`) ([#12369](https://github.com/astral-sh/ruff/pull/12369))
- \[`pylint`\] Ignore `self` and `cls` when counting arguments (`PLR0913`) ([#12367](https://github.com/astral-sh/ruff/pull/12367))
- \[`pylint`\] Use UTF-8 as default encoding in `unspecified-encoding` fix (`PLW1514`) ([#12370](https://github.com/astral-sh/ruff/pull/12370))
### Server
- Build settings index in parallel for the native server ([#12299](https://github.com/astral-sh/ruff/pull/12299))
- Use fallback settings when indexing the project ([#12362](https://github.com/astral-sh/ruff/pull/12362))
- Consider `--preview` flag for `server` subcommand for the linter and formatter ([#12208](https://github.com/astral-sh/ruff/pull/12208))
### Bug fixes
- \[`flake8-comprehensions`\] Allow additional arguments for `sum` and `max` comprehensions (`C419`) ([#12364](https://github.com/astral-sh/ruff/pull/12364))
- \[`pylint`\] Avoid dropping extra boolean operations in `repeated-equality-comparison` (`PLR1714`) ([#12368](https://github.com/astral-sh/ruff/pull/12368))
- \[`pylint`\] Consider expression before statement when determining binding kind (`PLR1704`) ([#12346](https://github.com/astral-sh/ruff/pull/12346))
### Documentation
- Add docs for Ruff language server ([#12344](https://github.com/astral-sh/ruff/pull/12344))
- Migrate to standalone docs repo ([#12341](https://github.com/astral-sh/ruff/pull/12341))
- Update versioning policy for editor integration ([#12375](https://github.com/astral-sh/ruff/pull/12375))
### Other changes
- Publish Wasm API to npm ([#12317](https://github.com/astral-sh/ruff/pull/12317))
## 0.5.2
### Preview features
- Use `space` separator before parenthesized expressions in comprehensions with leading comments ([#12282](https://github.com/astral-sh/ruff/pull/12282))
- \[`flake8-async`\] Update `ASYNC100` to include `anyio` and `asyncio` ([#12221](https://github.com/astral-sh/ruff/pull/12221))
- \[`flake8-async`\] Update `ASYNC109` to include `anyio` and `asyncio` ([#12236](https://github.com/astral-sh/ruff/pull/12236))
- \[`flake8-async`\] Update `ASYNC110` to include `anyio` and `asyncio` ([#12261](https://github.com/astral-sh/ruff/pull/12261))
- \[`flake8-async`\] Update `ASYNC115` to include `anyio` and `asyncio` ([#12262](https://github.com/astral-sh/ruff/pull/12262))
- \[`flake8-async`\] Update `ASYNC116` to include `anyio` and `asyncio` ([#12266](https://github.com/astral-sh/ruff/pull/12266))
### Rule changes
- \[`flake8-return`\] Exempt properties from explicit return rule (`RET501`) ([#12243](https://github.com/astral-sh/ruff/pull/12243))
- \[`numpy`\] Add `np.NAN`-to-`np.nan` diagnostic ([#12292](https://github.com/astral-sh/ruff/pull/12292))
- \[`refurb`\] Make `list-reverse-copy` an unsafe fix ([#12303](https://github.com/astral-sh/ruff/pull/12303))
### Server
- Consider `include` and `extend-include` settings in native server ([#12252](https://github.com/astral-sh/ruff/pull/12252))
- Include nested configurations in settings reloading ([#12253](https://github.com/astral-sh/ruff/pull/12253))
### CLI
- Omit code frames for fixes with empty ranges ([#12304](https://github.com/astral-sh/ruff/pull/12304))
- Warn about formatter incompatibility for `D203` ([#12238](https://github.com/astral-sh/ruff/pull/12238))
### Bug fixes
- Make cache-write failures non-fatal on Windows ([#12302](https://github.com/astral-sh/ruff/pull/12302))
- Treat `not` operations as boolean tests ([#12301](https://github.com/astral-sh/ruff/pull/12301))
- \[`flake8-bandit`\] Avoid `S310` violations for HTTP-safe f-strings ([#12305](https://github.com/astral-sh/ruff/pull/12305))
- \[`flake8-bandit`\] Support explicit string concatenations in S310 HTTP detection ([#12315](https://github.com/astral-sh/ruff/pull/12315))
- \[`flake8-bandit`\] fix S113 false positive for httpx without `timeout` argument ([#12213](https://github.com/astral-sh/ruff/pull/12213))
- \[`pycodestyle`\] Remove "non-obvious" allowance for E721 ([#12300](https://github.com/astral-sh/ruff/pull/12300))
- \[`pyflakes`\] Consider `with` blocks as single-item branches for redefinition analysis ([#12311](https://github.com/astral-sh/ruff/pull/12311))
- \[`refurb`\] Restrict forwarding for `newline` argument in `open()` calls to Python versions >= 3.10 ([#12244](https://github.com/astral-sh/ruff/pull/12244))
### Documentation
- Update help and documentation to reflect `--output-format full` default ([#12248](https://github.com/astral-sh/ruff/pull/12248))
### Performance
- Use more threads when discovering Python files ([#12258](https://github.com/astral-sh/ruff/pull/12258))
## 0.5.1
### Preview features
- \[`flake8-bugbear`\] Implement mutable-contextvar-default (B039) ([#12113](https://github.com/astral-sh/ruff/pull/12113))
- \[`pycodestyle`\] Whitespace after decorator (`E204`) ([#12140](https://github.com/astral-sh/ruff/pull/12140))
- \[`pytest`\] Reverse `PT001` and `PT0023` defaults ([#12106](https://github.com/astral-sh/ruff/pull/12106))
### Rule changes
- Enable token-based rules on source with syntax errors ([#11950](https://github.com/astral-sh/ruff/pull/11950))
- \[`flake8-bandit`\] Detect `httpx` for `S113` ([#12174](https://github.com/astral-sh/ruff/pull/12174))
- \[`numpy`\] Update `NPY201` to include exception deprecations ([#12065](https://github.com/astral-sh/ruff/pull/12065))
- \[`pylint`\] Generate autofix for `duplicate-bases` (`PLE0241`) ([#12105](https://github.com/astral-sh/ruff/pull/12105))
### Server
- Avoid syntax error notification for source code actions ([#12148](https://github.com/astral-sh/ruff/pull/12148))
- Consider the content of the new cells during notebook sync ([#12203](https://github.com/astral-sh/ruff/pull/12203))
- Fix replacement edit range computation ([#12171](https://github.com/astral-sh/ruff/pull/12171))
### Bug fixes
- Disable auto-fix when source has syntax errors ([#12134](https://github.com/astral-sh/ruff/pull/12134))
- Fix cache key collisions for paths with separators ([#12159](https://github.com/astral-sh/ruff/pull/12159))
- Make `requires-python` inference robust to `==` ([#12091](https://github.com/astral-sh/ruff/pull/12091))
- Use char-wise width instead of `str`-width ([#12135](https://github.com/astral-sh/ruff/pull/12135))
- \[`pycodestyle`\] Avoid `E275` if keyword followed by comma ([#12136](https://github.com/astral-sh/ruff/pull/12136))
- \[`pycodestyle`\] Avoid `E275` if keyword is followed by a semicolon ([#12095](https://github.com/astral-sh/ruff/pull/12095))
- \[`pylint`\] Skip [dummy variables](https://docs.astral.sh/ruff/settings/#lint_dummy-variable-rgx) for `PLR1704` ([#12190](https://github.com/astral-sh/ruff/pull/12190))
### Performance
- Remove allocation in `parse_identifier` ([#12103](https://github.com/astral-sh/ruff/pull/12103))
- Use `CompactString` for `Identifier` AST node ([#12101](https://github.com/astral-sh/ruff/pull/12101))
## 0.5.0
Check out the [blog post](https://astral.sh/blog/ruff-v0.5.0) for a migration guide and overview of the changes!
@@ -12,6 +276,7 @@ See also, the "Remapped rules" section which may result in disabled rules.
- Selecting `ALL` now excludes deprecated rules
- The released archives now include an extra level of nesting, which can be removed with `--strip-components=1` when untarring.
- The release artifact's file name no longer includes the version tag. This enables users to install via `/latest` URLs on GitHub.
- The diagnostic ranges for some `flake8-bandit` rules were modified ([#10667](https://github.com/astral-sh/ruff/pull/10667)).
### Deprecations
@@ -55,7 +320,7 @@ The following rules have been stabilized and are no longer in preview:
- [`bad-open-mode`](https://docs.astral.sh/ruff/rules/bad-open-mode/) (`PLW1501`)
- [`empty-comment`](https://docs.astral.sh/ruff/rules/empty-comment/) (`PLR2044`)
- [`global-at-module-level`](https://docs.astral.sh/ruff/rules/global-at-module-level/) (`PLW0604`)
- [`misplaced-bare-raise`](https://docs.astral.sh/ruff/rules/misplaced-bare-raise%60/) (`PLE0744`)
- [`misplaced-bare-raise`](https://docs.astral.sh/ruff/rules/misplaced-bare-raise/) (`PLE0744`)
- [`non-ascii-import-name`](https://docs.astral.sh/ruff/rules/non-ascii-import-name/) (`PLC2403`)
- [`non-ascii-name`](https://docs.astral.sh/ruff/rules/non-ascii-name/) (`PLC2401`)
- [`nonlocal-and-global`](https://docs.astral.sh/ruff/rules/nonlocal-and-global/) (`PLE0115`)
@@ -107,7 +372,7 @@ The following deprecated CLI commands have been removed:
### Rule changes
- \[`ruff`\] Fix false positives if `gettext` is imported using an alias (`RUF027`) ([#12025](https://github.com/astral-sh/ruff/pull/12025))
- \[`npy`\] Update `trapz` and `in1d` deprecation (`NPY201`) ([#11948](https://github.com/astral-sh/ruff/pull/11948))
- \[`numpy`\] Update `trapz` and `in1d` deprecation (`NPY201`) ([#11948](https://github.com/astral-sh/ruff/pull/11948))
- \[`flake8-bandit`\] Modify diagnostic ranges for shell-related rules ([#10667](https://github.com/astral-sh/ruff/pull/10667))
### Server

View File

@@ -280,7 +280,7 @@ These represent, respectively: the schema used to parse the `pyproject.toml` fil
intermediate representation; and the final, internal representation used to power Ruff.
To add a new configuration option, you'll likely want to modify these latter few files (along with
`arg.rs`, if appropriate). If you want to pattern-match against an existing example, grep for
`args.rs`, if appropriate). If you want to pattern-match against an existing example, grep for
`dummy_variable_rgx`, which defines a regular expression to match against acceptable unused
variables (e.g., `_`).
@@ -333,7 +333,7 @@ even patch releases may contain [non-backwards-compatible changes](https://semve
### Creating a new release
1. Install `uv`: `curl -LsSf https://astral.sh/uv/install.sh | sh`
1. Run `./scripts/release/bump.sh`; this command will:
1. Run `./scripts/release.sh`; this command will:
- Generate a temporary virtual environment with `rooster`
- Generate a changelog entry in `CHANGELOG.md`
- Update versions in `pyproject.toml` and `Cargo.toml`
@@ -346,7 +346,7 @@ even patch releases may contain [non-backwards-compatible changes](https://semve
1. Run `cargo check`. This should update the lock file with new versions.
1. Create a pull request with the changelog and version updates
1. Merge the PR
1. Run the [release workflow](https://github.com/astral-sh/ruff/actions/workflows/release.yaml) with:
1. Run the [release workflow](https://github.com/astral-sh/ruff/actions/workflows/release.yml) with:
- The new version number (without starting `v`)
1. The release workflow will do the following:
1. Build all the assets. If this fails (even though we tested in step 4), we haven't tagged or
@@ -905,7 +905,7 @@ There are three ways in which an import can be categorized as "first-party":
package (e.g., `from foo import bar` or `import foo.bar`), they'll be classified as first-party
automatically. This check is as simple as comparing the first segment of the current file's
module path to the first segment of the import.
1. **Source roots**: Ruff supports a `[src](https://docs.astral.sh/ruff/settings/#src)` setting, which
1. **Source roots**: Ruff supports a [`src`](https://docs.astral.sh/ruff/settings/#src) setting, which
sets the directories to scan when identifying first-party imports. The algorithm is
straightforward: given an import, like `import foo`, iterate over the directories enumerated in
the `src` setting and, for each directory, check for the existence of a subdirectory `foo` or a

532
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -4,7 +4,7 @@ resolver = "2"
[workspace.package]
edition = "2021"
rust-version = "1.75"
rust-version = "1.76"
homepage = "https://docs.astral.sh/ruff"
documentation = "https://docs.astral.sh/ruff"
repository = "https://github.com/astral-sh/ruff"
@@ -35,7 +35,9 @@ ruff_source_file = { path = "crates/ruff_source_file" }
ruff_text_size = { path = "crates/ruff_text_size" }
ruff_workspace = { path = "crates/ruff_workspace" }
red_knot_module_resolver = { path = "crates/red_knot_module_resolver" }
red_knot_python_semantic = { path = "crates/red_knot_python_semantic" }
red_knot_server = { path = "crates/red_knot_server" }
red_knot_workspace = { path = "crates/red_knot_workspace" }
aho-corasick = { version = "1.1.3" }
annotate-snippets = { version = "0.9.2", features = ["color"] }
@@ -48,16 +50,17 @@ cachedir = { version = "0.3.1" }
camino = { version = "1.1.7" }
chrono = { version = "0.4.35", default-features = false, features = ["clock"] }
clap = { version = "4.5.3", features = ["derive"] }
clap_complete_command = { version = "0.5.1" }
clap_complete_command = { version = "0.6.0" }
clearscreen = { version = "3.0.0" }
codspeed-criterion-compat = { version = "2.6.0", default-features = false }
colored = { version = "2.1.0" }
console_error_panic_hook = { version = "0.1.7" }
console_log = { version = "1.0.0" }
countme = { version = "3.0.1" }
compact_str = "0.8.0"
criterion = { version = "0.5.1", default-features = false }
crossbeam = { version = "0.8.4" }
dashmap = { version = "5.5.3" }
dashmap = { version = "6.0.1" }
drop_bomb = { version = "0.1.5" }
env_logger = { version = "0.11.0" }
etcetera = { version = "0.8.0" }
@@ -69,7 +72,6 @@ hashbrown = "0.14.3"
ignore = { version = "0.4.22" }
imara-diff = { version = "0.1.5" }
imperative = { version = "1.0.4" }
indexmap = { version = "2.2.6" }
indicatif = { version = "0.17.8" }
indoc = { version = "2.0.4" }
insta = { version = "1.35.1" }
@@ -92,10 +94,10 @@ mimalloc = { version = "0.1.39" }
natord = { version = "1.0.9" }
notify = { version = "6.1.1" }
once_cell = { version = "1.19.0" }
ordermap = { version = "0.5.0" }
path-absolutize = { version = "3.1.1" }
path-slash = { version = "0.2.1" }
pathdiff = { version = "0.2.1" }
parking_lot = "0.12.1"
pep440_rs = { version = "0.6.0", features = ["serde"] }
pretty_assertions = "1.3.0"
proc-macro2 = { version = "1.0.79" }
@@ -106,7 +108,7 @@ rand = { version = "0.8.5" }
rayon = { version = "1.10.0" }
regex = { version = "1.10.2" }
rustc-hash = { version = "2.0.0" }
salsa = { git = "https://github.com/salsa-rs/salsa.git", rev = "f706aa2d32d473ee633a77c1af01d180c85da308" }
salsa = { git = "https://github.com/MichaReiser/salsa.git", tag = "red-knot-0.0.1" }
schemars = { version = "0.8.16" }
seahash = { version = "4.1.0" }
serde = { version = "1.0.197", features = ["derive"] }
@@ -119,7 +121,6 @@ serde_with = { version = "3.6.0", default-features = false, features = [
shellexpand = { version = "3.0.0" }
similar = { version = "2.4.0", features = ["inline"] }
smallvec = { version = "1.13.2" }
smol_str = { version = "0.2.2" }
static_assertions = "1.1.0"
strum = { version = "0.26.0", features = ["strum_macros"] }
strum_macros = { version = "0.26.0" }
@@ -127,12 +128,13 @@ syn = { version = "2.0.55" }
tempfile = { version = "3.9.0" }
test-case = { version = "3.3.1" }
thiserror = { version = "1.0.58" }
tikv-jemallocator = { version = "0.5.0" }
tikv-jemallocator = { version = "0.6.0" }
toml = { version = "0.8.11" }
tracing = { version = "0.1.40" }
tracing-flame = { version = "0.2.0" }
tracing-indicatif = { version = "0.3.6" }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
tracing-tree = { version = "0.3.0" }
tracing-subscriber = { version = "0.3.18", default-features = false, features = ["env-filter", "fmt"] }
tracing-tree = { version = "0.4.0" }
typed-arena = { version = "2.0.2" }
unic-ucd-category = { version = "0.9" }
unicode-ident = { version = "1.0.12" }
@@ -151,11 +153,12 @@ walkdir = { version = "2.3.2" }
wasm-bindgen = { version = "0.2.92" }
wasm-bindgen-test = { version = "0.3.42" }
wild = { version = "2" }
zip = { version = "0.6.6", default-features = false, features = ["zstd"] }
zip = { version = "0.6.6", default-features = false }
[workspace.lints.rust]
unsafe_code = "warn"
unreachable_pub = "warn"
unexpected_cfgs = { level = "warn", check-cfg = ["cfg(fuzzing)", "cfg(codspeed)"] }
[workspace.lints.clippy]
pedantic = { level = "warn", priority = -2 }
@@ -227,7 +230,7 @@ inherits = "release"
# Config for 'cargo dist'
[workspace.metadata.dist]
# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax)
cargo-dist-version = "0.14.0"
cargo-dist-version = "0.18.0"
# CI backends to support
ci = ["github"]
# The installers to generate for each app
@@ -258,21 +261,23 @@ targets = [
]
# Whether to auto-include files like READMEs, LICENSEs, and CHANGELOGs (default true)
auto-includes = false
# Whether cargo-dist should create a Github Release or use an existing draft
# Whether cargo-dist should create a GitHub Release or use an existing draft
create-release = true
# Publish jobs to run in CI
pr-run-mode = "skip"
# Whether CI should trigger releases with dispatches instead of tag pushes
dispatch-releases = true
# The stage during which the GitHub Release should be created
github-release = "announce"
# Whether CI should include auto-generated code to build local artifacts
build-local-artifacts = false
# Local artifacts jobs to run in CI
local-artifacts-jobs = ["./build-binaries", "./build-docker"]
# Publish jobs to run in CI
publish-jobs = ["./publish-pypi"]
publish-jobs = ["./publish-pypi", "./publish-wasm"]
# Announcement jobs to run in CI
post-announce-jobs = ["./notify-dependents"]
# Skip checking whether the specified configuration files are up to date
allow-dirty = ["ci"]
post-announce-jobs = ["./notify-dependents", "./publish-docs", "./publish-playground"]
# Custom permissions for GitHub Jobs
github-custom-job-permissions = { "build-docker" = { packages = "write", contents = "read" }, "publish-wasm" = { contents = "read", id-token = "write", packages = "write" } }
# Whether to install an updater program
install-updater = false

25
LICENSE
View File

@@ -1371,3 +1371,28 @@ are:
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
- pydoclint, licensed as follows:
"""
MIT License
Copyright (c) 2023 jsh9
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

View File

@@ -29,14 +29,14 @@ An extremely fast Python linter and code formatter, written in Rust.
- 🐍 Installable via `pip`
- 🛠️ `pyproject.toml` support
- 🤝 Python 3.13 compatibility
- ⚖️ Drop-in parity with [Flake8](https://docs.astral.sh/ruff/faq/#how-does-ruff-compare-to-flake8), isort, and Black
- ⚖️ Drop-in parity with [Flake8](https://docs.astral.sh/ruff/faq/#how-does-ruffs-linter-compare-to-flake8), isort, and [Black](https://docs.astral.sh/ruff/faq/#how-does-ruffs-formatter-compare-to-black)
- 📦 Built-in caching, to avoid re-analyzing unchanged files
- 🔧 Fix support, for automatic error correction (e.g., automatically remove unused imports)
- 📏 Over [800 built-in rules](https://docs.astral.sh/ruff/rules/), with native re-implementations
of popular Flake8 plugins, like flake8-bugbear
- ⌨️ First-party [editor integrations](https://docs.astral.sh/ruff/integrations/) for
[VS Code](https://github.com/astral-sh/ruff-vscode) and [more](https://github.com/astral-sh/ruff-lsp)
- 🌎 Monorepo-friendly, with [hierarchical and cascading configuration](https://docs.astral.sh/ruff/configuration/#pyprojecttoml-discovery)
[VS Code](https://github.com/astral-sh/ruff-vscode) and [more](https://docs.astral.sh/ruff/editors/setup)
- 🌎 Monorepo-friendly, with [hierarchical and cascading configuration](https://docs.astral.sh/ruff/configuration/#config-file-discovery)
Ruff aims to be orders of magnitude faster than alternative tools while integrating more
functionality behind a single, common interface.
@@ -119,7 +119,25 @@ For more, see the [documentation](https://docs.astral.sh/ruff/).
Ruff is available as [`ruff`](https://pypi.org/project/ruff/) on PyPI:
```shell
# With pip.
pip install ruff
# With pipx.
pipx install ruff
```
Starting with version `0.5.0`, Ruff can be installed with our standalone installers:
```shell
# On macOS and Linux.
curl -LsSf https://astral.sh/ruff/install.sh | sh
# On Windows.
powershell -c "irm https://astral.sh/ruff/install.ps1 | iex"
# For a specific version.
curl -LsSf https://astral.sh/ruff/0.5.7/install.sh | sh
powershell -c "irm https://astral.sh/ruff/0.5.7/install.ps1 | iex"
```
You can also install Ruff via [Homebrew](https://formulae.brew.sh/formula/ruff), [Conda](https://anaconda.org/conda-forge/ruff),
@@ -152,7 +170,7 @@ Ruff can also be used as a [pre-commit](https://pre-commit.com/) hook via [`ruff
```yaml
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.5.0
rev: v0.5.7
hooks:
# Run the linter.
- id: ruff
@@ -161,8 +179,7 @@ Ruff can also be used as a [pre-commit](https://pre-commit.com/) hook via [`ruff
- id: ruff-format
```
Ruff can also be used as a [VS Code extension](https://github.com/astral-sh/ruff-vscode) or
alongside any other editor through the [Ruff LSP](https://github.com/astral-sh/ruff-lsp).
Ruff can also be used as a [VS Code extension](https://github.com/astral-sh/ruff-vscode) or with [various other editors](https://docs.astral.sh/ruff/editors/setup).
Ruff can also be used as a [GitHub Action](https://github.com/features/actions) via
[`ruff-action`](https://github.com/chartboost/ruff-action):
@@ -406,6 +423,7 @@ Ruff is used by a number of major open-source projects and companies, including:
- [Dagger](https://github.com/dagger/dagger)
- [Dagster](https://github.com/dagster-io/dagster)
- Databricks ([MLflow](https://github.com/mlflow/mlflow))
- [Dify](https://github.com/langgenius/dify)
- [FastAPI](https://github.com/tiangolo/fastapi)
- [Godot](https://github.com/godotengine/godot)
- [Gradio](https://github.com/gradio-app/gradio)
@@ -416,6 +434,7 @@ Ruff is used by a number of major open-source projects and companies, including:
- Hugging Face ([Transformers](https://github.com/huggingface/transformers),
[Datasets](https://github.com/huggingface/datasets),
[Diffusers](https://github.com/huggingface/diffusers))
- IBM ([Qiskit](https://github.com/Qiskit/qiskit))
- ING Bank ([popmon](https://github.com/ing-bank/popmon), [probatus](https://github.com/ing-bank/probatus))
- [Ibis](https://github.com/ibis-project/ibis)
- [ivy](https://github.com/unifyai/ivy)

View File

@@ -1,6 +1,6 @@
[files]
# https://github.com/crate-ci/typos/issues/868
extend-exclude = ["crates/red_knot_module_resolver/vendor/**/*", "**/resources/**/*", "**/snapshots/**/*"]
extend-exclude = ["crates/red_knot_python_semantic/vendor/**/*", "**/resources/**/*", "**/snapshots/**/*"]
[default.extend-words]
"arange" = "arange" # e.g. `numpy.arange`

View File

@@ -10,4 +10,12 @@ doc-valid-idents = [
"SCREAMING_SNAKE_CASE",
"SQLAlchemy",
"StackOverflow",
"PyCharm",
]
ignore-interior-mutability = [
# Interned is read-only. The wrapped `Rc` never gets updated.
"ruff_formatter::format_element::Interned",
# The expression is read-only.
"ruff_python_ast::hashable::HashableExpr",
]

View File

@@ -1,6 +1,6 @@
[package]
name = "red_knot"
version = "0.1.0"
version = "0.0.0"
edition.workspace = true
rust-version.workspace = true
homepage.workspace = true
@@ -12,32 +12,28 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
red_knot_module_resolver = { workspace = true }
red_knot_python_semantic = { workspace = true }
red_knot_workspace = { workspace = true }
red_knot_server = { workspace = true }
ruff_python_parser = { workspace = true }
ruff_python_ast = { workspace = true }
ruff_text_size = { workspace = true }
ruff_index = { workspace = true }
ruff_notebook = { workspace = true }
ruff_db = { workspace = true, features = ["os", "cache"] }
anyhow = { workspace = true }
bitflags = { workspace = true }
chrono = { workspace = true }
clap = { workspace = true, features = ["wrap_help"] }
colored = { workspace = true }
countme = { workspace = true, features = ["enable"] }
crossbeam = { workspace = true }
ctrlc = { version = "3.4.4" }
dashmap = { workspace = true }
hashbrown = { workspace = true }
indexmap = { workspace = true }
is-macro = { workspace = true }
notify = { workspace = true }
parking_lot = { workspace = true }
rayon = { workspace = true }
rustc-hash = { workspace = true }
smol_str = { version = "0.2.1" }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
salsa = { workspace = true }
tracing = { workspace = true, features = ["release_max_level_debug"] }
tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
tracing-flame = { workspace = true }
tracing-tree = { workspace = true }
[dev-dependencies]
filetime = { workspace = true }
tempfile = { workspace = true }
[lints]

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

View File

@@ -0,0 +1,123 @@
# Tracing
Traces are a useful tool to narrow down the location of a bug or, at least, to understand why the compiler is doing a particular thing.
Note, tracing messages with severity `debug` or greater are user-facing. They should be phrased accordingly.
Tracing spans are only shown when using `-vvv`.
## Verbosity levels
The CLI supports different verbosity levels.
- default: Only show errors and warnings.
- `-v` activates `info!`: Show generally useful information such as paths of configuration files, detected platform, etc., but it's not a lot of messages, it's something you'll activate in CI by default. cargo build e.g. shows you which packages are fresh.
- `-vv` activates `debug!` and timestamps: This should be enough information to get to the bottom of bug reports. When you're processing many packages or files, you'll get pages and pages of output, but each line is link to a specific action or state change.
- `-vvv` activates `trace!` (only in debug builds) and shows tracing-spans: At this level, you're logging everything. Most of this is wasted, it's really slow, we dump e.g. the entire resolution graph. Only useful to developers, and you almost certainly want to use `RED_KNOT_LOG` to filter it down to the area your investigating.
## `RED_KNOT_LOG`
By default, the CLI shows messages from the `ruff` and `red_knot` crates. Tracing messages from other crates are not shown.
The `RED_KNOT_LOG` environment variable allows you to customize which messages are shown by specifying one
or more [filter directives](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/struct.EnvFilter.html#directives).
### Examples
#### Show all debug messages
Shows debug messages from all crates.
```bash
RED_KNOT_LOG=debug
```
#### Show salsa query execution messages
Show the salsa `execute: my_query` messages in addition to all red knot messages.
```bash
RED_KNOT_LOG=ruff=trace,red_knot=trace,salsa=info
```
#### Show typing traces
Only show traces for the `red_knot_python_semantic::types` module.
```bash
RED_KNOT_LOG="red_knot_python_semantic::types"
```
Note: Ensure that you use `-vvv` to see tracing spans.
#### Show messages for a single file
Shows all messages that are inside of a span for a specific file.
```bash
RED_KNOT_LOG=red_knot[{file=/home/micha/astral/test/x.py}]=trace
```
**Note**: Tracing still shows all spans because tracing can't know at the time of entering the span
whether one if its children has the file `x.py`.
**Note**: Salsa currently logs the entire memoized values. In our case, the source text and parsed AST.
This very quickly leads to extremely long outputs.
## Tracing and Salsa
Be mindful about using `tracing` in Salsa queries, especially when using `warn` or `error` because it isn't guaranteed
that the query will execute after restoring from a persistent cache. In which case the user won't see the message.
For example, don't use `tracing` to show the user a message when generating a lint violation failed
because the message would only be shown when linting the file the first time, but not on subsequent analysis
runs or when restoring from a persistent cache. This can be confusing for users because they
don't understand why a specific lint violation isn't raised. Instead, change your
query to return the failure as part of the query's result or use a Salsa accumulator.
## Tracing in tests
You can use `ruff_db::testing::setup_logging` or `ruff_db::testing::setup_logging_with_filter` to set up logging in tests.
```rust
use ruff_db::testing::setup_logging;
#[test]
fn test() {
let _logging = setup_logging();
tracing::info!("This message will be printed to stderr");
}
```
Note: Most test runners capture stderr and only show its output when a test fails.
Note also that `setup_logging` only sets up logging for the current thread because [`set_global_default`](https://docs.rs/tracing/latest/tracing/subscriber/fn.set_global_default.html) can only be
called **once**.
## Release builds
`trace!` events are removed in release builds.
## Profiling
Red Knot generates a folded stack trace to the current directory named `tracing.folded` when setting the environment variable `RED_KNOT_LOG_PROFILE` to `1` or `true`.
```bash
RED_KNOT_LOG_PROFILE=1 red_knot -- --current-directory=../test -vvv
```
You can convert the textual representation into a visual one using `inferno`.
```shell
cargo install inferno
```
```shell
# flamegraph
cat tracing.folded | inferno-flamegraph > tracing-flamegraph.svg
# flamechart
cat tracing.folded | inferno-flamegraph --flamechart > tracing-flamechart.svg
```
![Example flamegraph](./tracing-flamegraph.png)
See [`tracing-flame`](https://crates.io/crates/tracing-flame) for more details.

View File

@@ -1,418 +0,0 @@
use std::any::type_name;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::marker::PhantomData;
use rustc_hash::FxHashMap;
use ruff_index::{Idx, IndexVec};
use ruff_python_ast::visitor::source_order;
use ruff_python_ast::visitor::source_order::{SourceOrderVisitor, TraversalSignal};
use ruff_python_ast::{
AnyNodeRef, AstNode, ExceptHandler, ExceptHandlerExceptHandler, Expr, MatchCase, ModModule,
NodeKind, Parameter, Stmt, StmtAnnAssign, StmtAssign, StmtAugAssign, StmtClassDef,
StmtFunctionDef, StmtGlobal, StmtImport, StmtImportFrom, StmtNonlocal, StmtTypeAlias,
TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, WithItem,
};
use ruff_text_size::{Ranged, TextRange};
/// A type agnostic ID that uniquely identifies an AST node in a file.
#[ruff_index::newtype_index]
pub struct AstId;
/// A typed ID that uniquely identifies an AST node in a file.
///
/// This is different from [`AstId`] in that it is a combination of ID and the type of the node the ID identifies.
/// Typing the ID prevents mixing IDs of different node types and allows to restrict the API to only accept
/// nodes for which an ID has been created (not all AST nodes get an ID).
pub struct TypedAstId<N: HasAstId> {
erased: AstId,
_marker: PhantomData<fn() -> N>,
}
impl<N: HasAstId> TypedAstId<N> {
/// Upcasts this ID from a more specific node type to a more general node type.
pub fn upcast<M: HasAstId>(self) -> TypedAstId<M>
where
N: Into<M>,
{
TypedAstId {
erased: self.erased,
_marker: PhantomData,
}
}
}
impl<N: HasAstId> Copy for TypedAstId<N> {}
impl<N: HasAstId> Clone for TypedAstId<N> {
fn clone(&self) -> Self {
*self
}
}
impl<N: HasAstId> PartialEq for TypedAstId<N> {
fn eq(&self, other: &Self) -> bool {
self.erased == other.erased
}
}
impl<N: HasAstId> Eq for TypedAstId<N> {}
impl<N: HasAstId> Hash for TypedAstId<N> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.erased.hash(state);
}
}
impl<N: HasAstId> Debug for TypedAstId<N> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("TypedAstId")
.field(&self.erased)
.field(&type_name::<N>())
.finish()
}
}
pub struct AstIds {
ids: IndexVec<AstId, NodeKey>,
reverse: FxHashMap<NodeKey, AstId>,
}
impl AstIds {
// TODO rust analyzer doesn't allocate an ID for every node. It only allocates ids for
// nodes with a corresponding HIR element, that is nodes that are definitions.
pub fn from_module(module: &ModModule) -> Self {
let mut visitor = AstIdsVisitor::default();
// TODO: visit_module?
// Make sure we visit the root
visitor.create_id(module);
visitor.visit_body(&module.body);
while let Some(deferred) = visitor.deferred.pop() {
match deferred {
DeferredNode::FunctionDefinition(def) => {
def.visit_source_order(&mut visitor);
}
DeferredNode::ClassDefinition(def) => def.visit_source_order(&mut visitor),
}
}
AstIds {
ids: visitor.ids,
reverse: visitor.reverse,
}
}
/// Returns the ID to the root node.
pub fn root(&self) -> NodeKey {
self.ids[AstId::new(0)]
}
/// Returns the [`TypedAstId`] for a node.
pub fn ast_id<N: HasAstId>(&self, node: &N) -> TypedAstId<N> {
let key = node.syntax_node_key();
TypedAstId {
erased: self.reverse.get(&key).copied().unwrap(),
_marker: PhantomData,
}
}
/// Returns the [`TypedAstId`] for the node identified with the given [`TypedNodeKey`].
pub fn ast_id_for_key<N: HasAstId>(&self, node: &TypedNodeKey<N>) -> TypedAstId<N> {
let ast_id = self.ast_id_for_node_key(node.inner);
TypedAstId {
erased: ast_id,
_marker: PhantomData,
}
}
/// Returns the untyped [`AstId`] for the node identified by the given `node` key.
pub fn ast_id_for_node_key(&self, node: NodeKey) -> AstId {
self.reverse
.get(&node)
.copied()
.expect("Can't find node in AstIds map.")
}
/// Returns the [`TypedNodeKey`] for the node identified by the given [`TypedAstId`].
pub fn key<N: HasAstId>(&self, id: TypedAstId<N>) -> TypedNodeKey<N> {
let syntax_key = self.ids[id.erased];
TypedNodeKey::new(syntax_key).unwrap()
}
pub fn node_key<H: HasAstId>(&self, id: TypedAstId<H>) -> NodeKey {
self.ids[id.erased]
}
}
impl std::fmt::Debug for AstIds {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let mut map = f.debug_map();
for (key, value) in self.ids.iter_enumerated() {
map.entry(&key, &value);
}
map.finish()
}
}
impl PartialEq for AstIds {
fn eq(&self, other: &Self) -> bool {
self.ids == other.ids
}
}
impl Eq for AstIds {}
#[derive(Default)]
struct AstIdsVisitor<'a> {
ids: IndexVec<AstId, NodeKey>,
reverse: FxHashMap<NodeKey, AstId>,
deferred: Vec<DeferredNode<'a>>,
}
impl<'a> AstIdsVisitor<'a> {
fn create_id<A: HasAstId>(&mut self, node: &A) {
let node_key = node.syntax_node_key();
let id = self.ids.push(node_key);
self.reverse.insert(node_key, id);
}
}
impl<'a> SourceOrderVisitor<'a> for AstIdsVisitor<'a> {
fn visit_stmt(&mut self, stmt: &'a Stmt) {
match stmt {
Stmt::FunctionDef(def) => {
self.create_id(def);
self.deferred.push(DeferredNode::FunctionDefinition(def));
return;
}
// TODO defer visiting the assignment body, type alias parameters etc?
Stmt::ClassDef(def) => {
self.create_id(def);
self.deferred.push(DeferredNode::ClassDefinition(def));
return;
}
Stmt::Expr(_) => {
// Skip
return;
}
Stmt::Return(_) => {}
Stmt::Delete(_) => {}
Stmt::Assign(assignment) => self.create_id(assignment),
Stmt::AugAssign(assignment) => {
self.create_id(assignment);
}
Stmt::AnnAssign(assignment) => self.create_id(assignment),
Stmt::TypeAlias(assignment) => self.create_id(assignment),
Stmt::For(_) => {}
Stmt::While(_) => {}
Stmt::If(_) => {}
Stmt::With(_) => {}
Stmt::Match(_) => {}
Stmt::Raise(_) => {}
Stmt::Try(_) => {}
Stmt::Assert(_) => {}
Stmt::Import(import) => self.create_id(import),
Stmt::ImportFrom(import_from) => self.create_id(import_from),
Stmt::Global(global) => self.create_id(global),
Stmt::Nonlocal(non_local) => self.create_id(non_local),
Stmt::Pass(_) => {}
Stmt::Break(_) => {}
Stmt::Continue(_) => {}
Stmt::IpyEscapeCommand(_) => {}
}
source_order::walk_stmt(self, stmt);
}
fn visit_expr(&mut self, _expr: &'a Expr) {}
fn visit_parameter(&mut self, parameter: &'a Parameter) {
self.create_id(parameter);
source_order::walk_parameter(self, parameter);
}
fn visit_except_handler(&mut self, except_handler: &'a ExceptHandler) {
match except_handler {
ExceptHandler::ExceptHandler(except_handler) => {
self.create_id(except_handler);
}
}
source_order::walk_except_handler(self, except_handler);
}
fn visit_with_item(&mut self, with_item: &'a WithItem) {
self.create_id(with_item);
source_order::walk_with_item(self, with_item);
}
fn visit_match_case(&mut self, match_case: &'a MatchCase) {
self.create_id(match_case);
source_order::walk_match_case(self, match_case);
}
fn visit_type_param(&mut self, type_param: &'a TypeParam) {
self.create_id(type_param);
}
}
enum DeferredNode<'a> {
FunctionDefinition(&'a StmtFunctionDef),
ClassDefinition(&'a StmtClassDef),
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct TypedNodeKey<N: AstNode> {
/// The type erased node key.
inner: NodeKey,
_marker: PhantomData<fn() -> N>,
}
impl<N: AstNode> TypedNodeKey<N> {
pub fn from_node(node: &N) -> Self {
let inner = NodeKey::from_node(node.as_any_node_ref());
Self {
inner,
_marker: PhantomData,
}
}
pub fn new(node_key: NodeKey) -> Option<Self> {
N::can_cast(node_key.kind).then_some(TypedNodeKey {
inner: node_key,
_marker: PhantomData,
})
}
pub fn resolve<'a>(&self, root: AnyNodeRef<'a>) -> Option<N::Ref<'a>> {
let node_ref = self.inner.resolve(root)?;
Some(N::cast_ref(node_ref).unwrap())
}
pub fn resolve_unwrap<'a>(&self, root: AnyNodeRef<'a>) -> N::Ref<'a> {
self.resolve(root).expect("node should resolve")
}
pub fn erased(&self) -> &NodeKey {
&self.inner
}
}
struct FindNodeKeyVisitor<'a> {
key: NodeKey,
result: Option<AnyNodeRef<'a>>,
}
impl<'a> SourceOrderVisitor<'a> for FindNodeKeyVisitor<'a> {
fn enter_node(&mut self, node: AnyNodeRef<'a>) -> TraversalSignal {
if self.result.is_some() {
return TraversalSignal::Skip;
}
if node.range() == self.key.range && node.kind() == self.key.kind {
self.result = Some(node);
TraversalSignal::Skip
} else if node.range().contains_range(self.key.range) {
TraversalSignal::Traverse
} else {
TraversalSignal::Skip
}
}
fn visit_body(&mut self, body: &'a [Stmt]) {
// TODO it would be more efficient to use binary search instead of linear
for stmt in body {
if stmt.range().start() > self.key.range.end() {
break;
}
self.visit_stmt(stmt);
}
}
}
// TODO an alternative to this is to have a `NodeId` on each node (in increasing order depending on the position).
// This would allow to reduce the size of this to a u32.
// What would be nice if we could use an `Arc::weak_ref` here but that only works if we use
// `Arc` internally
// TODO: Implement the logic to resolve a node, given a db (and the correct file).
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct NodeKey {
kind: NodeKind,
range: TextRange,
}
impl NodeKey {
pub fn from_node(node: AnyNodeRef) -> Self {
NodeKey {
kind: node.kind(),
range: node.range(),
}
}
pub fn resolve<'a>(&self, root: AnyNodeRef<'a>) -> Option<AnyNodeRef<'a>> {
// We need to do a binary search here. Only traverse into a node if the range is withint the node
let mut visitor = FindNodeKeyVisitor {
key: *self,
result: None,
};
if visitor.enter_node(root) == TraversalSignal::Traverse {
root.visit_preorder(&mut visitor);
}
visitor.result
}
}
/// Marker trait implemented by AST nodes for which we extract the `AstId`.
pub trait HasAstId: AstNode {
fn node_key(&self) -> TypedNodeKey<Self>
where
Self: Sized,
{
TypedNodeKey {
inner: self.syntax_node_key(),
_marker: PhantomData,
}
}
fn syntax_node_key(&self) -> NodeKey {
NodeKey {
kind: self.as_any_node_ref().kind(),
range: self.range(),
}
}
}
impl HasAstId for StmtFunctionDef {}
impl HasAstId for StmtClassDef {}
impl HasAstId for StmtAnnAssign {}
impl HasAstId for StmtAugAssign {}
impl HasAstId for StmtAssign {}
impl HasAstId for StmtTypeAlias {}
impl HasAstId for ModModule {}
impl HasAstId for StmtImport {}
impl HasAstId for StmtImportFrom {}
impl HasAstId for Parameter {}
impl HasAstId for TypeParam {}
impl HasAstId for Stmt {}
impl HasAstId for TypeParamTypeVar {}
impl HasAstId for TypeParamTypeVarTuple {}
impl HasAstId for TypeParamParamSpec {}
impl HasAstId for StmtGlobal {}
impl HasAstId for StmtNonlocal {}
impl HasAstId for ExceptHandlerExceptHandler {}
impl HasAstId for WithItem {}
impl HasAstId for MatchCase {}

View File

@@ -1,165 +0,0 @@
use std::fmt::Formatter;
use std::hash::Hash;
use std::sync::atomic::{AtomicUsize, Ordering};
use crate::db::QueryResult;
use dashmap::mapref::entry::Entry;
use crate::FxDashMap;
/// Simple key value cache that locks on a per-key level.
pub struct KeyValueCache<K, V> {
map: FxDashMap<K, V>,
statistics: CacheStatistics,
}
impl<K, V> KeyValueCache<K, V>
where
K: Eq + Hash + Clone,
V: Clone,
{
pub fn try_get(&self, key: &K) -> Option<V> {
if let Some(existing) = self.map.get(key) {
self.statistics.hit();
Some(existing.clone())
} else {
self.statistics.miss();
None
}
}
pub fn get<F>(&self, key: &K, compute: F) -> QueryResult<V>
where
F: FnOnce(&K) -> QueryResult<V>,
{
Ok(match self.map.entry(key.clone()) {
Entry::Occupied(cached) => {
self.statistics.hit();
cached.get().clone()
}
Entry::Vacant(vacant) => {
self.statistics.miss();
let value = compute(key)?;
vacant.insert(value.clone());
value
}
})
}
pub fn set(&mut self, key: K, value: V) {
self.map.insert(key, value);
}
pub fn remove(&mut self, key: &K) -> Option<V> {
self.map.remove(key).map(|(_, value)| value)
}
pub fn clear(&mut self) {
self.map.clear();
self.map.shrink_to_fit();
}
pub fn statistics(&self) -> Option<Statistics> {
self.statistics.to_statistics()
}
}
impl<K, V> Default for KeyValueCache<K, V>
where
K: Eq + Hash,
V: Clone,
{
fn default() -> Self {
Self {
map: FxDashMap::default(),
statistics: CacheStatistics::default(),
}
}
}
impl<K, V> std::fmt::Debug for KeyValueCache<K, V>
where
K: std::fmt::Debug + Eq + Hash,
V: std::fmt::Debug,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let mut debug = f.debug_map();
for entry in &self.map {
debug.entry(&entry.value(), &entry.key());
}
debug.finish()
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Statistics {
pub hits: usize,
pub misses: usize,
}
impl Statistics {
#[allow(clippy::cast_precision_loss)]
pub fn hit_rate(&self) -> Option<f64> {
if self.hits + self.misses == 0 {
return None;
}
Some((self.hits as f64) / (self.hits + self.misses) as f64)
}
}
#[cfg(debug_assertions)]
pub type CacheStatistics = DebugStatistics;
#[cfg(not(debug_assertions))]
pub type CacheStatistics = ReleaseStatistics;
pub trait StatisticsRecorder {
fn hit(&self);
fn miss(&self);
fn to_statistics(&self) -> Option<Statistics>;
}
#[derive(Debug, Default)]
pub struct DebugStatistics {
hits: AtomicUsize,
misses: AtomicUsize,
}
impl StatisticsRecorder for DebugStatistics {
// TODO figure out appropriate Ordering
fn hit(&self) {
self.hits.fetch_add(1, Ordering::SeqCst);
}
fn miss(&self) {
self.misses.fetch_add(1, Ordering::SeqCst);
}
fn to_statistics(&self) -> Option<Statistics> {
let hits = self.hits.load(Ordering::SeqCst);
let misses = self.misses.load(Ordering::SeqCst);
Some(Statistics { hits, misses })
}
}
#[derive(Debug, Default)]
pub struct ReleaseStatistics;
impl StatisticsRecorder for ReleaseStatistics {
#[inline]
fn hit(&self) {}
#[inline]
fn miss(&self) {}
#[inline]
fn to_statistics(&self) -> Option<Statistics> {
None
}
}

View File

@@ -1,42 +0,0 @@
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
#[derive(Debug, Clone, Default)]
pub struct CancellationTokenSource {
signal: Arc<AtomicBool>,
}
impl CancellationTokenSource {
pub fn new() -> Self {
Self {
signal: Arc::new(AtomicBool::new(false)),
}
}
#[tracing::instrument(level = "trace", skip_all)]
pub fn cancel(&self) {
self.signal.store(true, std::sync::atomic::Ordering::SeqCst);
}
pub fn is_cancelled(&self) -> bool {
self.signal.load(std::sync::atomic::Ordering::SeqCst)
}
pub fn token(&self) -> CancellationToken {
CancellationToken {
signal: self.signal.clone(),
}
}
}
#[derive(Clone, Debug)]
pub struct CancellationToken {
signal: Arc<AtomicBool>,
}
impl CancellationToken {
/// Returns `true` if cancellation has been requested.
pub fn is_cancelled(&self) -> bool {
self.signal.load(std::sync::atomic::Ordering::SeqCst)
}
}

View File

@@ -1,248 +0,0 @@
use std::sync::Arc;
pub use jars::{HasJar, HasJars};
pub use query::{QueryError, QueryResult};
pub use runtime::DbRuntime;
pub use storage::JarsStorage;
use crate::files::FileId;
use crate::lint::{LintSemanticStorage, LintSyntaxStorage};
use crate::module::ModuleResolver;
use crate::parse::ParsedStorage;
use crate::semantic::SemanticIndexStorage;
use crate::semantic::TypeStore;
use crate::source::SourceStorage;
mod jars;
mod query;
mod runtime;
mod storage;
pub trait Database {
/// Returns a reference to the runtime of the current worker.
fn runtime(&self) -> &DbRuntime;
/// Returns a mutable reference to the runtime. Only one worker can hold a mutable reference to the runtime.
fn runtime_mut(&mut self) -> &mut DbRuntime;
/// Returns `Ok` if the queries have not been cancelled and `Err(QueryError::Cancelled)` otherwise.
fn cancelled(&self) -> QueryResult<()> {
self.runtime().cancelled()
}
/// Returns `true` if the queries have been cancelled.
fn is_cancelled(&self) -> bool {
self.runtime().is_cancelled()
}
}
/// Database that supports running queries from multiple threads.
pub trait ParallelDatabase: Database + Send {
/// Creates a snapshot of the database state that can be used to query the database in another thread.
///
/// The snapshot is a read-only view of the database but query results are shared between threads.
/// All queries will be automatically cancelled when applying any mutations (calling [`HasJars::jars_mut`])
/// to the database (not the snapshot, because they're readonly).
///
/// ## Creating a snapshot
///
/// Creating a snapshot of the database's jars is cheap but creating a snapshot of
/// other state stored on the database might require deep-cloning data. That's why you should
/// avoid creating snapshots in a hot function (e.g. don't create a snapshot for each file, instead
/// create a snapshot when scheduling the check of an entire program).
///
/// ## Salsa compatibility
/// Salsa prohibits creating a snapshot while running a local query (it's fine if other workers run a query) [[source](https://github.com/salsa-rs/salsa/issues/80)].
/// We should avoid creating snapshots while running a query because we might want to adopt Salsa in the future (if we can figure out persistent caching).
/// Unfortunately, the infrastructure doesn't provide an automated way of knowing when a query is run, that's
/// why we have to "enforce" this constraint manually.
#[must_use]
fn snapshot(&self) -> Snapshot<Self>;
}
pub trait DbWithJar<Jar>: Database + HasJar<Jar> {}
/// Readonly snapshot of a database.
///
/// ## Dead locks
/// A snapshot should always be dropped as soon as it is no longer necessary to run queries.
/// Storing the snapshot without running a query or periodically checking if cancellation was requested
/// can lead to deadlocks because mutating the [`Database`] requires cancels all pending queries
/// and waiting for all [`Snapshot`]s to be dropped.
#[derive(Debug)]
pub struct Snapshot<DB: ?Sized>
where
DB: ParallelDatabase,
{
db: DB,
}
impl<DB> Snapshot<DB>
where
DB: ParallelDatabase,
{
pub fn new(db: DB) -> Self {
Snapshot { db }
}
}
impl<DB> std::ops::Deref for Snapshot<DB>
where
DB: ParallelDatabase,
{
type Target = DB;
fn deref(&self) -> &DB {
&self.db
}
}
pub trait Upcast<T: ?Sized> {
fn upcast(&self) -> &T;
}
// Red knot specific databases code.
pub trait SourceDb: DbWithJar<SourceJar> {
// queries
fn file_id(&self, path: &std::path::Path) -> FileId;
fn file_path(&self, file_id: FileId) -> Arc<std::path::Path>;
}
pub trait SemanticDb: SourceDb + DbWithJar<SemanticJar> + Upcast<dyn SourceDb> {}
pub trait LintDb: SemanticDb + DbWithJar<LintJar> + Upcast<dyn SemanticDb> {}
pub trait Db: LintDb + Upcast<dyn LintDb> {}
#[derive(Debug, Default)]
pub struct SourceJar {
pub sources: SourceStorage,
pub parsed: ParsedStorage,
}
#[derive(Debug, Default)]
pub struct SemanticJar {
pub module_resolver: ModuleResolver,
pub semantic_indices: SemanticIndexStorage,
pub type_store: TypeStore,
}
#[derive(Debug, Default)]
pub struct LintJar {
pub lint_syntax: LintSyntaxStorage,
pub lint_semantic: LintSemanticStorage,
}
#[cfg(test)]
pub(crate) mod tests {
use std::path::Path;
use std::sync::Arc;
use crate::db::{
Database, DbRuntime, DbWithJar, HasJar, HasJars, JarsStorage, LintDb, LintJar, QueryResult,
SourceDb, SourceJar, Upcast,
};
use crate::files::{FileId, Files};
use super::{SemanticDb, SemanticJar};
// This can be a partial database used in a single crate for testing.
// It would hold fewer data than the full database.
#[derive(Debug, Default)]
pub(crate) struct TestDb {
files: Files,
jars: JarsStorage<Self>,
}
impl HasJar<SourceJar> for TestDb {
fn jar(&self) -> QueryResult<&SourceJar> {
Ok(&self.jars()?.0)
}
fn jar_mut(&mut self) -> &mut SourceJar {
&mut self.jars_mut().0
}
}
impl HasJar<SemanticJar> for TestDb {
fn jar(&self) -> QueryResult<&SemanticJar> {
Ok(&self.jars()?.1)
}
fn jar_mut(&mut self) -> &mut SemanticJar {
&mut self.jars_mut().1
}
}
impl HasJar<LintJar> for TestDb {
fn jar(&self) -> QueryResult<&LintJar> {
Ok(&self.jars()?.2)
}
fn jar_mut(&mut self) -> &mut LintJar {
&mut self.jars_mut().2
}
}
impl SourceDb for TestDb {
fn file_id(&self, path: &Path) -> FileId {
self.files.intern(path)
}
fn file_path(&self, file_id: FileId) -> Arc<Path> {
self.files.path(file_id)
}
}
impl DbWithJar<SourceJar> for TestDb {}
impl Upcast<dyn SourceDb> for TestDb {
fn upcast(&self) -> &(dyn SourceDb + 'static) {
self
}
}
impl SemanticDb for TestDb {}
impl DbWithJar<SemanticJar> for TestDb {}
impl Upcast<dyn SemanticDb> for TestDb {
fn upcast(&self) -> &(dyn SemanticDb + 'static) {
self
}
}
impl LintDb for TestDb {}
impl Upcast<dyn LintDb> for TestDb {
fn upcast(&self) -> &(dyn LintDb + 'static) {
self
}
}
impl DbWithJar<LintJar> for TestDb {}
impl HasJars for TestDb {
type Jars = (SourceJar, SemanticJar, LintJar);
fn jars(&self) -> QueryResult<&Self::Jars> {
self.jars.jars()
}
fn jars_mut(&mut self) -> &mut Self::Jars {
self.jars.jars_mut()
}
}
impl Database for TestDb {
fn runtime(&self) -> &DbRuntime {
self.jars.runtime()
}
fn runtime_mut(&mut self) -> &mut DbRuntime {
self.jars.runtime_mut()
}
}
}

View File

@@ -1,37 +0,0 @@
use crate::db::query::QueryResult;
/// Gives access to a specific jar in the database.
///
/// Nope, the terminology isn't borrowed from Java but from Salsa <https://salsa-rs.github.io/salsa/>,
/// which is an analogy to storing the salsa in different jars.
///
/// The basic idea is that each crate can define its own jar and the jars can be combined to a single
/// database in the top level crate. Each crate also defines its own `Database` trait. The combination of
/// `Database` trait and the jar allows to write queries in isolation without having to know how they get composed at the upper levels.
///
/// Salsa further defines a `HasIngredient` trait which slices the jar to a specific storage (e.g. a specific cache).
/// We don't need this just yet because we write our queries by hand. We may want a similar trait if we decide
/// to use a macro to generate the queries.
pub trait HasJar<T> {
/// Gives a read-only reference to the jar.
fn jar(&self) -> QueryResult<&T>;
/// Gives a mutable reference to the jar.
fn jar_mut(&mut self) -> &mut T;
}
/// Gives access to the jars in a database.
pub trait HasJars {
/// A type storing the jars.
///
/// Most commonly, this is a tuple where each jar is a tuple element.
type Jars: Default;
/// Gives access to the underlying jars but tests if the queries have been cancelled.
///
/// Returns `Err(QueryError::Cancelled)` if the queries have been cancelled.
fn jars(&self) -> QueryResult<&Self::Jars>;
/// Gives mutable access to the underlying jars.
fn jars_mut(&mut self) -> &mut Self::Jars;
}

View File

@@ -1,20 +0,0 @@
use std::fmt::{Display, Formatter};
/// Reason why a db query operation failed.
#[derive(Debug, Clone, Copy)]
pub enum QueryError {
/// The query was cancelled because the DB was mutated or the query was cancelled by the host (e.g. on a file change or when pressing CTRL+C).
Cancelled,
}
impl Display for QueryError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
QueryError::Cancelled => f.write_str("query was cancelled"),
}
}
}
impl std::error::Error for QueryError {}
pub type QueryResult<T> = Result<T, QueryError>;

View File

@@ -1,41 +0,0 @@
use crate::cancellation::CancellationTokenSource;
use crate::db::{QueryError, QueryResult};
/// Holds the jar agnostic state of the database.
#[derive(Debug, Default)]
pub struct DbRuntime {
/// The cancellation token source used to signal other works that the queries should be aborted and
/// exit at the next possible point.
cancellation_token: CancellationTokenSource,
}
impl DbRuntime {
pub(super) fn snapshot(&self) -> Self {
Self {
cancellation_token: self.cancellation_token.clone(),
}
}
/// Cancels the pending queries of other workers. The current worker cannot have any pending
/// queries because we're holding a mutable reference to the runtime.
pub(super) fn cancel_other_workers(&mut self) {
self.cancellation_token.cancel();
// Set a new cancellation token so that we're in a non-cancelled state again when running the next
// query.
self.cancellation_token = CancellationTokenSource::default();
}
/// Returns `Ok` if the queries have not been cancelled and `Err(QueryError::Cancelled)` otherwise.
pub(super) fn cancelled(&self) -> QueryResult<()> {
if self.cancellation_token.is_cancelled() {
Err(QueryError::Cancelled)
} else {
Ok(())
}
}
/// Returns `true` if the queries have been cancelled.
pub(super) fn is_cancelled(&self) -> bool {
self.cancellation_token.is_cancelled()
}
}

View File

@@ -1,117 +0,0 @@
use std::fmt::Formatter;
use std::sync::Arc;
use crossbeam::sync::WaitGroup;
use crate::db::query::QueryResult;
use crate::db::runtime::DbRuntime;
use crate::db::{HasJars, ParallelDatabase};
/// Stores the jars of a database and the state for each worker.
///
/// Today, all state is shared across all workers, but it may be desired to store data per worker in the future.
pub struct JarsStorage<T>
where
T: HasJars + Sized,
{
// It's important that `jars_wait_group` is declared after `jars` to ensure that `jars` is dropped first.
// See https://doc.rust-lang.org/reference/destructors.html
/// Stores the jars of the database.
jars: Arc<T::Jars>,
/// Used to count the references to `jars`. Allows implementing `jars_mut` without requiring to clone `jars`.
jars_wait_group: WaitGroup,
/// The data agnostic state.
runtime: DbRuntime,
}
impl<Db> JarsStorage<Db>
where
Db: HasJars,
{
pub(super) fn new() -> Self {
Self {
jars: Arc::new(Db::Jars::default()),
jars_wait_group: WaitGroup::default(),
runtime: DbRuntime::default(),
}
}
/// Creates a snapshot of the jars.
///
/// Creating the snapshot is cheap because it doesn't clone the jars, it only increments a ref counter.
#[must_use]
pub fn snapshot(&self) -> JarsStorage<Db>
where
Db: ParallelDatabase,
{
Self {
jars: self.jars.clone(),
jars_wait_group: self.jars_wait_group.clone(),
runtime: self.runtime.snapshot(),
}
}
pub(crate) fn jars(&self) -> QueryResult<&Db::Jars> {
self.runtime.cancelled()?;
Ok(&self.jars)
}
/// Returns a mutable reference to the jars without cloning their content.
///
/// The method cancels any pending queries of other works and waits for them to complete so that
/// this instance is the only instance holding a reference to the jars.
pub(crate) fn jars_mut(&mut self) -> &mut Db::Jars {
// We have a mutable ref here, so no more workers can be spawned between calling this function and taking the mut ref below.
self.cancel_other_workers();
// Now all other references to `self.jars` should have been released. We can now safely return a mutable reference
// to the Arc's content.
let jars =
Arc::get_mut(&mut self.jars).expect("All references to jars should have been released");
jars
}
pub(crate) fn runtime(&self) -> &DbRuntime {
&self.runtime
}
pub(crate) fn runtime_mut(&mut self) -> &mut DbRuntime {
// Note: This method may need to use a similar trick to `jars_mut` if `DbRuntime` is ever to store data that is shared between workers.
&mut self.runtime
}
#[tracing::instrument(level = "trace", skip(self))]
fn cancel_other_workers(&mut self) {
self.runtime.cancel_other_workers();
// Wait for all other works to complete.
let existing_wait = std::mem::take(&mut self.jars_wait_group);
existing_wait.wait();
}
}
impl<Db> Default for JarsStorage<Db>
where
Db: HasJars,
{
fn default() -> Self {
Self::new()
}
}
impl<T> std::fmt::Debug for JarsStorage<T>
where
T: HasJars,
<T as HasJars>::Jars: std::fmt::Debug,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SharedStorage")
.field("jars", &self.jars)
.field("jars_wait_group", &self.jars_wait_group)
.field("runtime", &self.runtime)
.finish()
}
}

View File

@@ -1,180 +0,0 @@
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::path::Path;
use std::sync::Arc;
use hashbrown::hash_map::RawEntryMut;
use parking_lot::RwLock;
use rustc_hash::FxHasher;
use ruff_index::{newtype_index, IndexVec};
type Map<K, V> = hashbrown::HashMap<K, V, ()>;
#[newtype_index]
pub struct FileId;
// TODO we'll need a higher level virtual file system abstraction that allows testing if a file exists
// or retrieving its content (ideally lazily and in a way that the memory can be retained later)
// I suspect that we'll end up with a FileSystem trait and our own Path abstraction.
#[derive(Default)]
pub struct Files {
inner: Arc<RwLock<FilesInner>>,
}
impl Files {
#[tracing::instrument(level = "debug", skip(self))]
pub fn intern(&self, path: &Path) -> FileId {
self.inner.write().intern(path)
}
pub fn try_get(&self, path: &Path) -> Option<FileId> {
self.inner.read().try_get(path)
}
#[tracing::instrument(level = "debug", skip(self))]
pub fn path(&self, id: FileId) -> Arc<Path> {
self.inner.read().path(id)
}
/// Snapshots files for a new database snapshot.
///
/// This method should not be used outside a database snapshot.
#[must_use]
pub fn snapshot(&self) -> Files {
Files {
inner: self.inner.clone(),
}
}
}
impl Debug for Files {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let files = self.inner.read();
let mut debug = f.debug_map();
for item in files.iter() {
debug.entry(&item.0, &item.1);
}
debug.finish()
}
}
impl PartialEq for Files {
fn eq(&self, other: &Self) -> bool {
self.inner.read().eq(&other.inner.read())
}
}
impl Eq for Files {}
#[derive(Default)]
struct FilesInner {
by_path: Map<FileId, ()>,
// TODO should we use a map here to reclaim the space for removed files?
// TODO I think we should use our own path abstraction here to avoid having to normalize paths
// and dealing with non-utf paths everywhere.
by_id: IndexVec<FileId, Arc<Path>>,
}
impl FilesInner {
/// Inserts the path and returns a new id for it or returns the id if it is an existing path.
// TODO should this accept Path or PathBuf?
pub(crate) fn intern(&mut self, path: &Path) -> FileId {
let hash = FilesInner::hash_path(path);
let entry = self
.by_path
.raw_entry_mut()
.from_hash(hash, |existing_file| &*self.by_id[*existing_file] == path);
match entry {
RawEntryMut::Occupied(entry) => *entry.key(),
RawEntryMut::Vacant(entry) => {
let id = self.by_id.push(Arc::from(path));
entry.insert_with_hasher(hash, id, (), |file| {
FilesInner::hash_path(&self.by_id[*file])
});
id
}
}
}
fn hash_path(path: &Path) -> u64 {
let mut hasher = FxHasher::default();
path.hash(&mut hasher);
hasher.finish()
}
pub(crate) fn try_get(&self, path: &Path) -> Option<FileId> {
let mut hasher = FxHasher::default();
path.hash(&mut hasher);
let hash = hasher.finish();
Some(
*self
.by_path
.raw_entry()
.from_hash(hash, |existing_file| &*self.by_id[*existing_file] == path)?
.0,
)
}
/// Returns the path for the file with the given id.
pub(crate) fn path(&self, id: FileId) -> Arc<Path> {
self.by_id[id].clone()
}
pub(crate) fn iter(&self) -> impl Iterator<Item = (FileId, Arc<Path>)> + '_ {
self.by_path.keys().map(|id| (*id, self.by_id[*id].clone()))
}
}
impl PartialEq for FilesInner {
fn eq(&self, other: &Self) -> bool {
self.by_id == other.by_id
}
}
impl Eq for FilesInner {}
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
#[test]
fn insert_path_twice_same_id() {
let files = Files::default();
let path = PathBuf::from("foo/bar");
let id1 = files.intern(&path);
let id2 = files.intern(&path);
assert_eq!(id1, id2);
}
#[test]
fn insert_different_paths_different_ids() {
let files = Files::default();
let path1 = PathBuf::from("foo/bar");
let path2 = PathBuf::from("foo/bar/baz");
let id1 = files.intern(&path1);
let id2 = files.intern(&path2);
assert_ne!(id1, id2);
}
#[test]
fn four_files() {
let files = Files::default();
let foo_path = PathBuf::from("foo");
let foo_id = files.intern(&foo_path);
let bar_path = PathBuf::from("bar");
files.intern(&bar_path);
let baz_path = PathBuf::from("baz");
files.intern(&baz_path);
let qux_path = PathBuf::from("qux");
files.intern(&qux_path);
let foo_id_2 = files.try_get(&foo_path).expect("foo_path to be found");
assert_eq!(foo_id_2, foo_id);
}
}

View File

@@ -1,67 +0,0 @@
//! Key observations
//!
//! The HIR (High-Level Intermediate Representation) avoids allocations to large extends by:
//! * Using an arena per node type
//! * using ids and id ranges to reference items.
//!
//! Using separate arena per node type has the advantage that the IDs are relatively stable, because
//! they only change when a node of the same kind has been added or removed. (What's unclear is if that matters or if
//! it still triggers a re-compute because the AST-id in the node has changed).
//!
//! The HIR does not store all details. It mainly stores the *public* interface. There's a reference
//! back to the AST node to get more details.
//!
//!
use crate::ast_ids::{HasAstId, TypedAstId};
use crate::files::FileId;
use std::fmt::Formatter;
use std::hash::{Hash, Hasher};
pub struct HirAstId<N: HasAstId> {
file_id: FileId,
node_id: TypedAstId<N>,
}
impl<N: HasAstId> Copy for HirAstId<N> {}
impl<N: HasAstId> Clone for HirAstId<N> {
fn clone(&self) -> Self {
*self
}
}
impl<N: HasAstId> PartialEq for HirAstId<N> {
fn eq(&self, other: &Self) -> bool {
self.file_id == other.file_id && self.node_id == other.node_id
}
}
impl<N: HasAstId> Eq for HirAstId<N> {}
impl<N: HasAstId> std::fmt::Debug for HirAstId<N> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("HirAstId")
.field("file_id", &self.file_id)
.field("node_id", &self.node_id)
.finish()
}
}
impl<N: HasAstId> Hash for HirAstId<N> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.file_id.hash(state);
self.node_id.hash(state);
}
}
impl<N: HasAstId> HirAstId<N> {
pub fn upcast<M: HasAstId>(self) -> HirAstId<M>
where
N: Into<M>,
{
HirAstId {
file_id: self.file_id,
node_id: self.node_id.upcast(),
}
}
}

View File

@@ -1,556 +0,0 @@
use std::ops::{Index, Range};
use ruff_index::{newtype_index, IndexVec};
use ruff_python_ast::visitor::preorder;
use ruff_python_ast::visitor::preorder::PreorderVisitor;
use ruff_python_ast::{
Decorator, ExceptHandler, ExceptHandlerExceptHandler, Expr, MatchCase, ModModule, Stmt,
StmtAnnAssign, StmtAssign, StmtClassDef, StmtFunctionDef, StmtGlobal, StmtImport,
StmtImportFrom, StmtNonlocal, StmtTypeAlias, TypeParam, TypeParamParamSpec, TypeParamTypeVar,
TypeParamTypeVarTuple, WithItem,
};
use crate::ast_ids::{AstIds, HasAstId};
use crate::files::FileId;
use crate::hir::HirAstId;
use crate::Name;
#[newtype_index]
pub struct FunctionId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Function {
ast_id: HirAstId<StmtFunctionDef>,
name: Name,
parameters: Range<ParameterId>,
type_parameters: Range<TypeParameterId>, // TODO: type_parameters, return expression, decorators
}
#[newtype_index]
pub struct ParameterId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Parameter {
kind: ParameterKind,
name: Name,
default: Option<()>, // TODO use expression HIR
ast_id: HirAstId<ruff_python_ast::Parameter>,
}
// TODO or should `Parameter` be an enum?
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ParameterKind {
PositionalOnly,
Arguments,
Vararg,
KeywordOnly,
Kwarg,
}
#[newtype_index]
pub struct ClassId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Class {
name: Name,
ast_id: HirAstId<StmtClassDef>,
// TODO type parameters, inheritance, decorators, members
}
#[newtype_index]
pub struct AssignmentId;
// This can have more than one name...
// but that means we can't implement `name()` on `ModuleItem`.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Assignment {
// TODO: Handle multiple names / targets
name: Name,
ast_id: HirAstId<StmtAssign>,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct AnnotatedAssignment {
name: Name,
ast_id: HirAstId<StmtAnnAssign>,
}
#[newtype_index]
pub struct AnnotatedAssignmentId;
#[newtype_index]
pub struct TypeAliasId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TypeAlias {
name: Name,
ast_id: HirAstId<StmtTypeAlias>,
parameters: Range<TypeParameterId>,
}
#[newtype_index]
pub struct TypeParameterId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TypeParameter {
TypeVar(TypeParameterTypeVar),
ParamSpec(TypeParameterParamSpec),
TypeVarTuple(TypeParameterTypeVarTuple),
}
impl TypeParameter {
pub fn ast_id(&self) -> HirAstId<TypeParam> {
match self {
TypeParameter::TypeVar(type_var) => type_var.ast_id.upcast(),
TypeParameter::ParamSpec(param_spec) => param_spec.ast_id.upcast(),
TypeParameter::TypeVarTuple(type_var_tuple) => type_var_tuple.ast_id.upcast(),
}
}
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TypeParameterTypeVar {
name: Name,
ast_id: HirAstId<TypeParamTypeVar>,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TypeParameterParamSpec {
name: Name,
ast_id: HirAstId<TypeParamParamSpec>,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TypeParameterTypeVarTuple {
name: Name,
ast_id: HirAstId<TypeParamTypeVarTuple>,
}
#[newtype_index]
pub struct GlobalId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Global {
// TODO track names
ast_id: HirAstId<StmtGlobal>,
}
#[newtype_index]
pub struct NonLocalId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct NonLocal {
// TODO track names
ast_id: HirAstId<StmtNonlocal>,
}
pub enum DefinitionId {
Function(FunctionId),
Parameter(ParameterId),
Class(ClassId),
Assignment(AssignmentId),
AnnotatedAssignment(AnnotatedAssignmentId),
Global(GlobalId),
NonLocal(NonLocalId),
TypeParameter(TypeParameterId),
TypeAlias(TypeAlias),
}
pub enum DefinitionItem {
Function(Function),
Parameter(Parameter),
Class(Class),
Assignment(Assignment),
AnnotatedAssignment(AnnotatedAssignment),
Global(Global),
NonLocal(NonLocal),
TypeParameter(TypeParameter),
TypeAlias(TypeAlias),
}
// The closest is rust-analyzers item-tree. It only represents "Items" which make the public interface of a module
// (it excludes any other statement or expressions). rust-analyzer uses it as the main input to the name resolution
// algorithm
// > It is the input to the name resolution algorithm, as well as to the queries defined in `adt.rs`,
// > `data.rs`, and most things in `attr.rs`.
//
// > One important purpose of this layer is to provide an "invalidation barrier" for incremental
// > computations: when typing inside an item body, the `ItemTree` of the modified file is typically
// > unaffected, so we don't have to recompute name resolution results or item data (see `data.rs`).
//
// I haven't fully figured this out but I think that this composes the "public" interface of a module?
// But maybe that's too optimistic.
//
//
#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct Definitions {
functions: IndexVec<FunctionId, Function>,
parameters: IndexVec<ParameterId, Parameter>,
classes: IndexVec<ClassId, Class>,
assignments: IndexVec<AssignmentId, Assignment>,
annotated_assignments: IndexVec<AnnotatedAssignmentId, AnnotatedAssignment>,
type_aliases: IndexVec<TypeAliasId, TypeAlias>,
type_parameters: IndexVec<TypeParameterId, TypeParameter>,
globals: IndexVec<GlobalId, Global>,
non_locals: IndexVec<NonLocalId, NonLocal>,
}
impl Definitions {
pub fn from_module(module: &ModModule, ast_ids: &AstIds, file_id: FileId) -> Self {
let mut visitor = DefinitionsVisitor {
definitions: Definitions::default(),
ast_ids,
file_id,
};
visitor.visit_body(&module.body);
visitor.definitions
}
}
impl Index<FunctionId> for Definitions {
type Output = Function;
fn index(&self, index: FunctionId) -> &Self::Output {
&self.functions[index]
}
}
impl Index<ParameterId> for Definitions {
type Output = Parameter;
fn index(&self, index: ParameterId) -> &Self::Output {
&self.parameters[index]
}
}
impl Index<ClassId> for Definitions {
type Output = Class;
fn index(&self, index: ClassId) -> &Self::Output {
&self.classes[index]
}
}
impl Index<AssignmentId> for Definitions {
type Output = Assignment;
fn index(&self, index: AssignmentId) -> &Self::Output {
&self.assignments[index]
}
}
impl Index<AnnotatedAssignmentId> for Definitions {
type Output = AnnotatedAssignment;
fn index(&self, index: AnnotatedAssignmentId) -> &Self::Output {
&self.annotated_assignments[index]
}
}
impl Index<TypeAliasId> for Definitions {
type Output = TypeAlias;
fn index(&self, index: TypeAliasId) -> &Self::Output {
&self.type_aliases[index]
}
}
impl Index<GlobalId> for Definitions {
type Output = Global;
fn index(&self, index: GlobalId) -> &Self::Output {
&self.globals[index]
}
}
impl Index<NonLocalId> for Definitions {
type Output = NonLocal;
fn index(&self, index: NonLocalId) -> &Self::Output {
&self.non_locals[index]
}
}
impl Index<TypeParameterId> for Definitions {
type Output = TypeParameter;
fn index(&self, index: TypeParameterId) -> &Self::Output {
&self.type_parameters[index]
}
}
struct DefinitionsVisitor<'a> {
definitions: Definitions,
ast_ids: &'a AstIds,
file_id: FileId,
}
impl DefinitionsVisitor<'_> {
fn ast_id<N: HasAstId>(&self, node: &N) -> HirAstId<N> {
HirAstId {
file_id: self.file_id,
node_id: self.ast_ids.ast_id(node),
}
}
fn lower_function_def(&mut self, function: &StmtFunctionDef) -> FunctionId {
let name = Name::new(&function.name);
let first_type_parameter_id = self.definitions.type_parameters.next_index();
let mut last_type_parameter_id = first_type_parameter_id;
if let Some(type_params) = &function.type_params {
for parameter in &type_params.type_params {
let id = self.lower_type_parameter(parameter);
last_type_parameter_id = id;
}
}
let parameters = self.lower_parameters(&function.parameters);
self.definitions.functions.push(Function {
name,
ast_id: self.ast_id(function),
parameters,
type_parameters: first_type_parameter_id..last_type_parameter_id,
})
}
fn lower_parameters(&mut self, parameters: &ruff_python_ast::Parameters) -> Range<ParameterId> {
let first_parameter_id = self.definitions.parameters.next_index();
let mut last_parameter_id = first_parameter_id;
for parameter in &parameters.posonlyargs {
last_parameter_id = self.definitions.parameters.push(Parameter {
kind: ParameterKind::PositionalOnly,
name: Name::new(&parameter.parameter.name),
default: None,
ast_id: self.ast_id(&parameter.parameter),
});
}
if let Some(vararg) = &parameters.vararg {
last_parameter_id = self.definitions.parameters.push(Parameter {
kind: ParameterKind::Vararg,
name: Name::new(&vararg.name),
default: None,
ast_id: self.ast_id(vararg),
});
}
for parameter in &parameters.kwonlyargs {
last_parameter_id = self.definitions.parameters.push(Parameter {
kind: ParameterKind::KeywordOnly,
name: Name::new(&parameter.parameter.name),
default: None,
ast_id: self.ast_id(&parameter.parameter),
});
}
if let Some(kwarg) = &parameters.kwarg {
last_parameter_id = self.definitions.parameters.push(Parameter {
kind: ParameterKind::KeywordOnly,
name: Name::new(&kwarg.name),
default: None,
ast_id: self.ast_id(kwarg),
});
}
first_parameter_id..last_parameter_id
}
fn lower_class_def(&mut self, class: &StmtClassDef) -> ClassId {
let name = Name::new(&class.name);
self.definitions.classes.push(Class {
name,
ast_id: self.ast_id(class),
})
}
fn lower_assignment(&mut self, assignment: &StmtAssign) {
// FIXME handle multiple names
if let Some(Expr::Name(name)) = assignment.targets.first() {
self.definitions.assignments.push(Assignment {
name: Name::new(&name.id),
ast_id: self.ast_id(assignment),
});
}
}
fn lower_annotated_assignment(&mut self, annotated_assignment: &StmtAnnAssign) {
if let Expr::Name(name) = &*annotated_assignment.target {
self.definitions
.annotated_assignments
.push(AnnotatedAssignment {
name: Name::new(&name.id),
ast_id: self.ast_id(annotated_assignment),
});
}
}
fn lower_type_alias(&mut self, type_alias: &StmtTypeAlias) {
if let Expr::Name(name) = &*type_alias.name {
let name = Name::new(&name.id);
let lower_parameters_id = self.definitions.type_parameters.next_index();
let mut last_parameter_id = lower_parameters_id;
if let Some(type_params) = &type_alias.type_params {
for type_parameter in &type_params.type_params {
let id = self.lower_type_parameter(type_parameter);
last_parameter_id = id;
}
}
self.definitions.type_aliases.push(TypeAlias {
name,
ast_id: self.ast_id(type_alias),
parameters: lower_parameters_id..last_parameter_id,
});
}
}
fn lower_type_parameter(&mut self, type_parameter: &TypeParam) -> TypeParameterId {
match type_parameter {
TypeParam::TypeVar(type_var) => {
self.definitions
.type_parameters
.push(TypeParameter::TypeVar(TypeParameterTypeVar {
name: Name::new(&type_var.name),
ast_id: self.ast_id(type_var),
}))
}
TypeParam::ParamSpec(param_spec) => {
self.definitions
.type_parameters
.push(TypeParameter::ParamSpec(TypeParameterParamSpec {
name: Name::new(&param_spec.name),
ast_id: self.ast_id(param_spec),
}))
}
TypeParam::TypeVarTuple(type_var_tuple) => {
self.definitions
.type_parameters
.push(TypeParameter::TypeVarTuple(TypeParameterTypeVarTuple {
name: Name::new(&type_var_tuple.name),
ast_id: self.ast_id(type_var_tuple),
}))
}
}
}
fn lower_import(&mut self, _import: &StmtImport) {
// TODO
}
fn lower_import_from(&mut self, _import_from: &StmtImportFrom) {
// TODO
}
fn lower_global(&mut self, global: &StmtGlobal) -> GlobalId {
self.definitions.globals.push(Global {
ast_id: self.ast_id(global),
})
}
fn lower_non_local(&mut self, non_local: &StmtNonlocal) -> NonLocalId {
self.definitions.non_locals.push(NonLocal {
ast_id: self.ast_id(non_local),
})
}
fn lower_except_handler(&mut self, _except_handler: &ExceptHandlerExceptHandler) {
// TODO
}
fn lower_with_item(&mut self, _with_item: &WithItem) {
// TODO
}
fn lower_match_case(&mut self, _match_case: &MatchCase) {
// TODO
}
}
impl PreorderVisitor<'_> for DefinitionsVisitor<'_> {
fn visit_stmt(&mut self, stmt: &Stmt) {
match stmt {
// Definition statements
Stmt::FunctionDef(definition) => {
self.lower_function_def(definition);
self.visit_body(&definition.body);
}
Stmt::ClassDef(definition) => {
self.lower_class_def(definition);
self.visit_body(&definition.body);
}
Stmt::Assign(assignment) => {
self.lower_assignment(assignment);
}
Stmt::AnnAssign(annotated_assignment) => {
self.lower_annotated_assignment(annotated_assignment);
}
Stmt::TypeAlias(type_alias) => {
self.lower_type_alias(type_alias);
}
Stmt::Import(import) => self.lower_import(import),
Stmt::ImportFrom(import_from) => self.lower_import_from(import_from),
Stmt::Global(global) => {
self.lower_global(global);
}
Stmt::Nonlocal(non_local) => {
self.lower_non_local(non_local);
}
// Visit the compound statement bodies because they can contain other definitions.
Stmt::For(_)
| Stmt::While(_)
| Stmt::If(_)
| Stmt::With(_)
| Stmt::Match(_)
| Stmt::Try(_) => {
preorder::walk_stmt(self, stmt);
}
// Skip over simple statements because they can't contain any other definitions.
Stmt::Return(_)
| Stmt::Delete(_)
| Stmt::AugAssign(_)
| Stmt::Raise(_)
| Stmt::Assert(_)
| Stmt::Expr(_)
| Stmt::Pass(_)
| Stmt::Break(_)
| Stmt::Continue(_)
| Stmt::IpyEscapeCommand(_) => {
// No op
}
}
}
fn visit_expr(&mut self, _: &'_ Expr) {}
fn visit_decorator(&mut self, _decorator: &'_ Decorator) {}
fn visit_except_handler(&mut self, except_handler: &'_ ExceptHandler) {
match except_handler {
ExceptHandler::ExceptHandler(except_handler) => {
self.lower_except_handler(except_handler);
}
}
}
fn visit_with_item(&mut self, with_item: &'_ WithItem) {
self.lower_with_item(with_item);
}
fn visit_match_case(&mut self, match_case: &'_ MatchCase) {
self.lower_match_case(match_case);
self.visit_body(&match_case.body);
}
}

View File

@@ -1,108 +0,0 @@
use std::fmt::Formatter;
use std::hash::BuildHasherDefault;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use rustc_hash::{FxHashSet, FxHasher};
use crate::files::FileId;
pub mod ast_ids;
pub mod cache;
pub mod cancellation;
pub mod db;
pub mod files;
pub mod hir;
pub mod lint;
pub mod module;
mod parse;
pub mod program;
mod semantic;
pub mod source;
pub mod watch;
pub(crate) type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>;
#[allow(unused)]
pub(crate) type FxDashSet<V> = dashmap::DashSet<V, BuildHasherDefault<FxHasher>>;
pub(crate) type FxIndexSet<V> = indexmap::set::IndexSet<V, BuildHasherDefault<FxHasher>>;
#[derive(Debug, Clone)]
pub struct Workspace {
/// TODO this should be a resolved path. We should probably use a newtype wrapper that guarantees that
/// PATH is a UTF-8 path and is normalized.
root: PathBuf,
/// The files that are open in the workspace.
///
/// * Editor: The files that are actively being edited in the editor (the user has a tab open with the file).
/// * CLI: The resolved files passed as arguments to the CLI.
open_files: FxHashSet<FileId>,
}
impl Workspace {
pub fn new(root: PathBuf) -> Self {
Self {
root,
open_files: FxHashSet::default(),
}
}
pub fn root(&self) -> &Path {
self.root.as_path()
}
// TODO having the content in workspace feels wrong.
pub fn open_file(&mut self, file_id: FileId) {
self.open_files.insert(file_id);
}
pub fn close_file(&mut self, file_id: FileId) {
self.open_files.remove(&file_id);
}
// TODO introduce an `OpenFile` type instead of using an anonymous tuple.
pub fn open_files(&self) -> impl Iterator<Item = FileId> + '_ {
self.open_files.iter().copied()
}
pub fn is_file_open(&self, file_id: FileId) -> bool {
self.open_files.contains(&file_id)
}
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Name(smol_str::SmolStr);
impl Name {
#[inline]
pub fn new(name: &str) -> Self {
Self(smol_str::SmolStr::new(name))
}
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}
impl Deref for Name {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl<T> From<T> for Name
where
T: Into<smol_str::SmolStr>,
{
fn from(value: T) -> Self {
Self(value.into())
}
}
impl std::fmt::Display for Name {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}

View File

@@ -1,332 +0,0 @@
use std::cell::RefCell;
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use std::time::Duration;
use ruff_python_ast::visitor::Visitor;
use ruff_python_ast::{ModModule, StringLiteral};
use ruff_python_parser::Parsed;
use crate::cache::KeyValueCache;
use crate::db::{LintDb, LintJar, QueryResult};
use crate::files::FileId;
use crate::module::{resolve_module, ModuleName};
use crate::parse::parse;
use crate::semantic::{infer_definition_type, infer_symbol_public_type, Type};
use crate::semantic::{
resolve_global_symbol, semantic_index, Definition, GlobalSymbolId, SemanticIndex, SymbolId,
};
use crate::source::{source_text, Source};
#[tracing::instrument(level = "debug", skip(db))]
pub(crate) fn lint_syntax(db: &dyn LintDb, file_id: FileId) -> QueryResult<Diagnostics> {
let lint_jar: &LintJar = db.jar()?;
let storage = &lint_jar.lint_syntax;
#[allow(clippy::print_stdout)]
if std::env::var("RED_KNOT_SLOW_LINT").is_ok() {
for i in 0..10 {
db.cancelled()?;
println!("RED_KNOT_SLOW_LINT is set, sleeping for {i}/10 seconds");
std::thread::sleep(Duration::from_secs(1));
}
}
storage.get(&file_id, |file_id| {
let mut diagnostics = Vec::new();
let source = source_text(db.upcast(), *file_id)?;
lint_lines(source.text(), &mut diagnostics);
let parsed = parse(db.upcast(), *file_id)?;
if parsed.errors().is_empty() {
let ast = parsed.syntax();
let mut visitor = SyntaxLintVisitor {
diagnostics,
source: source.text(),
};
visitor.visit_body(&ast.body);
diagnostics = visitor.diagnostics;
} else {
diagnostics.extend(parsed.errors().iter().map(std::string::ToString::to_string));
}
Ok(Diagnostics::from(diagnostics))
})
}
fn lint_lines(source: &str, diagnostics: &mut Vec<String>) {
for (line_number, line) in source.lines().enumerate() {
if line.len() < 88 {
continue;
}
let char_count = line.chars().count();
if char_count > 88 {
diagnostics.push(format!(
"Line {} is too long ({} characters)",
line_number + 1,
char_count
));
}
}
}
#[tracing::instrument(level = "debug", skip(db))]
pub(crate) fn lint_semantic(db: &dyn LintDb, file_id: FileId) -> QueryResult<Diagnostics> {
let lint_jar: &LintJar = db.jar()?;
let storage = &lint_jar.lint_semantic;
storage.get(&file_id, |file_id| {
let source = source_text(db.upcast(), *file_id)?;
let parsed = parse(db.upcast(), *file_id)?;
let semantic_index = semantic_index(db.upcast(), *file_id)?;
let context = SemanticLintContext {
file_id: *file_id,
source,
parsed: &parsed,
semantic_index,
db,
diagnostics: RefCell::new(Vec::new()),
};
lint_unresolved_imports(&context)?;
lint_bad_overrides(&context)?;
Ok(Diagnostics::from(context.diagnostics.take()))
})
}
fn lint_unresolved_imports(context: &SemanticLintContext) -> QueryResult<()> {
// TODO: Consider iterating over the dependencies (imports) only instead of all definitions.
for (symbol, definition) in context.semantic_index().symbol_table().all_definitions() {
match definition {
Definition::Import(import) => {
let ty = context.infer_symbol_public_type(symbol)?;
if ty.is_unknown() {
context.push_diagnostic(format!("Unresolved module {}", import.module));
}
}
Definition::ImportFrom(import) => {
let ty = context.infer_symbol_public_type(symbol)?;
if ty.is_unknown() {
let module_name = import.module().map(Deref::deref).unwrap_or_default();
let message = if import.level() > 0 {
format!(
"Unresolved relative import '{}' from {}{}",
import.name(),
".".repeat(import.level() as usize),
module_name
)
} else {
format!(
"Unresolved import '{}' from '{}'",
import.name(),
module_name
)
};
context.push_diagnostic(message);
}
}
_ => {}
}
}
Ok(())
}
fn lint_bad_overrides(context: &SemanticLintContext) -> QueryResult<()> {
// TODO we should have a special marker on the real typing module (from typeshed) so if you
// have your own "typing" module in your project, we don't consider it THE typing module (and
// same for other stdlib modules that our lint rules care about)
let Some(typing_override) = context.resolve_global_symbol("typing", "override")? else {
// TODO once we bundle typeshed, this should be unreachable!()
return Ok(());
};
// TODO we should maybe index definitions by type instead of iterating all, or else iterate all
// just once, match, and branch to all lint rules that care about a type of definition
for (symbol, definition) in context.semantic_index().symbol_table().all_definitions() {
if !matches!(definition, Definition::FunctionDef(_)) {
continue;
}
let ty = infer_definition_type(
context.db.upcast(),
GlobalSymbolId {
file_id: context.file_id,
symbol_id: symbol,
},
definition.clone(),
)?;
let Type::Function(func) = ty else {
unreachable!("type of a FunctionDef should always be a Function");
};
let Some(class) = func.get_containing_class(context.db.upcast())? else {
// not a method of a class
continue;
};
if func.has_decorator(context.db.upcast(), typing_override)? {
let method_name = func.name(context.db.upcast())?;
if class
.get_super_class_member(context.db.upcast(), &method_name)?
.is_none()
{
// TODO should have a qualname() method to support nested classes
context.push_diagnostic(
format!(
"Method {}.{} is decorated with `typing.override` but does not override any base class method",
class.name(context.db.upcast())?,
method_name,
));
}
}
}
Ok(())
}
pub struct SemanticLintContext<'a> {
file_id: FileId,
source: Source,
parsed: &'a Parsed<ModModule>,
semantic_index: Arc<SemanticIndex>,
db: &'a dyn LintDb,
diagnostics: RefCell<Vec<String>>,
}
impl<'a> SemanticLintContext<'a> {
pub fn source_text(&self) -> &str {
self.source.text()
}
pub fn file_id(&self) -> FileId {
self.file_id
}
pub fn ast(&self) -> &'a ModModule {
self.parsed.syntax()
}
pub fn semantic_index(&self) -> &SemanticIndex {
&self.semantic_index
}
pub fn infer_symbol_public_type(&self, symbol_id: SymbolId) -> QueryResult<Type> {
infer_symbol_public_type(
self.db.upcast(),
GlobalSymbolId {
file_id: self.file_id,
symbol_id,
},
)
}
pub fn push_diagnostic(&self, diagnostic: String) {
self.diagnostics.borrow_mut().push(diagnostic);
}
pub fn extend_diagnostics(&mut self, diagnostics: impl IntoIterator<Item = String>) {
self.diagnostics.get_mut().extend(diagnostics);
}
pub fn resolve_global_symbol(
&self,
module: &str,
symbol_name: &str,
) -> QueryResult<Option<GlobalSymbolId>> {
let Some(module) = resolve_module(self.db.upcast(), ModuleName::new(module))? else {
return Ok(None);
};
resolve_global_symbol(self.db.upcast(), module, symbol_name)
}
}
#[derive(Debug)]
struct SyntaxLintVisitor<'a> {
diagnostics: Vec<String>,
source: &'a str,
}
impl Visitor<'_> for SyntaxLintVisitor<'_> {
fn visit_string_literal(&mut self, string_literal: &'_ StringLiteral) {
// A very naive implementation of use double quotes
let text = &self.source[string_literal.range];
if text.starts_with('\'') {
self.diagnostics
.push("Use double quotes for strings".to_string());
}
}
}
#[derive(Debug, Clone)]
pub enum Diagnostics {
Empty,
List(Arc<Vec<String>>),
}
impl Diagnostics {
pub fn as_slice(&self) -> &[String] {
match self {
Diagnostics::Empty => &[],
Diagnostics::List(list) => list.as_slice(),
}
}
}
impl Deref for Diagnostics {
type Target = [String];
fn deref(&self) -> &Self::Target {
self.as_slice()
}
}
impl From<Vec<String>> for Diagnostics {
fn from(value: Vec<String>) -> Self {
if value.is_empty() {
Diagnostics::Empty
} else {
Diagnostics::List(Arc::new(value))
}
}
}
#[derive(Default, Debug)]
pub struct LintSyntaxStorage(KeyValueCache<FileId, Diagnostics>);
impl Deref for LintSyntaxStorage {
type Target = KeyValueCache<FileId, Diagnostics>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for LintSyntaxStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
#[derive(Default, Debug)]
pub struct LintSemanticStorage(KeyValueCache<FileId, Diagnostics>);
impl Deref for LintSemanticStorage {
type Target = KeyValueCache<FileId, Diagnostics>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for LintSemanticStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

View File

@@ -0,0 +1,254 @@
//! Sets up logging for Red Knot
use anyhow::Context;
use colored::Colorize;
use std::fmt;
use std::fs::File;
use std::io::BufWriter;
use tracing::log::LevelFilter;
use tracing::{Event, Subscriber};
use tracing_subscriber::fmt::format::Writer;
use tracing_subscriber::fmt::{FmtContext, FormatEvent, FormatFields};
use tracing_subscriber::registry::LookupSpan;
use tracing_subscriber::EnvFilter;
/// Logging flags to `#[command(flatten)]` into your CLI
#[derive(clap::Args, Debug, Clone, Default)]
#[command(about = None, long_about = None)]
pub(crate) struct Verbosity {
#[arg(
long,
short = 'v',
help = "Use verbose output (or `-vv` and `-vvv` for more verbose output)",
action = clap::ArgAction::Count,
global = true,
)]
verbose: u8,
}
impl Verbosity {
/// Returns the verbosity level based on the number of `-v` flags.
///
/// Returns `None` if the user did not specify any verbosity flags.
pub(crate) fn level(&self) -> VerbosityLevel {
match self.verbose {
0 => VerbosityLevel::Default,
1 => VerbosityLevel::Verbose,
2 => VerbosityLevel::ExtraVerbose,
_ => VerbosityLevel::Trace,
}
}
}
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub(crate) enum VerbosityLevel {
/// Default output level. Only shows Ruff and Red Knot events up to the [`WARN`](tracing::Level::WARN).
Default,
/// Enables verbose output. Emits Ruff and Red Knot events up to the [`INFO`](tracing::Level::INFO).
/// Corresponds to `-v`.
Verbose,
/// Enables a more verbose tracing format and emits Ruff and Red Knot events up to [`DEBUG`](tracing::Level::DEBUG).
/// Corresponds to `-vv`
ExtraVerbose,
/// Enables all tracing events and uses a tree-like output format. Corresponds to `-vvv`.
Trace,
}
impl VerbosityLevel {
const fn level_filter(self) -> LevelFilter {
match self {
VerbosityLevel::Default => LevelFilter::Warn,
VerbosityLevel::Verbose => LevelFilter::Info,
VerbosityLevel::ExtraVerbose => LevelFilter::Debug,
VerbosityLevel::Trace => LevelFilter::Trace,
}
}
pub(crate) const fn is_trace(self) -> bool {
matches!(self, VerbosityLevel::Trace)
}
pub(crate) const fn is_extra_verbose(self) -> bool {
matches!(self, VerbosityLevel::ExtraVerbose)
}
}
pub(crate) fn setup_tracing(level: VerbosityLevel) -> anyhow::Result<TracingGuard> {
use tracing_subscriber::prelude::*;
// The `RED_KNOT_LOG` environment variable overrides the default log level.
let filter = if let Ok(log_env_variable) = std::env::var("RED_KNOT_LOG") {
EnvFilter::builder()
.parse(log_env_variable)
.context("Failed to parse directives specified in RED_KNOT_LOG environment variable.")?
} else {
match level {
VerbosityLevel::Default => {
// Show warning traces
EnvFilter::default().add_directive(tracing::level_filters::LevelFilter::WARN.into())
}
level => {
let level_filter = level.level_filter();
// Show info|debug|trace events, but allow `RED_KNOT_LOG` to override
let filter = EnvFilter::default().add_directive(
format!("red_knot={level_filter}")
.parse()
.expect("Hardcoded directive to be valid"),
);
filter.add_directive(
format!("ruff={level_filter}")
.parse()
.expect("Hardcoded directive to be valid"),
)
}
}
};
let (profiling_layer, guard) = setup_profile();
let registry = tracing_subscriber::registry()
.with(filter)
.with(profiling_layer);
if level.is_trace() {
let subscriber = registry.with(
tracing_tree::HierarchicalLayer::default()
.with_indent_lines(true)
.with_indent_amount(2)
.with_bracketed_fields(true)
.with_thread_ids(true)
.with_targets(true)
.with_writer(std::io::stderr)
.with_timer(tracing_tree::time::Uptime::default()),
);
subscriber.init();
} else {
let subscriber = registry.with(
tracing_subscriber::fmt::layer()
.event_format(RedKnotFormat {
display_level: true,
display_timestamp: level.is_extra_verbose(),
show_spans: false,
})
.with_writer(std::io::stderr),
);
subscriber.init();
}
Ok(TracingGuard {
_flame_guard: guard,
})
}
#[allow(clippy::type_complexity)]
fn setup_profile<S>() -> (
Option<tracing_flame::FlameLayer<S, BufWriter<File>>>,
Option<tracing_flame::FlushGuard<BufWriter<File>>>,
)
where
S: Subscriber + for<'span> LookupSpan<'span>,
{
if let Ok("1" | "true") = std::env::var("RED_KNOT_LOG_PROFILE").as_deref() {
let (layer, guard) = tracing_flame::FlameLayer::with_file("tracing.folded")
.expect("Flame layer to be created");
(Some(layer), Some(guard))
} else {
(None, None)
}
}
pub(crate) struct TracingGuard {
_flame_guard: Option<tracing_flame::FlushGuard<BufWriter<File>>>,
}
struct RedKnotFormat {
display_timestamp: bool,
display_level: bool,
show_spans: bool,
}
/// See <https://docs.rs/tracing-subscriber/0.3.18/src/tracing_subscriber/fmt/format/mod.rs.html#1026-1156>
impl<S, N> FormatEvent<S, N> for RedKnotFormat
where
S: Subscriber + for<'a> LookupSpan<'a>,
N: for<'a> FormatFields<'a> + 'static,
{
fn format_event(
&self,
ctx: &FmtContext<'_, S, N>,
mut writer: Writer<'_>,
event: &Event<'_>,
) -> fmt::Result {
let meta = event.metadata();
let ansi = writer.has_ansi_escapes();
if self.display_timestamp {
let timestamp = chrono::Local::now()
.format("%Y-%m-%d %H:%M:%S.%f")
.to_string();
if ansi {
write!(writer, "{} ", timestamp.dimmed())?;
} else {
write!(
writer,
"{} ",
chrono::Local::now().format("%Y-%m-%d %H:%M:%S.%f")
)?;
}
}
if self.display_level {
let level = meta.level();
// Same colors as tracing
if ansi {
let formatted_level = level.to_string();
match *level {
tracing::Level::TRACE => {
write!(writer, "{} ", formatted_level.purple().bold())?;
}
tracing::Level::DEBUG => write!(writer, "{} ", formatted_level.blue().bold())?,
tracing::Level::INFO => write!(writer, "{} ", formatted_level.green().bold())?,
tracing::Level::WARN => write!(writer, "{} ", formatted_level.yellow().bold())?,
tracing::Level::ERROR => write!(writer, "{} ", level.to_string().red().bold())?,
}
} else {
write!(writer, "{level} ")?;
}
}
if self.show_spans {
let span = event.parent();
let mut seen = false;
let span = span
.and_then(|id| ctx.span(id))
.or_else(|| ctx.lookup_current());
let scope = span.into_iter().flat_map(|span| span.scope().from_root());
for span in scope {
seen = true;
if ansi {
write!(writer, "{}:", span.metadata().name().bold())?;
} else {
write!(writer, "{}:", span.metadata().name())?;
}
}
if seen {
writer.write_char(' ')?;
}
}
ctx.field_format().format_fields(writer.by_ref(), event)?;
writeln!(writer)
}
}

View File

@@ -1,63 +1,190 @@
#![allow(clippy::dbg_macro)]
use std::path::Path;
use std::process::{ExitCode, Termination};
use std::sync::Mutex;
use anyhow::{anyhow, Context};
use clap::Parser;
use colored::Colorize;
use crossbeam::channel as crossbeam_channel;
use tracing::subscriber::Interest;
use tracing::{Level, Metadata};
use tracing_subscriber::filter::LevelFilter;
use tracing_subscriber::layer::{Context, Filter, SubscriberExt};
use tracing_subscriber::{Layer, Registry};
use tracing_tree::time::Uptime;
use salsa::plumbing::ZalsaDatabase;
use red_knot::db::{HasJar, ParallelDatabase, QueryError, SourceDb, SourceJar};
use red_knot::module::{set_module_search_paths, ModuleResolutionInputs};
use red_knot::program::check::ExecutionMode;
use red_knot::program::{FileWatcherChange, Program};
use red_knot::watch::FileWatcher;
use red_knot::Workspace;
use red_knot_python_semantic::{ProgramSettings, SearchPathSettings};
use red_knot_server::run_server;
use red_knot_workspace::db::RootDatabase;
use red_knot_workspace::site_packages::VirtualEnvironment;
use red_knot_workspace::watch;
use red_knot_workspace::watch::WorkspaceWatcher;
use red_knot_workspace::workspace::WorkspaceMetadata;
use ruff_db::system::{OsSystem, System, SystemPath, SystemPathBuf};
use target_version::TargetVersion;
use crate::logging::{setup_tracing, Verbosity};
mod logging;
mod target_version;
mod verbosity;
#[derive(Debug, Parser)]
#[command(
author,
name = "red-knot",
about = "An extremely fast Python type checker."
)]
#[command(version)]
struct Args {
#[command(subcommand)]
pub(crate) command: Option<Command>,
#[arg(
long,
help = "Changes the current working directory.",
long_help = "Changes the current working directory before any specified operations. This affects the workspace and configuration discovery.",
value_name = "PATH"
)]
current_directory: Option<SystemPathBuf>,
#[arg(
long,
help = "Path to the virtual environment the project uses",
long_help = "\
Path to the virtual environment the project uses. \
If provided, red-knot will use the `site-packages` directory of this virtual environment \
to resolve type information for the project's third-party dependencies.",
value_name = "PATH"
)]
venv_path: Option<SystemPathBuf>,
#[arg(
long,
value_name = "DIRECTORY",
help = "Custom directory to use for stdlib typeshed stubs"
)]
custom_typeshed_dir: Option<SystemPathBuf>,
#[arg(
long,
value_name = "PATH",
help = "Additional path to use as a module-resolution source (can be passed multiple times)"
)]
extra_search_path: Vec<SystemPathBuf>,
#[arg(
long,
help = "Python version to assume when resolving types",
default_value_t = TargetVersion::default(),
value_name="VERSION")
]
target_version: TargetVersion,
#[clap(flatten)]
verbosity: Verbosity,
#[arg(
long,
help = "Run in watch mode by re-running whenever files change",
short = 'W'
)]
watch: bool,
}
#[derive(Debug, clap::Subcommand)]
pub enum Command {
/// Start the language server
Server,
}
#[allow(clippy::print_stdout, clippy::unnecessary_wraps, clippy::print_stderr)]
fn main() -> anyhow::Result<()> {
setup_tracing();
pub fn main() -> ExitStatus {
run().unwrap_or_else(|error| {
use std::io::Write;
let arguments: Vec<_> = std::env::args().collect();
// Use `writeln` instead of `eprintln` to avoid panicking when the stderr pipe is broken.
let mut stderr = std::io::stderr().lock();
if arguments.len() < 2 {
eprintln!("Usage: red_knot <path>");
return Err(anyhow::anyhow!("Invalid arguments"));
// This communicates that this isn't a linter error but Red Knot itself hard-errored for
// some reason (e.g. failed to resolve the configuration)
writeln!(stderr, "{}", "Red Knot failed".red().bold()).ok();
// Currently we generally only see one error, but e.g. with io errors when resolving
// the configuration it is help to chain errors ("resolving configuration failed" ->
// "failed to read file: subdir/pyproject.toml")
for cause in error.chain() {
writeln!(stderr, " {} {cause}", "Cause:".bold()).ok();
}
ExitStatus::Error
})
}
fn run() -> anyhow::Result<ExitStatus> {
let Args {
command,
current_directory,
custom_typeshed_dir,
extra_search_path: extra_paths,
venv_path,
target_version,
verbosity,
watch,
} = Args::parse_from(std::env::args().collect::<Vec<_>>());
if matches!(command, Some(Command::Server)) {
return run_server().map(|()| ExitStatus::Success);
}
let entry_point = Path::new(&arguments[1]);
let verbosity = verbosity.level();
countme::enable(verbosity.is_trace());
let _guard = setup_tracing(verbosity)?;
if !entry_point.exists() {
eprintln!("The entry point does not exist.");
return Err(anyhow::anyhow!("Invalid arguments"));
}
if !entry_point.is_file() {
eprintln!("The entry point is not a file.");
return Err(anyhow::anyhow!("Invalid arguments"));
}
let workspace_folder = entry_point.parent().unwrap();
let workspace = Workspace::new(workspace_folder.to_path_buf());
let workspace_search_path = workspace.root().to_path_buf();
let search_paths = ModuleResolutionInputs {
extra_paths: vec![],
workspace_root: workspace_search_path,
site_packages: None,
custom_typeshed: None,
// The base path to which all CLI arguments are relative to.
let cli_base_path = {
let cwd = std::env::current_dir().context("Failed to get the current working directory")?;
SystemPathBuf::from_path_buf(cwd)
.map_err(|path| {
anyhow!(
"The current working directory '{}' contains non-unicode characters. Red Knot only supports unicode paths.",
path.display()
)
})?
};
let mut program = Program::new(workspace);
set_module_search_paths(&mut program, search_paths);
let cwd = current_directory
.map(|cwd| {
if cwd.as_std_path().is_dir() {
Ok(SystemPath::absolute(&cwd, &cli_base_path))
} else {
Err(anyhow!(
"Provided current-directory path '{cwd}' is not a directory."
))
}
})
.transpose()?
.unwrap_or_else(|| cli_base_path.clone());
let entry_id = program.file_id(entry_point);
program.workspace_mut().open_file(entry_id);
let system = OsSystem::new(cwd.clone());
let workspace_metadata = WorkspaceMetadata::from_path(system.current_directory(), &system)?;
// TODO: Verify the remaining search path settings eagerly.
let site_packages = venv_path
.map(|path| {
VirtualEnvironment::new(path, &OsSystem::new(cli_base_path))
.and_then(|venv| venv.site_packages_directories(&system))
})
.transpose()?
.unwrap_or_default();
// TODO: Respect the settings from the workspace metadata. when resolving the program settings.
let program_settings = ProgramSettings {
target_version: target_version.into(),
search_paths: SearchPathSettings {
extra_paths,
src_root: workspace_metadata.root().to_path_buf(),
custom_typeshed: custom_typeshed_dir,
site_packages,
},
};
// TODO: Use the `program_settings` to compute the key for the database's persistent
// cache and load the cache if it exists.
let mut db = RootDatabase::new(workspace_metadata, program_settings, system);
let (main_loop, main_loop_cancellation_token) = MainLoop::new();
@@ -71,122 +198,158 @@ fn main() -> anyhow::Result<()> {
}
})?;
let file_changes_notifier = main_loop.file_changes_notifier();
let exit_status = if watch {
main_loop.watch(&mut db)?
} else {
main_loop.run(&mut db)
};
// Watch for file changes and re-trigger the analysis.
let mut file_watcher = FileWatcher::new(move |changes| {
file_changes_notifier.notify(changes);
})?;
tracing::trace!("Counts for entire CLI run:\n{}", countme::get_all());
file_watcher.watch_folder(workspace_folder)?;
std::mem::forget(db);
main_loop.run(&mut program);
Ok(exit_status)
}
let source_jar: &SourceJar = program.jar().unwrap();
#[derive(Copy, Clone)]
pub enum ExitStatus {
/// Checking was successful and there were no errors.
Success = 0,
dbg!(source_jar.parsed.statistics());
dbg!(source_jar.sources.statistics());
/// Checking was successful but there were errors.
Failure = 1,
Ok(())
/// Checking failed.
Error = 2,
}
impl Termination for ExitStatus {
fn report(self) -> ExitCode {
ExitCode::from(self as u8)
}
}
struct MainLoop {
orchestrator_sender: crossbeam_channel::Sender<OrchestratorMessage>,
main_loop_receiver: crossbeam_channel::Receiver<MainLoopMessage>,
/// Sender that can be used to send messages to the main loop.
sender: crossbeam_channel::Sender<MainLoopMessage>,
/// Receiver for the messages sent **to** the main loop.
receiver: crossbeam_channel::Receiver<MainLoopMessage>,
/// The file system watcher, if running in watch mode.
watcher: Option<WorkspaceWatcher>,
}
impl MainLoop {
fn new() -> (Self, MainLoopCancellationToken) {
let (orchestrator_sender, orchestrator_receiver) = crossbeam_channel::bounded(1);
let (main_loop_sender, main_loop_receiver) = crossbeam_channel::bounded(1);
let mut orchestrator = Orchestrator {
receiver: orchestrator_receiver,
sender: main_loop_sender.clone(),
revision: 0,
};
std::thread::spawn(move || {
orchestrator.run();
});
let (sender, receiver) = crossbeam_channel::bounded(10);
(
Self {
orchestrator_sender,
main_loop_receiver,
},
MainLoopCancellationToken {
sender: main_loop_sender,
sender: sender.clone(),
receiver,
watcher: None,
},
MainLoopCancellationToken { sender },
)
}
fn file_changes_notifier(&self) -> FileChangesNotifier {
FileChangesNotifier {
sender: self.orchestrator_sender.clone(),
}
fn watch(mut self, db: &mut RootDatabase) -> anyhow::Result<ExitStatus> {
tracing::debug!("Starting watch mode");
let sender = self.sender.clone();
let watcher = watch::directory_watcher(move |event| {
sender.send(MainLoopMessage::ApplyChanges(event)).unwrap();
})?;
self.watcher = Some(WorkspaceWatcher::new(watcher, db));
self.run(db);
Ok(ExitStatus::Success)
}
fn run(self, program: &mut Program) {
self.orchestrator_sender
.send(OrchestratorMessage::Run)
.unwrap();
fn run(mut self, db: &mut RootDatabase) -> ExitStatus {
self.sender.send(MainLoopMessage::CheckWorkspace).unwrap();
for message in &self.main_loop_receiver {
tracing::trace!("Main Loop: Tick");
let result = self.main_loop(db);
tracing::debug!("Exiting main loop");
result
}
fn main_loop(&mut self, db: &mut RootDatabase) -> ExitStatus {
// Schedule the first check.
tracing::debug!("Starting main loop");
let mut revision = 0u64;
while let Ok(message) = self.receiver.recv() {
match message {
MainLoopMessage::CheckProgram { revision } => {
let program = program.snapshot();
let sender = self.orchestrator_sender.clone();
MainLoopMessage::CheckWorkspace => {
let db = db.snapshot();
let sender = self.sender.clone();
// Spawn a new task that checks the program. This needs to be done in a separate thread
// Spawn a new task that checks the workspace. This needs to be done in a separate thread
// to prevent blocking the main loop here.
rayon::spawn(move || match program.check(ExecutionMode::ThreadPool) {
Ok(result) => {
rayon::spawn(move || {
if let Ok(result) = db.check() {
// Send the result back to the main loop for printing.
sender
.send(OrchestratorMessage::CheckProgramCompleted {
diagnostics: result,
revision,
})
.send(MainLoopMessage::CheckCompleted { result, revision })
.unwrap();
}
Err(QueryError::Cancelled) => {}
});
}
MainLoopMessage::ApplyChanges(changes) => {
// Automatically cancels any pending queries and waits for them to complete.
program.apply_changes(changes);
MainLoopMessage::CheckCompleted {
result,
revision: check_revision,
} => {
let has_diagnostics = !result.is_empty();
if check_revision == revision {
for diagnostic in result {
tracing::error!("{}", diagnostic);
}
} else {
tracing::debug!(
"Discarding check result for outdated revision: current: {revision}, result revision: {check_revision}"
);
}
if self.watcher.is_none() {
return if has_diagnostics {
ExitStatus::Failure
} else {
ExitStatus::Success
};
}
tracing::trace!("Counts after last check:\n{}", countme::get_all());
}
MainLoopMessage::CheckCompleted(diagnostics) => {
dbg!(diagnostics);
MainLoopMessage::ApplyChanges(changes) => {
revision += 1;
// Automatically cancels any pending queries and waits for them to complete.
db.apply_changes(changes);
if let Some(watcher) = self.watcher.as_mut() {
watcher.update(db);
}
self.sender.send(MainLoopMessage::CheckWorkspace).unwrap();
}
MainLoopMessage::Exit => {
return;
// Cancel any pending queries and wait for them to complete.
// TODO: Don't use Salsa internal APIs
// [Zulip-Thread](https://salsa.zulipchat.com/#narrow/stream/333573-salsa-3.2E0/topic/Expose.20an.20API.20to.20cancel.20other.20queries)
let _ = db.zalsa_mut();
return ExitStatus::Success;
}
}
tracing::debug!("Waiting for next main loop message.");
}
}
}
impl Drop for MainLoop {
fn drop(&mut self) {
self.orchestrator_sender
.send(OrchestratorMessage::Shutdown)
.unwrap();
}
}
#[derive(Debug, Clone)]
struct FileChangesNotifier {
sender: crossbeam_channel::Sender<OrchestratorMessage>,
}
impl FileChangesNotifier {
fn notify(&self, changes: Vec<FileWatcherChange>) {
self.sender
.send(OrchestratorMessage::FileChanges(changes))
.unwrap();
ExitStatus::Success
}
}
@@ -201,164 +364,11 @@ impl MainLoopCancellationToken {
}
}
struct Orchestrator {
/// Sends messages to the main loop.
sender: crossbeam_channel::Sender<MainLoopMessage>,
/// Receives messages from the main loop.
receiver: crossbeam_channel::Receiver<OrchestratorMessage>,
revision: usize,
}
impl Orchestrator {
fn run(&mut self) {
while let Ok(message) = self.receiver.recv() {
match message {
OrchestratorMessage::Run => {
self.sender
.send(MainLoopMessage::CheckProgram {
revision: self.revision,
})
.unwrap();
}
OrchestratorMessage::CheckProgramCompleted {
diagnostics,
revision,
} => {
// Only take the diagnostics if they are for the latest revision.
if self.revision == revision {
self.sender
.send(MainLoopMessage::CheckCompleted(diagnostics))
.unwrap();
} else {
tracing::debug!("Discarding diagnostics for outdated revision {revision} (current: {}).", self.revision);
}
}
OrchestratorMessage::FileChanges(changes) => {
// Request cancellation, but wait until all analysis tasks have completed to
// avoid stale messages in the next main loop.
self.revision += 1;
self.debounce_changes(changes);
}
OrchestratorMessage::Shutdown => {
return self.shutdown();
}
}
}
}
fn debounce_changes(&self, mut changes: Vec<FileWatcherChange>) {
loop {
// Consume possibly incoming file change messages before running a new analysis, but don't wait for more than 100ms.
crossbeam_channel::select! {
recv(self.receiver) -> message => {
match message {
Ok(OrchestratorMessage::Shutdown) => {
return self.shutdown();
}
Ok(OrchestratorMessage::FileChanges(file_changes)) => {
changes.extend(file_changes);
}
Ok(OrchestratorMessage::CheckProgramCompleted { .. })=> {
// disregard any outdated completion message.
}
Ok(OrchestratorMessage::Run) => unreachable!("The orchestrator is already running."),
Err(_) => {
// There are no more senders, no point in waiting for more messages
return;
}
}
},
default(std::time::Duration::from_millis(10)) => {
// No more file changes after 10 ms, send the changes and schedule a new analysis
self.sender.send(MainLoopMessage::ApplyChanges(changes)).unwrap();
self.sender.send(MainLoopMessage::CheckProgram { revision: self.revision}).unwrap();
return;
}
}
}
}
#[allow(clippy::unused_self)]
fn shutdown(&self) {
tracing::trace!("Shutting down orchestrator.");
}
}
/// Message sent from the orchestrator to the main loop.
#[derive(Debug)]
enum MainLoopMessage {
CheckProgram { revision: usize },
CheckCompleted(Vec<String>),
ApplyChanges(Vec<FileWatcherChange>),
CheckWorkspace,
CheckCompleted { result: Vec<String>, revision: u64 },
ApplyChanges(Vec<watch::ChangeEvent>),
Exit,
}
#[derive(Debug)]
enum OrchestratorMessage {
Run,
Shutdown,
CheckProgramCompleted {
diagnostics: Vec<String>,
revision: usize,
},
FileChanges(Vec<FileWatcherChange>),
}
fn setup_tracing() {
let subscriber = Registry::default().with(
tracing_tree::HierarchicalLayer::default()
.with_indent_lines(true)
.with_indent_amount(2)
.with_bracketed_fields(true)
.with_thread_ids(true)
.with_targets(true)
.with_writer(|| Box::new(std::io::stderr()))
.with_timer(Uptime::default())
.with_filter(LoggingFilter {
trace_level: Level::TRACE,
}),
);
tracing::subscriber::set_global_default(subscriber).unwrap();
}
struct LoggingFilter {
trace_level: Level,
}
impl LoggingFilter {
fn is_enabled(&self, meta: &Metadata<'_>) -> bool {
let filter = if meta.target().starts_with("red_knot") || meta.target().starts_with("ruff") {
self.trace_level
} else {
Level::INFO
};
meta.level() <= &filter
}
}
impl<S> Filter<S> for LoggingFilter {
fn enabled(&self, meta: &Metadata<'_>, _cx: &Context<'_, S>) -> bool {
self.is_enabled(meta)
}
fn callsite_enabled(&self, meta: &'static Metadata<'static>) -> Interest {
if self.is_enabled(meta) {
Interest::always()
} else {
Interest::never()
}
}
fn max_level_hint(&self) -> Option<LevelFilter> {
Some(LevelFilter::from_level(self.trace_level))
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,41 +0,0 @@
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use ruff_python_ast::ModModule;
use ruff_python_parser::Parsed;
use crate::cache::KeyValueCache;
use crate::db::{QueryResult, SourceDb};
use crate::files::FileId;
use crate::source::source_text;
#[tracing::instrument(level = "debug", skip(db))]
pub(crate) fn parse(db: &dyn SourceDb, file_id: FileId) -> QueryResult<Arc<Parsed<ModModule>>> {
let jar = db.jar()?;
jar.parsed.get(&file_id, |file_id| {
let source = source_text(db, *file_id)?;
Ok(Arc::new(ruff_python_parser::parse_unchecked_source(
source.text(),
source.kind().into(),
)))
})
}
#[derive(Debug, Default)]
pub struct ParsedStorage(KeyValueCache<FileId, Arc<Parsed<ModModule>>>);
impl Deref for ParsedStorage {
type Target = KeyValueCache<FileId, Arc<Parsed<ModModule>>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for ParsedStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

View File

@@ -1,413 +0,0 @@
use rayon::{current_num_threads, yield_local};
use rustc_hash::FxHashSet;
use crate::db::{Database, QueryError, QueryResult};
use crate::files::FileId;
use crate::lint::{lint_semantic, lint_syntax, Diagnostics};
use crate::module::{file_to_module, resolve_module};
use crate::program::Program;
use crate::semantic::{semantic_index, Dependency};
impl Program {
/// Checks all open files in the workspace and its dependencies.
#[tracing::instrument(level = "debug", skip_all)]
pub fn check(&self, mode: ExecutionMode) -> QueryResult<Vec<String>> {
self.cancelled()?;
let mut context = CheckContext::new(self);
match mode {
ExecutionMode::SingleThreaded => SingleThreadedExecutor.run(&mut context)?,
ExecutionMode::ThreadPool => ThreadPoolExecutor.run(&mut context)?,
};
Ok(context.finish())
}
#[tracing::instrument(level = "debug", skip(self, context))]
fn check_file(&self, file: FileId, context: &CheckFileContext) -> QueryResult<Diagnostics> {
self.cancelled()?;
let index = semantic_index(self, file)?;
let dependencies = index.symbol_table().dependencies();
if !dependencies.is_empty() {
let module = file_to_module(self, file)?;
// TODO scheduling all dependencies here is wasteful if we don't infer any types on them
// but I think that's unlikely, so it is okay?
// Anyway, we need to figure out a way to retrieve the dependencies of a module
// from the persistent cache. So maybe it should be a separate query after all.
for dependency in dependencies {
let dependency_name = match dependency {
Dependency::Module(name) => Some(name.clone()),
Dependency::Relative { .. } => match &module {
Some(module) => module.resolve_dependency(self, dependency)?,
None => None,
},
};
if let Some(dependency_name) = dependency_name {
// TODO We may want to have a different check functions for non-first-party
// files because we only need to index them and not check them.
// Supporting non-first-party code also requires supporting typing stubs.
if let Some(dependency) = resolve_module(self, dependency_name)? {
if dependency.path(self)?.root().kind().is_first_party() {
context.schedule_dependency(dependency.path(self)?.file());
}
}
}
}
}
let mut diagnostics = Vec::new();
if self.workspace().is_file_open(file) {
diagnostics.extend_from_slice(&lint_syntax(self, file)?);
diagnostics.extend_from_slice(&lint_semantic(self, file)?);
}
Ok(Diagnostics::from(diagnostics))
}
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ExecutionMode {
SingleThreaded,
ThreadPool,
}
/// Context that stores state information about the entire check operation.
struct CheckContext<'a> {
/// IDs of the files that have been queued for checking.
///
/// Used to avoid queuing the same file twice.
scheduled_files: FxHashSet<FileId>,
/// Reference to the program that is checked.
program: &'a Program,
/// The aggregated diagnostics
diagnostics: Vec<String>,
}
impl<'a> CheckContext<'a> {
fn new(program: &'a Program) -> Self {
Self {
scheduled_files: FxHashSet::default(),
program,
diagnostics: Vec::new(),
}
}
/// Returns the tasks to check all open files in the workspace.
fn check_open_files(&mut self) -> Vec<CheckOpenFileTask> {
self.scheduled_files
.extend(self.program.workspace().open_files());
self.program
.workspace()
.open_files()
.map(|file_id| CheckOpenFileTask { file_id })
.collect()
}
/// Returns the task to check a dependency.
fn check_dependency(&mut self, file_id: FileId) -> Option<CheckDependencyTask> {
if self.scheduled_files.insert(file_id) {
Some(CheckDependencyTask { file_id })
} else {
None
}
}
/// Pushes the result for a single file check operation
fn push_diagnostics(&mut self, diagnostics: &Diagnostics) {
self.diagnostics.extend_from_slice(diagnostics);
}
/// Returns a reference to the program that is being checked.
fn program(&self) -> &'a Program {
self.program
}
/// Creates a task context that is used to check a single file.
fn task_context<'b, S>(&self, dependency_scheduler: &'b S) -> CheckTaskContext<'a, 'b, S>
where
S: ScheduleDependency,
{
CheckTaskContext {
program: self.program,
dependency_scheduler,
}
}
fn finish(self) -> Vec<String> {
self.diagnostics
}
}
/// Trait that abstracts away how a dependency of a file gets scheduled for checking.
trait ScheduleDependency {
/// Schedules the file with the given ID for checking.
fn schedule(&self, file_id: FileId);
}
impl<T> ScheduleDependency for T
where
T: Fn(FileId),
{
fn schedule(&self, file_id: FileId) {
let f = self;
f(file_id);
}
}
/// Context that is used to run a single file check task.
///
/// The task is generic over `S` because it is passed across thread boundaries and
/// we don't want to add the requirement that [`ScheduleDependency`] must be [`Send`].
struct CheckTaskContext<'a, 'scheduler, S>
where
S: ScheduleDependency,
{
dependency_scheduler: &'scheduler S,
program: &'a Program,
}
impl<'a, 'scheduler, S> CheckTaskContext<'a, 'scheduler, S>
where
S: ScheduleDependency,
{
fn as_file_context(&self) -> CheckFileContext<'scheduler> {
CheckFileContext {
dependency_scheduler: self.dependency_scheduler,
}
}
}
/// Context passed when checking a single file.
///
/// This is a trimmed down version of [`CheckTaskContext`] with the type parameter `S` erased
/// to avoid monomorphization of [`Program:check_file`].
struct CheckFileContext<'a> {
dependency_scheduler: &'a dyn ScheduleDependency,
}
impl<'a> CheckFileContext<'a> {
fn schedule_dependency(&self, file_id: FileId) {
self.dependency_scheduler.schedule(file_id);
}
}
#[derive(Debug)]
enum CheckFileTask {
OpenFile(CheckOpenFileTask),
Dependency(CheckDependencyTask),
}
impl CheckFileTask {
/// Runs the task and returns the results for checking this file.
fn run<S>(&self, context: &CheckTaskContext<S>) -> QueryResult<Diagnostics>
where
S: ScheduleDependency,
{
match self {
Self::OpenFile(task) => task.run(context),
Self::Dependency(task) => task.run(context),
}
}
fn file_id(&self) -> FileId {
match self {
CheckFileTask::OpenFile(task) => task.file_id,
CheckFileTask::Dependency(task) => task.file_id,
}
}
}
/// Task to check an open file.
#[derive(Debug)]
struct CheckOpenFileTask {
file_id: FileId,
}
impl CheckOpenFileTask {
fn run<S>(&self, context: &CheckTaskContext<S>) -> QueryResult<Diagnostics>
where
S: ScheduleDependency,
{
context
.program
.check_file(self.file_id, &context.as_file_context())
}
}
/// Task to check a dependency file.
#[derive(Debug)]
struct CheckDependencyTask {
file_id: FileId,
}
impl CheckDependencyTask {
fn run<S>(&self, context: &CheckTaskContext<S>) -> QueryResult<Diagnostics>
where
S: ScheduleDependency,
{
context
.program
.check_file(self.file_id, &context.as_file_context())
}
}
/// Executor that schedules the checking of individual program files.
trait CheckExecutor {
fn run(self, context: &mut CheckContext) -> QueryResult<()>;
}
/// Executor that runs all check operations on the current thread.
///
/// The executor does not schedule dependencies for checking.
/// The main motivation for scheduling dependencies
/// in a multithreaded environment is to parse and index the dependencies concurrently.
/// However, that doesn't make sense in a single threaded environment, because the dependencies then compute
/// with checking the open files. Checking dependencies in a single threaded environment is more likely
/// to hurt performance because we end up analyzing files in their entirety, even if we only need to type check parts of them.
#[derive(Debug, Default)]
struct SingleThreadedExecutor;
impl CheckExecutor for SingleThreadedExecutor {
fn run(self, context: &mut CheckContext) -> QueryResult<()> {
let mut queue = context.check_open_files();
let noop_schedule_dependency = |_| {};
while let Some(file) = queue.pop() {
context.program().cancelled()?;
let task_context = context.task_context(&noop_schedule_dependency);
context.push_diagnostics(&file.run(&task_context)?);
}
Ok(())
}
}
/// Executor that runs the check operations on a thread pool.
///
/// The executor runs each check operation as its own task using a thread pool.
///
/// Other than [`SingleThreadedExecutor`], this executor schedules dependencies for checking. It
/// even schedules dependencies for checking when the thread pool size is 1 for a better debugging experience.
#[derive(Debug, Default)]
struct ThreadPoolExecutor;
impl CheckExecutor for ThreadPoolExecutor {
fn run(self, context: &mut CheckContext) -> QueryResult<()> {
let num_threads = current_num_threads();
let single_threaded = num_threads == 1;
let span = tracing::trace_span!("ThreadPoolExecutor::run", num_threads);
let _ = span.enter();
let mut queue: Vec<_> = context
.check_open_files()
.into_iter()
.map(CheckFileTask::OpenFile)
.collect();
let (sender, receiver) = if single_threaded {
// Use an unbounded queue for single threaded execution to prevent deadlocks
// when a single file schedules multiple dependencies.
crossbeam::channel::unbounded()
} else {
// Use a bounded queue to apply backpressure when the orchestration thread isn't able to keep
// up processing messages from the worker threads.
crossbeam::channel::bounded(num_threads)
};
let schedule_sender = sender.clone();
let schedule_dependency = move |file_id| {
schedule_sender
.send(ThreadPoolMessage::ScheduleDependency(file_id))
.unwrap();
};
let result = rayon::in_place_scope(|scope| {
let mut pending = 0usize;
loop {
context.program().cancelled()?;
// 1. Try to get a queued message to ensure that we have always remaining space in the channel to prevent blocking the worker threads.
// 2. Try to process a queued file
// 3. If there's no queued file wait for the next incoming message.
// 4. Exit if there are no more messages and no senders.
let message = if let Ok(message) = receiver.try_recv() {
message
} else if let Some(task) = queue.pop() {
pending += 1;
let task_context = context.task_context(&schedule_dependency);
let sender = sender.clone();
let task_span = tracing::trace_span!(
parent: &span,
"CheckFileTask::run",
file_id = task.file_id().as_u32(),
);
scope.spawn(move |_| {
task_span.in_scope(|| match task.run(&task_context) {
Ok(result) => {
sender.send(ThreadPoolMessage::Completed(result)).unwrap();
}
Err(err) => sender.send(ThreadPoolMessage::Errored(err)).unwrap(),
});
});
// If this is a single threaded rayon thread pool, yield the current thread
// or we never start processing the work items.
if single_threaded {
yield_local();
}
continue;
} else if let Ok(message) = receiver.recv() {
message
} else {
break;
};
match message {
ThreadPoolMessage::ScheduleDependency(dependency) => {
if let Some(task) = context.check_dependency(dependency) {
queue.push(CheckFileTask::Dependency(task));
}
}
ThreadPoolMessage::Completed(diagnostics) => {
context.push_diagnostics(&diagnostics);
pending -= 1;
if pending == 0 && queue.is_empty() {
break;
}
}
ThreadPoolMessage::Errored(err) => {
return Err(err);
}
}
}
Ok(())
});
result
}
}
#[derive(Debug)]
enum ThreadPoolMessage {
ScheduleDependency(FileId),
Completed(Diagnostics),
Errored(QueryError),
}

View File

@@ -1,275 +0,0 @@
use std::collections::hash_map::Entry;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use rustc_hash::FxHashMap;
use crate::db::{
Database, Db, DbRuntime, DbWithJar, HasJar, HasJars, JarsStorage, LintDb, LintJar,
ParallelDatabase, QueryResult, SemanticDb, SemanticJar, Snapshot, SourceDb, SourceJar, Upcast,
};
use crate::files::{FileId, Files};
use crate::Workspace;
pub mod check;
#[derive(Debug)]
pub struct Program {
jars: JarsStorage<Program>,
files: Files,
workspace: Workspace,
}
impl Program {
pub fn new(workspace: Workspace) -> Self {
Self {
jars: JarsStorage::default(),
files: Files::default(),
workspace,
}
}
pub fn apply_changes<I>(&mut self, changes: I)
where
I: IntoIterator<Item = FileWatcherChange>,
{
let mut aggregated_changes = AggregatedChanges::default();
aggregated_changes.extend(changes.into_iter().map(|change| FileChange {
id: self.files.intern(&change.path),
kind: change.kind,
}));
let (source, semantic, lint) = self.jars_mut();
for change in aggregated_changes.iter() {
semantic.module_resolver.remove_module_by_file(change.id);
semantic.semantic_indices.remove(&change.id);
source.sources.remove(&change.id);
source.parsed.remove(&change.id);
// TODO: remove all dependent modules as well
semantic.type_store.remove_module(change.id);
lint.lint_syntax.remove(&change.id);
lint.lint_semantic.remove(&change.id);
}
}
pub fn files(&self) -> &Files {
&self.files
}
pub fn workspace(&self) -> &Workspace {
&self.workspace
}
pub fn workspace_mut(&mut self) -> &mut Workspace {
&mut self.workspace
}
}
impl SourceDb for Program {
fn file_id(&self, path: &Path) -> FileId {
self.files.intern(path)
}
fn file_path(&self, file_id: FileId) -> Arc<Path> {
self.files.path(file_id)
}
}
impl DbWithJar<SourceJar> for Program {}
impl SemanticDb for Program {}
impl DbWithJar<SemanticJar> for Program {}
impl LintDb for Program {}
impl DbWithJar<LintJar> for Program {}
impl Upcast<dyn SemanticDb> for Program {
fn upcast(&self) -> &(dyn SemanticDb + 'static) {
self
}
}
impl Upcast<dyn SourceDb> for Program {
fn upcast(&self) -> &(dyn SourceDb + 'static) {
self
}
}
impl Upcast<dyn LintDb> for Program {
fn upcast(&self) -> &(dyn LintDb + 'static) {
self
}
}
impl Db for Program {}
impl Database for Program {
fn runtime(&self) -> &DbRuntime {
self.jars.runtime()
}
fn runtime_mut(&mut self) -> &mut DbRuntime {
self.jars.runtime_mut()
}
}
impl ParallelDatabase for Program {
fn snapshot(&self) -> Snapshot<Self> {
Snapshot::new(Self {
jars: self.jars.snapshot(),
files: self.files.snapshot(),
workspace: self.workspace.clone(),
})
}
}
impl HasJars for Program {
type Jars = (SourceJar, SemanticJar, LintJar);
fn jars(&self) -> QueryResult<&Self::Jars> {
self.jars.jars()
}
fn jars_mut(&mut self) -> &mut Self::Jars {
self.jars.jars_mut()
}
}
impl HasJar<SourceJar> for Program {
fn jar(&self) -> QueryResult<&SourceJar> {
Ok(&self.jars()?.0)
}
fn jar_mut(&mut self) -> &mut SourceJar {
&mut self.jars_mut().0
}
}
impl HasJar<SemanticJar> for Program {
fn jar(&self) -> QueryResult<&SemanticJar> {
Ok(&self.jars()?.1)
}
fn jar_mut(&mut self) -> &mut SemanticJar {
&mut self.jars_mut().1
}
}
impl HasJar<LintJar> for Program {
fn jar(&self) -> QueryResult<&LintJar> {
Ok(&self.jars()?.2)
}
fn jar_mut(&mut self) -> &mut LintJar {
&mut self.jars_mut().2
}
}
#[derive(Clone, Debug)]
pub struct FileWatcherChange {
path: PathBuf,
kind: FileChangeKind,
}
impl FileWatcherChange {
pub fn new(path: PathBuf, kind: FileChangeKind) -> Self {
Self { path, kind }
}
}
#[derive(Copy, Clone, Debug)]
struct FileChange {
id: FileId,
kind: FileChangeKind,
}
impl FileChange {
fn file_id(self) -> FileId {
self.id
}
fn kind(self) -> FileChangeKind {
self.kind
}
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum FileChangeKind {
Created,
Modified,
Deleted,
}
#[derive(Default, Debug)]
struct AggregatedChanges {
changes: FxHashMap<FileId, FileChangeKind>,
}
impl AggregatedChanges {
fn add(&mut self, change: FileChange) {
match self.changes.entry(change.file_id()) {
Entry::Occupied(mut entry) => {
let merged = entry.get_mut();
match (merged, change.kind()) {
(FileChangeKind::Created, FileChangeKind::Deleted) => {
// Deletion after creations means that ruff never saw the file.
entry.remove();
}
(FileChangeKind::Created, FileChangeKind::Modified) => {
// No-op, for ruff, modifying a file that it doesn't yet know that it exists is still considered a creation.
}
(FileChangeKind::Modified, FileChangeKind::Created) => {
// Uhh, that should probably not happen. Continue considering it a modification.
}
(FileChangeKind::Modified, FileChangeKind::Deleted) => {
*entry.get_mut() = FileChangeKind::Deleted;
}
(FileChangeKind::Deleted, FileChangeKind::Created) => {
*entry.get_mut() = FileChangeKind::Modified;
}
(FileChangeKind::Deleted, FileChangeKind::Modified) => {
// That's weird, but let's consider it a modification.
*entry.get_mut() = FileChangeKind::Modified;
}
(FileChangeKind::Created, FileChangeKind::Created)
| (FileChangeKind::Modified, FileChangeKind::Modified)
| (FileChangeKind::Deleted, FileChangeKind::Deleted) => {
// No-op transitions. Some of them should be impossible but we handle them anyway.
}
}
}
Entry::Vacant(entry) => {
entry.insert(change.kind());
}
}
}
fn extend<I>(&mut self, changes: I)
where
I: IntoIterator<Item = FileChange>,
{
let iter = changes.into_iter();
let (lower, _) = iter.size_hint();
self.changes.reserve(lower);
for change in iter {
self.add(change);
}
}
fn iter(&self) -> impl Iterator<Item = FileChange> + '_ {
self.changes.iter().map(|(id, kind)| FileChange {
id: *id,
kind: *kind,
})
}
}

View File

@@ -1,882 +0,0 @@
use std::num::NonZeroU32;
use ruff_python_ast as ast;
use ruff_python_ast::visitor::source_order::SourceOrderVisitor;
use ruff_python_ast::AstNode;
use crate::ast_ids::{NodeKey, TypedNodeKey};
use crate::cache::KeyValueCache;
use crate::db::{QueryResult, SemanticDb, SemanticJar};
use crate::files::FileId;
use crate::module::Module;
use crate::module::ModuleName;
use crate::parse::parse;
use crate::Name;
pub(crate) use definitions::Definition;
use definitions::{ImportDefinition, ImportFromDefinition};
pub(crate) use flow_graph::ConstrainedDefinition;
use flow_graph::{FlowGraph, FlowGraphBuilder, FlowNodeId, ReachableDefinitionsIterator};
use ruff_index::{newtype_index, IndexVec};
use rustc_hash::FxHashMap;
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
pub(crate) use symbol_table::{Dependency, SymbolId};
use symbol_table::{ScopeId, ScopeKind, SymbolFlags, SymbolTable, SymbolTableBuilder};
pub(crate) use types::{infer_definition_type, infer_symbol_public_type, Type, TypeStore};
mod definitions;
mod flow_graph;
mod symbol_table;
mod types;
#[tracing::instrument(level = "debug", skip(db))]
pub fn semantic_index(db: &dyn SemanticDb, file_id: FileId) -> QueryResult<Arc<SemanticIndex>> {
let jar: &SemanticJar = db.jar()?;
jar.semantic_indices.get(&file_id, |_| {
let parsed = parse(db.upcast(), file_id)?;
Ok(Arc::from(SemanticIndex::from_ast(parsed.syntax())))
})
}
#[tracing::instrument(level = "debug", skip(db))]
pub fn resolve_global_symbol(
db: &dyn SemanticDb,
module: Module,
name: &str,
) -> QueryResult<Option<GlobalSymbolId>> {
let file_id = module.path(db)?.file();
let symbol_table = &semantic_index(db, file_id)?.symbol_table;
let Some(symbol_id) = symbol_table.root_symbol_id_by_name(name) else {
return Ok(None);
};
Ok(Some(GlobalSymbolId { file_id, symbol_id }))
}
#[newtype_index]
pub struct ExpressionId;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct GlobalSymbolId {
pub(crate) file_id: FileId,
pub(crate) symbol_id: SymbolId,
}
#[derive(Debug)]
pub struct SemanticIndex {
symbol_table: SymbolTable,
flow_graph: FlowGraph,
expressions: FxHashMap<NodeKey, ExpressionId>,
expressions_by_id: IndexVec<ExpressionId, NodeKey>,
}
impl SemanticIndex {
pub fn from_ast(module: &ast::ModModule) -> Self {
let root_scope_id = SymbolTable::root_scope_id();
let mut indexer = SemanticIndexer {
symbol_table_builder: SymbolTableBuilder::new(),
flow_graph_builder: FlowGraphBuilder::new(),
scopes: vec![ScopeState {
scope_id: root_scope_id,
current_flow_node_id: FlowGraph::start(),
}],
expressions: FxHashMap::default(),
expressions_by_id: IndexVec::default(),
current_definition: None,
};
indexer.visit_body(&module.body);
indexer.finish()
}
fn resolve_expression_id<'a>(
&self,
ast: &'a ast::ModModule,
expression_id: ExpressionId,
) -> ast::AnyNodeRef<'a> {
let node_key = self.expressions_by_id[expression_id];
node_key
.resolve(ast.as_any_node_ref())
.expect("node to resolve")
}
/// Return an iterator over all definitions of `symbol_id` reachable from `use_expr`. The value
/// of `symbol_id` in `use_expr` must originate from one of the iterated definitions (or from
/// an external reassignment of the name outside of this scope).
pub fn reachable_definitions(
&self,
symbol_id: SymbolId,
use_expr: &ast::Expr,
) -> ReachableDefinitionsIterator {
let expression_id = self.expression_id(use_expr);
ReachableDefinitionsIterator::new(
&self.flow_graph,
symbol_id,
self.flow_graph.for_expr(expression_id),
)
}
pub fn expression_id(&self, expression: &ast::Expr) -> ExpressionId {
self.expressions[&NodeKey::from_node(expression.into())]
}
pub fn symbol_table(&self) -> &SymbolTable {
&self.symbol_table
}
}
#[derive(Debug)]
struct ScopeState {
scope_id: ScopeId,
current_flow_node_id: FlowNodeId,
}
#[derive(Debug)]
struct SemanticIndexer {
symbol_table_builder: SymbolTableBuilder,
flow_graph_builder: FlowGraphBuilder,
scopes: Vec<ScopeState>,
/// the definition whose target(s) we are currently walking
current_definition: Option<Definition>,
expressions: FxHashMap<NodeKey, ExpressionId>,
expressions_by_id: IndexVec<ExpressionId, NodeKey>,
}
impl SemanticIndexer {
pub(crate) fn finish(mut self) -> SemanticIndex {
let SemanticIndexer {
flow_graph_builder,
symbol_table_builder,
..
} = self;
self.expressions.shrink_to_fit();
self.expressions_by_id.shrink_to_fit();
SemanticIndex {
flow_graph: flow_graph_builder.finish(),
symbol_table: symbol_table_builder.finish(),
expressions: self.expressions,
expressions_by_id: self.expressions_by_id,
}
}
fn set_current_flow_node(&mut self, new_flow_node_id: FlowNodeId) {
let scope_state = self.scopes.last_mut().expect("scope stack is never empty");
scope_state.current_flow_node_id = new_flow_node_id;
}
fn current_flow_node(&self) -> FlowNodeId {
self.scopes
.last()
.expect("scope stack is never empty")
.current_flow_node_id
}
fn add_or_update_symbol(&mut self, identifier: &str, flags: SymbolFlags) -> SymbolId {
self.symbol_table_builder
.add_or_update_symbol(self.cur_scope(), identifier, flags)
}
fn add_or_update_symbol_with_def(
&mut self,
identifier: &str,
definition: Definition,
) -> SymbolId {
let symbol_id = self.add_or_update_symbol(identifier, SymbolFlags::IS_DEFINED);
self.symbol_table_builder
.add_definition(symbol_id, definition.clone());
let new_flow_node_id =
self.flow_graph_builder
.add_definition(symbol_id, definition, self.current_flow_node());
self.set_current_flow_node(new_flow_node_id);
symbol_id
}
fn push_scope(
&mut self,
name: &str,
kind: ScopeKind,
definition: Option<Definition>,
defining_symbol: Option<SymbolId>,
) -> ScopeId {
let scope_id = self.symbol_table_builder.add_child_scope(
self.cur_scope(),
name,
kind,
definition,
defining_symbol,
);
self.scopes.push(ScopeState {
scope_id,
current_flow_node_id: FlowGraph::start(),
});
scope_id
}
fn pop_scope(&mut self) -> ScopeId {
self.scopes
.pop()
.expect("Scope stack should never be empty")
.scope_id
}
fn cur_scope(&self) -> ScopeId {
self.scopes
.last()
.expect("Scope stack should never be empty")
.scope_id
}
fn record_scope_for_node(&mut self, node_key: NodeKey, scope_id: ScopeId) {
self.symbol_table_builder
.record_scope_for_node(node_key, scope_id);
}
fn insert_constraint(&mut self, expr: &ast::Expr) {
let node_key = NodeKey::from_node(expr.into());
let expression_id = self.expressions[&node_key];
let constraint = self
.flow_graph_builder
.add_constraint(self.current_flow_node(), expression_id);
self.set_current_flow_node(constraint);
}
fn with_type_params(
&mut self,
name: &str,
params: &Option<Box<ast::TypeParams>>,
definition: Option<Definition>,
defining_symbol: Option<SymbolId>,
nested: impl FnOnce(&mut Self) -> ScopeId,
) -> ScopeId {
if let Some(type_params) = params {
self.push_scope(name, ScopeKind::Annotation, definition, defining_symbol);
for type_param in &type_params.type_params {
let name = match type_param {
ast::TypeParam::TypeVar(ast::TypeParamTypeVar { name, .. }) => name,
ast::TypeParam::ParamSpec(ast::TypeParamParamSpec { name, .. }) => name,
ast::TypeParam::TypeVarTuple(ast::TypeParamTypeVarTuple { name, .. }) => name,
};
self.add_or_update_symbol(name, SymbolFlags::IS_DEFINED);
}
}
let scope_id = nested(self);
if params.is_some() {
self.pop_scope();
}
scope_id
}
}
impl SourceOrderVisitor<'_> for SemanticIndexer {
fn visit_expr(&mut self, expr: &ast::Expr) {
let node_key = NodeKey::from_node(expr.into());
let expression_id = self.expressions_by_id.push(node_key);
let flow_expression_id = self
.flow_graph_builder
.record_expr(self.current_flow_node());
debug_assert_eq!(expression_id, flow_expression_id);
let symbol_expression_id = self
.symbol_table_builder
.record_expression(self.cur_scope());
debug_assert_eq!(expression_id, symbol_expression_id);
self.expressions.insert(node_key, expression_id);
match expr {
ast::Expr::Name(ast::ExprName { id, ctx, .. }) => {
let flags = match ctx {
ast::ExprContext::Load => SymbolFlags::IS_USED,
ast::ExprContext::Store => SymbolFlags::IS_DEFINED,
ast::ExprContext::Del => SymbolFlags::IS_DEFINED,
ast::ExprContext::Invalid => SymbolFlags::empty(),
};
self.add_or_update_symbol(id, flags);
if flags.contains(SymbolFlags::IS_DEFINED) {
if let Some(curdef) = self.current_definition.clone() {
self.add_or_update_symbol_with_def(id, curdef);
}
}
ast::visitor::source_order::walk_expr(self, expr);
}
ast::Expr::Named(node) => {
debug_assert!(self.current_definition.is_none());
self.current_definition =
Some(Definition::NamedExpr(TypedNodeKey::from_node(node)));
// TODO walrus in comprehensions is implicitly nonlocal
self.visit_expr(&node.target);
self.current_definition = None;
self.visit_expr(&node.value);
}
ast::Expr::If(ast::ExprIf {
body, test, orelse, ..
}) => {
// TODO detect statically known truthy or falsy test (via type inference, not naive
// AST inspection, so we can't simplify here, need to record test expression in CFG
// for later checking)
self.visit_expr(test);
let if_branch = self.flow_graph_builder.add_branch(self.current_flow_node());
self.set_current_flow_node(if_branch);
self.insert_constraint(test);
self.visit_expr(body);
let post_body = self.current_flow_node();
self.set_current_flow_node(if_branch);
self.visit_expr(orelse);
let post_else = self
.flow_graph_builder
.add_phi(self.current_flow_node(), post_body);
self.set_current_flow_node(post_else);
}
_ => {
ast::visitor::source_order::walk_expr(self, expr);
}
}
}
fn visit_stmt(&mut self, stmt: &ast::Stmt) {
// TODO need to capture more definition statements here
match stmt {
ast::Stmt::ClassDef(node) => {
let node_key = TypedNodeKey::from_node(node);
let def = Definition::ClassDef(node_key.clone());
let symbol_id = self.add_or_update_symbol_with_def(&node.name, def.clone());
for decorator in &node.decorator_list {
self.visit_decorator(decorator);
}
let scope_id = self.with_type_params(
&node.name,
&node.type_params,
Some(def.clone()),
Some(symbol_id),
|indexer| {
if let Some(arguments) = &node.arguments {
indexer.visit_arguments(arguments);
}
let scope_id = indexer.push_scope(
&node.name,
ScopeKind::Class,
Some(def.clone()),
Some(symbol_id),
);
indexer.visit_body(&node.body);
indexer.pop_scope();
scope_id
},
);
self.record_scope_for_node(*node_key.erased(), scope_id);
}
ast::Stmt::FunctionDef(node) => {
let node_key = TypedNodeKey::from_node(node);
let def = Definition::FunctionDef(node_key.clone());
let symbol_id = self.add_or_update_symbol_with_def(&node.name, def.clone());
for decorator in &node.decorator_list {
self.visit_decorator(decorator);
}
let scope_id = self.with_type_params(
&node.name,
&node.type_params,
Some(def.clone()),
Some(symbol_id),
|indexer| {
indexer.visit_parameters(&node.parameters);
for expr in &node.returns {
indexer.visit_annotation(expr);
}
let scope_id = indexer.push_scope(
&node.name,
ScopeKind::Function,
Some(def.clone()),
Some(symbol_id),
);
indexer.visit_body(&node.body);
indexer.pop_scope();
scope_id
},
);
self.record_scope_for_node(*node_key.erased(), scope_id);
}
ast::Stmt::Import(ast::StmtImport { names, .. }) => {
for alias in names {
let symbol_name = if let Some(asname) = &alias.asname {
asname.id.as_str()
} else {
alias.name.id.split('.').next().unwrap()
};
let module = ModuleName::new(&alias.name.id);
let def = Definition::Import(ImportDefinition {
module: module.clone(),
});
self.add_or_update_symbol_with_def(symbol_name, def);
self.symbol_table_builder
.add_dependency(Dependency::Module(module));
}
}
ast::Stmt::ImportFrom(ast::StmtImportFrom {
module,
names,
level,
..
}) => {
let module = module.as_ref().map(|m| ModuleName::new(&m.id));
for alias in names {
let symbol_name = if let Some(asname) = &alias.asname {
asname.id.as_str()
} else {
alias.name.id.as_str()
};
let def = Definition::ImportFrom(ImportFromDefinition {
module: module.clone(),
name: Name::new(&alias.name.id),
level: *level,
});
self.add_or_update_symbol_with_def(symbol_name, def);
}
let dependency = if let Some(module) = module {
match NonZeroU32::new(*level) {
Some(level) => Dependency::Relative {
level,
module: Some(module),
},
None => Dependency::Module(module),
}
} else {
Dependency::Relative {
level: NonZeroU32::new(*level)
.expect("Import without a module to have a level > 0"),
module,
}
};
self.symbol_table_builder.add_dependency(dependency);
}
ast::Stmt::Assign(node) => {
debug_assert!(self.current_definition.is_none());
self.visit_expr(&node.value);
self.current_definition =
Some(Definition::Assignment(TypedNodeKey::from_node(node)));
for expr in &node.targets {
self.visit_expr(expr);
}
self.current_definition = None;
}
ast::Stmt::If(node) => {
// TODO detect statically known truthy or falsy test (via type inference, not naive
// AST inspection, so we can't simplify here, need to record test expression in CFG
// for later checking)
// we visit the if "test" condition first regardless
self.visit_expr(&node.test);
// create branch node: does the if test pass or not?
let if_branch = self.flow_graph_builder.add_branch(self.current_flow_node());
// visit the body of the `if` clause
self.set_current_flow_node(if_branch);
self.insert_constraint(&node.test);
self.visit_body(&node.body);
// Flow node for the last if/elif condition branch; represents the "no branch
// taken yet" possibility (where "taking a branch" means that the condition in an
// if or elif evaluated to true and control flow went into that clause).
let mut prior_branch = if_branch;
// Flow node for the state after the prior if/elif/else clause; represents "we have
// taken one of the branches up to this point." Initially set to the post-if-clause
// state, later will be set to the phi node joining that possible path with the
// possibility that we took a later if/elif/else clause instead.
let mut post_prior_clause = self.current_flow_node();
// Flag to mark if the final clause is an "else" -- if so, that means the "match no
// clauses" path is not possible, we have to go through one of the clauses.
let mut last_branch_is_else = false;
for clause in &node.elif_else_clauses {
if let Some(test) = &clause.test {
self.visit_expr(test);
// This is an elif clause. Create a new branch node. Its predecessor is the
// previous branch node, because we can only take one branch in an entire
// if/elif/else chain, so if we take this branch, it can only be because we
// didn't take the previous one.
prior_branch = self.flow_graph_builder.add_branch(prior_branch);
self.set_current_flow_node(prior_branch);
self.insert_constraint(test);
} else {
// This is an else clause. No need to create a branch node; there's no
// branch here, if we haven't taken any previous branch, we definitely go
// into the "else" clause.
self.set_current_flow_node(prior_branch);
last_branch_is_else = true;
}
self.visit_elif_else_clause(clause);
// Update `post_prior_clause` to a new phi node joining the possibility that we
// took any of the previous branches with the possibility that we took the one
// just visited.
post_prior_clause = self
.flow_graph_builder
.add_phi(self.current_flow_node(), post_prior_clause);
}
if !last_branch_is_else {
// Final branch was not an "else", which means it's possible we took zero
// branches in the entire if/elif chain, so we need one more phi node to join
// the "no branches taken" possibility.
post_prior_clause = self
.flow_graph_builder
.add_phi(post_prior_clause, prior_branch);
}
// Onward, with current flow node set to our final Phi node.
self.set_current_flow_node(post_prior_clause);
}
_ => {
ast::visitor::source_order::walk_stmt(self, stmt);
}
}
}
}
#[derive(Debug, Default)]
pub struct SemanticIndexStorage(KeyValueCache<FileId, Arc<SemanticIndex>>);
impl Deref for SemanticIndexStorage {
type Target = KeyValueCache<FileId, Arc<SemanticIndex>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for SemanticIndexStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
#[cfg(test)]
mod tests {
use crate::semantic::symbol_table::{Symbol, SymbolIterator};
use ruff_python_ast as ast;
use ruff_python_ast::ModModule;
use ruff_python_parser::{Mode, Parsed};
use super::{Definition, ScopeKind, SemanticIndex, SymbolId};
fn parse(code: &str) -> Parsed<ModModule> {
ruff_python_parser::parse_unchecked(code, Mode::Module)
.try_into_module()
.unwrap()
}
fn names<I>(it: SymbolIterator<I>) -> Vec<&str>
where
I: Iterator<Item = SymbolId>,
{
let mut symbols: Vec<_> = it.map(Symbol::name).collect();
symbols.sort_unstable();
symbols
}
#[test]
fn empty() {
let parsed = parse("");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()).len(), 0);
}
#[test]
fn simple() {
let parsed = parse("x");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["x"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("x").unwrap())
.len(),
0
);
}
#[test]
fn annotation_only() {
let parsed = parse("x: int");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["int", "x"]);
// TODO record definition
}
#[test]
fn import() {
let parsed = parse("import foo");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["foo"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("foo").unwrap())
.len(),
1
);
}
#[test]
fn import_sub() {
let parsed = parse("import foo.bar");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["foo"]);
}
#[test]
fn import_as() {
let parsed = parse("import foo.bar as baz");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["baz"]);
}
#[test]
fn import_from() {
let parsed = parse("from bar import foo");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["foo"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("foo").unwrap())
.len(),
1
);
assert!(
table.root_symbol_id_by_name("foo").is_some_and(|sid| {
let s = sid.symbol(&table);
s.is_defined() || !s.is_used()
}),
"symbols that are defined get the defined flag"
);
}
#[test]
fn assign() {
let parsed = parse("x = foo");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["foo", "x"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("x").unwrap())
.len(),
1
);
assert!(
table.root_symbol_id_by_name("foo").is_some_and(|sid| {
let s = sid.symbol(&table);
!s.is_defined() && s.is_used()
}),
"a symbol used but not defined in a scope should have only the used flag"
);
}
#[test]
fn class_scope() {
let parsed = parse(
"
class C:
x = 1
y = 2
",
);
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["C", "y"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 1);
let c_scope = scopes[0].scope(&table);
assert_eq!(c_scope.kind(), ScopeKind::Class);
assert_eq!(c_scope.name(), "C");
assert_eq!(names(table.symbols_for_scope(scopes[0])), vec!["x"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("C").unwrap())
.len(),
1
);
}
#[test]
fn func_scope() {
let parsed = parse(
"
def func():
x = 1
y = 2
",
);
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["func", "y"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 1);
let func_scope = scopes[0].scope(&table);
assert_eq!(func_scope.kind(), ScopeKind::Function);
assert_eq!(func_scope.name(), "func");
assert_eq!(names(table.symbols_for_scope(scopes[0])), vec!["x"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("func").unwrap())
.len(),
1
);
}
#[test]
fn dupes() {
let parsed = parse(
"
def func():
x = 1
def func():
y = 2
",
);
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["func"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 2);
let func_scope_1 = scopes[0].scope(&table);
let func_scope_2 = scopes[1].scope(&table);
assert_eq!(func_scope_1.kind(), ScopeKind::Function);
assert_eq!(func_scope_1.name(), "func");
assert_eq!(func_scope_2.kind(), ScopeKind::Function);
assert_eq!(func_scope_2.name(), "func");
assert_eq!(names(table.symbols_for_scope(scopes[0])), vec!["x"]);
assert_eq!(names(table.symbols_for_scope(scopes[1])), vec!["y"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("func").unwrap())
.len(),
2
);
}
#[test]
fn generic_func() {
let parsed = parse(
"
def func[T]():
x = 1
",
);
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["func"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 1);
let ann_scope_id = scopes[0];
let ann_scope = ann_scope_id.scope(&table);
assert_eq!(ann_scope.kind(), ScopeKind::Annotation);
assert_eq!(ann_scope.name(), "func");
assert_eq!(names(table.symbols_for_scope(ann_scope_id)), vec!["T"]);
let scopes = table.child_scope_ids_of(ann_scope_id);
assert_eq!(scopes.len(), 1);
let func_scope_id = scopes[0];
let func_scope = func_scope_id.scope(&table);
assert_eq!(func_scope.kind(), ScopeKind::Function);
assert_eq!(func_scope.name(), "func");
assert_eq!(names(table.symbols_for_scope(func_scope_id)), vec!["x"]);
}
#[test]
fn generic_class() {
let parsed = parse(
"
class C[T]:
x = 1
",
);
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["C"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 1);
let ann_scope_id = scopes[0];
let ann_scope = ann_scope_id.scope(&table);
assert_eq!(ann_scope.kind(), ScopeKind::Annotation);
assert_eq!(ann_scope.name(), "C");
assert_eq!(names(table.symbols_for_scope(ann_scope_id)), vec!["T"]);
assert!(
table
.symbol_by_name(ann_scope_id, "T")
.is_some_and(|s| s.is_defined() && !s.is_used()),
"type parameters are defined by the scope that introduces them"
);
let scopes = table.child_scope_ids_of(ann_scope_id);
assert_eq!(scopes.len(), 1);
let func_scope_id = scopes[0];
let func_scope = func_scope_id.scope(&table);
assert_eq!(func_scope.kind(), ScopeKind::Class);
assert_eq!(func_scope.name(), "C");
assert_eq!(names(table.symbols_for_scope(func_scope_id)), vec!["x"]);
}
#[test]
fn reachability_trivial() {
let parsed = parse("x = 1; x");
let ast = parsed.syntax();
let index = SemanticIndex::from_ast(ast);
let table = &index.symbol_table;
let x_sym = table
.root_symbol_id_by_name("x")
.expect("x symbol should exist");
let ast::Stmt::Expr(ast::StmtExpr { value: x_use, .. }) = &ast.body[1] else {
panic!("should be an expr")
};
let x_defs: Vec<_> = index
.reachable_definitions(x_sym, x_use)
.map(|constrained_definition| constrained_definition.definition)
.collect();
assert_eq!(x_defs.len(), 1);
let Definition::Assignment(node_key) = &x_defs[0] else {
panic!("def should be an assignment")
};
let Some(def_node) = node_key.resolve(ast.into()) else {
panic!("node key should resolve")
};
let ast::Expr::NumberLiteral(ast::ExprNumberLiteral {
value: ast::Number::Int(num),
..
}) = &*def_node.value
else {
panic!("should be a number literal")
};
assert_eq!(*num, 1);
}
#[test]
fn expression_scope() {
let parsed = parse("x = 1;\ndef test():\n y = 4");
let ast = parsed.syntax();
let index = SemanticIndex::from_ast(ast);
let table = &index.symbol_table;
let x_sym = table
.root_symbol_by_name("x")
.expect("x symbol should exist");
let x_stmt = ast.body[0].as_assign_stmt().unwrap();
let x_id = index.expression_id(&x_stmt.targets[0]);
assert_eq!(table.scope_of_expression(x_id).kind(), ScopeKind::Module);
assert_eq!(table.scope_id_of_expression(x_id), x_sym.scope_id());
let def = ast.body[1].as_function_def_stmt().unwrap();
let y_stmt = def.body[0].as_assign_stmt().unwrap();
let y_id = index.expression_id(&y_stmt.targets[0]);
assert_eq!(table.scope_of_expression(y_id).kind(), ScopeKind::Function);
}
}

View File

@@ -1,52 +0,0 @@
use crate::ast_ids::TypedNodeKey;
use crate::semantic::ModuleName;
use crate::Name;
use ruff_python_ast as ast;
// TODO storing TypedNodeKey for definitions means we have to search to find them again in the AST;
// this is at best O(log n). If looking up definitions is a bottleneck we should look for
// alternatives here.
// TODO intern Definitions in SymbolTable and reference using IDs?
#[derive(Clone, Debug)]
pub enum Definition {
// For the import cases, we don't need reference to any arbitrary AST subtrees (annotations,
// RHS), and referencing just the import statement node is imprecise (a single import statement
// can assign many symbols, we'd have to re-search for the one we care about), so we just copy
// the small amount of information we need from the AST.
Import(ImportDefinition),
ImportFrom(ImportFromDefinition),
ClassDef(TypedNodeKey<ast::StmtClassDef>),
FunctionDef(TypedNodeKey<ast::StmtFunctionDef>),
Assignment(TypedNodeKey<ast::StmtAssign>),
AnnotatedAssignment(TypedNodeKey<ast::StmtAnnAssign>),
NamedExpr(TypedNodeKey<ast::ExprNamed>),
/// represents the implicit initial definition of every name as "unbound"
Unbound,
// TODO with statements, except handlers, function args...
}
#[derive(Clone, Debug)]
pub struct ImportDefinition {
pub module: ModuleName,
}
#[derive(Clone, Debug)]
pub struct ImportFromDefinition {
pub module: Option<ModuleName>,
pub name: Name,
pub level: u32,
}
impl ImportFromDefinition {
pub fn module(&self) -> Option<&ModuleName> {
self.module.as_ref()
}
pub fn name(&self) -> &Name {
&self.name
}
pub fn level(&self) -> u32 {
self.level
}
}

View File

@@ -1,270 +0,0 @@
use super::symbol_table::SymbolId;
use crate::semantic::{Definition, ExpressionId};
use ruff_index::{newtype_index, IndexVec};
use std::iter::FusedIterator;
use std::ops::Range;
#[newtype_index]
pub struct FlowNodeId;
#[derive(Debug)]
pub(crate) enum FlowNode {
Start,
Definition(DefinitionFlowNode),
Branch(BranchFlowNode),
Phi(PhiFlowNode),
Constraint(ConstraintFlowNode),
}
/// A point in control flow where a symbol is defined
#[derive(Debug)]
pub(crate) struct DefinitionFlowNode {
symbol_id: SymbolId,
definition: Definition,
predecessor: FlowNodeId,
}
/// A branch in control flow
#[derive(Debug)]
pub(crate) struct BranchFlowNode {
predecessor: FlowNodeId,
}
/// A join point where control flow paths come together
#[derive(Debug)]
pub(crate) struct PhiFlowNode {
first_predecessor: FlowNodeId,
second_predecessor: FlowNodeId,
}
/// A branch test which may apply constraints to a symbol's type
#[derive(Debug)]
pub(crate) struct ConstraintFlowNode {
predecessor: FlowNodeId,
test_expression: ExpressionId,
}
#[derive(Debug)]
pub struct FlowGraph {
flow_nodes_by_id: IndexVec<FlowNodeId, FlowNode>,
expression_map: IndexVec<ExpressionId, FlowNodeId>,
}
impl FlowGraph {
pub fn start() -> FlowNodeId {
FlowNodeId::from_usize(0)
}
pub fn for_expr(&self, expr: ExpressionId) -> FlowNodeId {
self.expression_map[expr]
}
}
#[derive(Debug)]
pub(crate) struct FlowGraphBuilder {
flow_graph: FlowGraph,
}
impl FlowGraphBuilder {
pub(crate) fn new() -> Self {
let mut graph = FlowGraph {
flow_nodes_by_id: IndexVec::default(),
expression_map: IndexVec::default(),
};
graph.flow_nodes_by_id.push(FlowNode::Start);
Self { flow_graph: graph }
}
pub(crate) fn add(&mut self, node: FlowNode) -> FlowNodeId {
self.flow_graph.flow_nodes_by_id.push(node)
}
pub(crate) fn add_definition(
&mut self,
symbol_id: SymbolId,
definition: Definition,
predecessor: FlowNodeId,
) -> FlowNodeId {
self.add(FlowNode::Definition(DefinitionFlowNode {
symbol_id,
definition,
predecessor,
}))
}
pub(crate) fn add_branch(&mut self, predecessor: FlowNodeId) -> FlowNodeId {
self.add(FlowNode::Branch(BranchFlowNode { predecessor }))
}
pub(crate) fn add_phi(
&mut self,
first_predecessor: FlowNodeId,
second_predecessor: FlowNodeId,
) -> FlowNodeId {
self.add(FlowNode::Phi(PhiFlowNode {
first_predecessor,
second_predecessor,
}))
}
pub(crate) fn add_constraint(
&mut self,
predecessor: FlowNodeId,
test_expression: ExpressionId,
) -> FlowNodeId {
self.add(FlowNode::Constraint(ConstraintFlowNode {
predecessor,
test_expression,
}))
}
pub(super) fn record_expr(&mut self, node_id: FlowNodeId) -> ExpressionId {
self.flow_graph.expression_map.push(node_id)
}
pub(super) fn finish(mut self) -> FlowGraph {
self.flow_graph.flow_nodes_by_id.shrink_to_fit();
self.flow_graph.expression_map.shrink_to_fit();
self.flow_graph
}
}
/// A definition, and the set of constraints between a use and the definition
#[derive(Debug, Clone)]
pub struct ConstrainedDefinition {
pub definition: Definition,
pub constraints: Vec<ExpressionId>,
}
/// A flow node and the constraints we passed through to reach it
#[derive(Debug)]
struct FlowState {
node_id: FlowNodeId,
constraints_range: Range<usize>,
}
#[derive(Debug)]
pub struct ReachableDefinitionsIterator<'a> {
flow_graph: &'a FlowGraph,
symbol_id: SymbolId,
pending: Vec<FlowState>,
constraints: Vec<ExpressionId>,
}
impl<'a> ReachableDefinitionsIterator<'a> {
pub fn new(flow_graph: &'a FlowGraph, symbol_id: SymbolId, start_node_id: FlowNodeId) -> Self {
Self {
flow_graph,
symbol_id,
pending: vec![FlowState {
node_id: start_node_id,
constraints_range: 0..0,
}],
constraints: vec![],
}
}
}
impl<'a> Iterator for ReachableDefinitionsIterator<'a> {
type Item = ConstrainedDefinition;
fn next(&mut self) -> Option<Self::Item> {
let FlowState {
mut node_id,
mut constraints_range,
} = self.pending.pop()?;
self.constraints.truncate(constraints_range.end + 1);
loop {
match &self.flow_graph.flow_nodes_by_id[node_id] {
FlowNode::Start => {
// constraints on unbound are irrelevant
return Some(ConstrainedDefinition {
definition: Definition::Unbound,
constraints: vec![],
});
}
FlowNode::Definition(def_node) => {
if def_node.symbol_id == self.symbol_id {
return Some(ConstrainedDefinition {
definition: def_node.definition.clone(),
constraints: self.constraints[constraints_range].to_vec(),
});
}
node_id = def_node.predecessor;
}
FlowNode::Branch(branch_node) => {
node_id = branch_node.predecessor;
}
FlowNode::Phi(phi_node) => {
self.pending.push(FlowState {
node_id: phi_node.first_predecessor,
constraints_range: constraints_range.clone(),
});
node_id = phi_node.second_predecessor;
}
FlowNode::Constraint(constraint_node) => {
node_id = constraint_node.predecessor;
self.constraints.push(constraint_node.test_expression);
constraints_range.end += 1;
}
}
}
}
}
impl<'a> FusedIterator for ReachableDefinitionsIterator<'a> {}
impl std::fmt::Display for FlowGraph {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
writeln!(f, "flowchart TD")?;
for (id, node) in self.flow_nodes_by_id.iter_enumerated() {
write!(f, " id{}", id.as_u32())?;
match node {
FlowNode::Start => writeln!(f, r"[\Start/]")?,
FlowNode::Definition(def_node) => {
writeln!(f, r"(Define symbol {})", def_node.symbol_id.as_u32())?;
writeln!(
f,
r" id{}-->id{}",
def_node.predecessor.as_u32(),
id.as_u32()
)?;
}
FlowNode::Branch(branch_node) => {
writeln!(f, r"{{Branch}}")?;
writeln!(
f,
r" id{}-->id{}",
branch_node.predecessor.as_u32(),
id.as_u32()
)?;
}
FlowNode::Phi(phi_node) => {
writeln!(f, r"((Phi))")?;
writeln!(
f,
r" id{}-->id{}",
phi_node.second_predecessor.as_u32(),
id.as_u32()
)?;
writeln!(
f,
r" id{}-->id{}",
phi_node.first_predecessor.as_u32(),
id.as_u32()
)?;
}
FlowNode::Constraint(constraint_node) => {
writeln!(f, r"((Constraint))")?;
writeln!(
f,
r" id{}-->id{}",
constraint_node.predecessor.as_u32(),
id.as_u32()
)?;
}
}
}
Ok(())
}
}

View File

@@ -1,560 +0,0 @@
#![allow(dead_code)]
use std::hash::{Hash, Hasher};
use std::iter::{Copied, DoubleEndedIterator, FusedIterator};
use std::num::NonZeroU32;
use bitflags::bitflags;
use hashbrown::hash_map::{Keys, RawEntryMut};
use rustc_hash::{FxHashMap, FxHasher};
use ruff_index::{newtype_index, IndexVec};
use crate::ast_ids::NodeKey;
use crate::module::ModuleName;
use crate::semantic::{Definition, ExpressionId};
use crate::Name;
type Map<K, V> = hashbrown::HashMap<K, V, ()>;
#[newtype_index]
pub struct ScopeId;
impl ScopeId {
pub fn scope(self, table: &SymbolTable) -> &Scope {
&table.scopes_by_id[self]
}
}
#[newtype_index]
pub struct SymbolId;
impl SymbolId {
pub fn symbol(self, table: &SymbolTable) -> &Symbol {
&table.symbols_by_id[self]
}
}
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum ScopeKind {
Module,
Annotation,
Class,
Function,
}
#[derive(Debug)]
pub struct Scope {
name: Name,
kind: ScopeKind,
parent: Option<ScopeId>,
children: Vec<ScopeId>,
/// the definition (e.g. class or function) that created this scope
definition: Option<Definition>,
/// the symbol (e.g. class or function) that owns this scope
defining_symbol: Option<SymbolId>,
/// symbol IDs, hashed by symbol name
symbols_by_name: Map<SymbolId, ()>,
}
impl Scope {
pub fn name(&self) -> &str {
self.name.as_str()
}
pub fn kind(&self) -> ScopeKind {
self.kind
}
pub fn definition(&self) -> Option<Definition> {
self.definition.clone()
}
pub fn defining_symbol(&self) -> Option<SymbolId> {
self.defining_symbol
}
}
#[derive(Debug)]
pub(crate) enum Kind {
FreeVar,
CellVar,
CellVarAssigned,
ExplicitGlobal,
ImplicitGlobal,
}
bitflags! {
#[derive(Copy,Clone,Debug)]
pub struct SymbolFlags: u8 {
const IS_USED = 1 << 0;
const IS_DEFINED = 1 << 1;
/// TODO: This flag is not yet set by anything
const MARKED_GLOBAL = 1 << 2;
/// TODO: This flag is not yet set by anything
const MARKED_NONLOCAL = 1 << 3;
}
}
#[derive(Debug)]
pub struct Symbol {
name: Name,
flags: SymbolFlags,
scope_id: ScopeId,
// kind: Kind,
}
impl Symbol {
pub fn name(&self) -> &str {
self.name.as_str()
}
pub fn scope_id(&self) -> ScopeId {
self.scope_id
}
/// Is the symbol used in its containing scope?
pub fn is_used(&self) -> bool {
self.flags.contains(SymbolFlags::IS_USED)
}
/// Is the symbol defined in its containing scope?
pub fn is_defined(&self) -> bool {
self.flags.contains(SymbolFlags::IS_DEFINED)
}
// TODO: implement Symbol.kind 2-pass analysis to categorize as: free-var, cell-var,
// explicit-global, implicit-global and implement Symbol.kind by modifying the preorder
// traversal code
}
#[derive(Debug, Clone)]
pub enum Dependency {
Module(ModuleName),
Relative {
level: NonZeroU32,
module: Option<ModuleName>,
},
}
/// Table of all symbols in all scopes for a module.
#[derive(Debug)]
pub struct SymbolTable {
scopes_by_id: IndexVec<ScopeId, Scope>,
symbols_by_id: IndexVec<SymbolId, Symbol>,
/// the definitions for each symbol
defs: FxHashMap<SymbolId, Vec<Definition>>,
/// map of AST node (e.g. class/function def) to sub-scope it creates
scopes_by_node: FxHashMap<NodeKey, ScopeId>,
/// Maps expressions to their enclosing scope.
expression_scopes: IndexVec<ExpressionId, ScopeId>,
/// dependencies of this module
dependencies: Vec<Dependency>,
}
impl SymbolTable {
pub fn dependencies(&self) -> &[Dependency] {
&self.dependencies
}
pub const fn root_scope_id() -> ScopeId {
ScopeId::from_usize(0)
}
pub fn root_scope(&self) -> &Scope {
&self.scopes_by_id[SymbolTable::root_scope_id()]
}
pub fn symbol_ids_for_scope(&self, scope_id: ScopeId) -> Copied<Keys<SymbolId, ()>> {
self.scopes_by_id[scope_id].symbols_by_name.keys().copied()
}
pub fn symbols_for_scope(
&self,
scope_id: ScopeId,
) -> SymbolIterator<Copied<Keys<SymbolId, ()>>> {
SymbolIterator {
table: self,
ids: self.symbol_ids_for_scope(scope_id),
}
}
pub fn root_symbol_ids(&self) -> Copied<Keys<SymbolId, ()>> {
self.symbol_ids_for_scope(SymbolTable::root_scope_id())
}
pub fn root_symbols(&self) -> SymbolIterator<Copied<Keys<SymbolId, ()>>> {
self.symbols_for_scope(SymbolTable::root_scope_id())
}
pub fn child_scope_ids_of(&self, scope_id: ScopeId) -> &[ScopeId] {
&self.scopes_by_id[scope_id].children
}
pub fn child_scopes_of(&self, scope_id: ScopeId) -> ScopeIterator<&[ScopeId]> {
ScopeIterator {
table: self,
ids: self.child_scope_ids_of(scope_id),
}
}
pub fn root_child_scope_ids(&self) -> &[ScopeId] {
self.child_scope_ids_of(SymbolTable::root_scope_id())
}
pub fn root_child_scopes(&self) -> ScopeIterator<&[ScopeId]> {
self.child_scopes_of(SymbolTable::root_scope_id())
}
pub fn symbol_id_by_name(&self, scope_id: ScopeId, name: &str) -> Option<SymbolId> {
let scope = &self.scopes_by_id[scope_id];
let hash = SymbolTable::hash_name(name);
let name = Name::new(name);
Some(
*scope
.symbols_by_name
.raw_entry()
.from_hash(hash, |symid| self.symbols_by_id[*symid].name == name)?
.0,
)
}
pub fn symbol_by_name(&self, scope_id: ScopeId, name: &str) -> Option<&Symbol> {
Some(&self.symbols_by_id[self.symbol_id_by_name(scope_id, name)?])
}
pub fn root_symbol_id_by_name(&self, name: &str) -> Option<SymbolId> {
self.symbol_id_by_name(SymbolTable::root_scope_id(), name)
}
pub fn root_symbol_by_name(&self, name: &str) -> Option<&Symbol> {
self.symbol_by_name(SymbolTable::root_scope_id(), name)
}
pub fn scope_id_of_symbol(&self, symbol_id: SymbolId) -> ScopeId {
self.symbols_by_id[symbol_id].scope_id
}
pub fn scope_of_symbol(&self, symbol_id: SymbolId) -> &Scope {
&self.scopes_by_id[self.scope_id_of_symbol(symbol_id)]
}
pub fn scope_id_of_expression(&self, expression: ExpressionId) -> ScopeId {
self.expression_scopes[expression]
}
pub fn scope_of_expression(&self, expr_id: ExpressionId) -> &Scope {
&self.scopes_by_id[self.scope_id_of_expression(expr_id)]
}
pub fn parent_scopes(
&self,
scope_id: ScopeId,
) -> ScopeIterator<impl Iterator<Item = ScopeId> + '_> {
ScopeIterator {
table: self,
ids: std::iter::successors(Some(scope_id), |scope| self.scopes_by_id[*scope].parent),
}
}
pub fn parent_scope(&self, scope_id: ScopeId) -> Option<ScopeId> {
self.scopes_by_id[scope_id].parent
}
pub fn scope_id_for_node(&self, node_key: &NodeKey) -> ScopeId {
self.scopes_by_node[node_key]
}
pub fn definitions(&self, symbol_id: SymbolId) -> &[Definition] {
self.defs
.get(&symbol_id)
.map(std::vec::Vec::as_slice)
.unwrap_or_default()
}
pub fn all_definitions(&self) -> impl Iterator<Item = (SymbolId, &Definition)> + '_ {
self.defs
.iter()
.flat_map(|(sym_id, defs)| defs.iter().map(move |def| (*sym_id, def)))
}
fn hash_name(name: &str) -> u64 {
let mut hasher = FxHasher::default();
name.hash(&mut hasher);
hasher.finish()
}
}
pub struct SymbolIterator<'a, I> {
table: &'a SymbolTable,
ids: I,
}
impl<'a, I> Iterator for SymbolIterator<'a, I>
where
I: Iterator<Item = SymbolId>,
{
type Item = &'a Symbol;
fn next(&mut self) -> Option<Self::Item> {
let id = self.ids.next()?;
Some(&self.table.symbols_by_id[id])
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.ids.size_hint()
}
}
impl<'a, I> FusedIterator for SymbolIterator<'a, I> where
I: Iterator<Item = SymbolId> + FusedIterator
{
}
impl<'a, I> DoubleEndedIterator for SymbolIterator<'a, I>
where
I: Iterator<Item = SymbolId> + DoubleEndedIterator,
{
fn next_back(&mut self) -> Option<Self::Item> {
let id = self.ids.next_back()?;
Some(&self.table.symbols_by_id[id])
}
}
// TODO maybe get rid of this and just do all data access via methods on ScopeId?
pub struct ScopeIterator<'a, I> {
table: &'a SymbolTable,
ids: I,
}
/// iterate (`ScopeId`, `Scope`) pairs for given `ScopeId` iterator
impl<'a, I> Iterator for ScopeIterator<'a, I>
where
I: Iterator<Item = ScopeId>,
{
type Item = (ScopeId, &'a Scope);
fn next(&mut self) -> Option<Self::Item> {
let id = self.ids.next()?;
Some((id, &self.table.scopes_by_id[id]))
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.ids.size_hint()
}
}
impl<'a, I> FusedIterator for ScopeIterator<'a, I> where I: Iterator<Item = ScopeId> + FusedIterator {}
impl<'a, I> DoubleEndedIterator for ScopeIterator<'a, I>
where
I: Iterator<Item = ScopeId> + DoubleEndedIterator,
{
fn next_back(&mut self) -> Option<Self::Item> {
let id = self.ids.next_back()?;
Some((id, &self.table.scopes_by_id[id]))
}
}
#[derive(Debug)]
pub(super) struct SymbolTableBuilder {
symbol_table: SymbolTable,
}
impl SymbolTableBuilder {
pub(super) fn new() -> Self {
let mut table = SymbolTable {
scopes_by_id: IndexVec::new(),
symbols_by_id: IndexVec::new(),
defs: FxHashMap::default(),
scopes_by_node: FxHashMap::default(),
expression_scopes: IndexVec::new(),
dependencies: Vec::new(),
};
table.scopes_by_id.push(Scope {
name: Name::new("<module>"),
kind: ScopeKind::Module,
parent: None,
children: Vec::new(),
definition: None,
defining_symbol: None,
symbols_by_name: Map::default(),
});
Self {
symbol_table: table,
}
}
pub(super) fn finish(self) -> SymbolTable {
let mut symbol_table = self.symbol_table;
symbol_table.scopes_by_id.shrink_to_fit();
symbol_table.symbols_by_id.shrink_to_fit();
symbol_table.defs.shrink_to_fit();
symbol_table.scopes_by_node.shrink_to_fit();
symbol_table.expression_scopes.shrink_to_fit();
symbol_table.dependencies.shrink_to_fit();
symbol_table
}
pub(super) fn add_or_update_symbol(
&mut self,
scope_id: ScopeId,
name: &str,
flags: SymbolFlags,
) -> SymbolId {
let hash = SymbolTable::hash_name(name);
let scope = &mut self.symbol_table.scopes_by_id[scope_id];
let name = Name::new(name);
let entry = scope
.symbols_by_name
.raw_entry_mut()
.from_hash(hash, |existing| {
self.symbol_table.symbols_by_id[*existing].name == name
});
match entry {
RawEntryMut::Occupied(entry) => {
if let Some(symbol) = self.symbol_table.symbols_by_id.get_mut(*entry.key()) {
symbol.flags.insert(flags);
};
*entry.key()
}
RawEntryMut::Vacant(entry) => {
let id = self.symbol_table.symbols_by_id.push(Symbol {
name,
flags,
scope_id,
});
entry.insert_with_hasher(hash, id, (), |symid| {
SymbolTable::hash_name(&self.symbol_table.symbols_by_id[*symid].name)
});
id
}
}
}
pub(super) fn add_definition(&mut self, symbol_id: SymbolId, definition: Definition) {
self.symbol_table
.defs
.entry(symbol_id)
.or_default()
.push(definition);
}
pub(super) fn add_child_scope(
&mut self,
parent_scope_id: ScopeId,
name: &str,
kind: ScopeKind,
definition: Option<Definition>,
defining_symbol: Option<SymbolId>,
) -> ScopeId {
let new_scope_id = self.symbol_table.scopes_by_id.push(Scope {
name: Name::new(name),
kind,
parent: Some(parent_scope_id),
children: Vec::new(),
definition,
defining_symbol,
symbols_by_name: Map::default(),
});
let parent_scope = &mut self.symbol_table.scopes_by_id[parent_scope_id];
parent_scope.children.push(new_scope_id);
new_scope_id
}
pub(super) fn record_scope_for_node(&mut self, node_key: NodeKey, scope_id: ScopeId) {
self.symbol_table.scopes_by_node.insert(node_key, scope_id);
}
pub(super) fn add_dependency(&mut self, dependency: Dependency) {
self.symbol_table.dependencies.push(dependency);
}
/// Records the scope for the current expression
pub(super) fn record_expression(&mut self, scope: ScopeId) -> ExpressionId {
self.symbol_table.expression_scopes.push(scope)
}
}
#[cfg(test)]
mod tests {
use super::{ScopeKind, SymbolFlags, SymbolTable, SymbolTableBuilder};
#[test]
fn insert_same_name_symbol_twice() {
let mut builder = SymbolTableBuilder::new();
let root_scope_id = SymbolTable::root_scope_id();
let symbol_id_1 =
builder.add_or_update_symbol(root_scope_id, "foo", SymbolFlags::IS_DEFINED);
let symbol_id_2 = builder.add_or_update_symbol(root_scope_id, "foo", SymbolFlags::IS_USED);
let table = builder.finish();
assert_eq!(symbol_id_1, symbol_id_2);
assert!(symbol_id_1.symbol(&table).is_used(), "flags must merge");
assert!(symbol_id_1.symbol(&table).is_defined(), "flags must merge");
}
#[test]
fn insert_different_named_symbols() {
let mut builder = SymbolTableBuilder::new();
let root_scope_id = SymbolTable::root_scope_id();
let symbol_id_1 = builder.add_or_update_symbol(root_scope_id, "foo", SymbolFlags::empty());
let symbol_id_2 = builder.add_or_update_symbol(root_scope_id, "bar", SymbolFlags::empty());
assert_ne!(symbol_id_1, symbol_id_2);
}
#[test]
fn add_child_scope_with_symbol() {
let mut builder = SymbolTableBuilder::new();
let root_scope_id = SymbolTable::root_scope_id();
let foo_symbol_top =
builder.add_or_update_symbol(root_scope_id, "foo", SymbolFlags::empty());
let c_scope = builder.add_child_scope(root_scope_id, "C", ScopeKind::Class, None, None);
let foo_symbol_inner = builder.add_or_update_symbol(c_scope, "foo", SymbolFlags::empty());
assert_ne!(foo_symbol_top, foo_symbol_inner);
}
#[test]
fn scope_from_id() {
let table = SymbolTableBuilder::new().finish();
let root_scope_id = SymbolTable::root_scope_id();
let scope = root_scope_id.scope(&table);
assert_eq!(scope.name.as_str(), "<module>");
assert_eq!(scope.kind, ScopeKind::Module);
}
#[test]
fn symbol_from_id() {
let mut builder = SymbolTableBuilder::new();
let root_scope_id = SymbolTable::root_scope_id();
let foo_symbol_id =
builder.add_or_update_symbol(root_scope_id, "foo", SymbolFlags::empty());
let table = builder.finish();
let symbol = foo_symbol_id.symbol(&table);
assert_eq!(symbol.name(), "foo");
}
#[test]
fn bigger_symbol_table() {
let mut builder = SymbolTableBuilder::new();
let root_scope_id = SymbolTable::root_scope_id();
let foo_symbol_id =
builder.add_or_update_symbol(root_scope_id, "foo", SymbolFlags::empty());
builder.add_or_update_symbol(root_scope_id, "bar", SymbolFlags::empty());
builder.add_or_update_symbol(root_scope_id, "baz", SymbolFlags::empty());
builder.add_or_update_symbol(root_scope_id, "qux", SymbolFlags::empty());
let table = builder.finish();
let foo_symbol_id_2 = table
.root_symbol_id_by_name("foo")
.expect("foo symbol to be found");
assert_eq!(foo_symbol_id_2, foo_symbol_id);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,762 +0,0 @@
#![allow(dead_code)]
use ruff_python_ast as ast;
use ruff_python_ast::AstNode;
use std::fmt::Debug;
use crate::db::{QueryResult, SemanticDb, SemanticJar};
use crate::module::{resolve_module, ModuleName};
use crate::parse::parse;
use crate::semantic::types::{ModuleTypeId, Type};
use crate::semantic::{
resolve_global_symbol, semantic_index, ConstrainedDefinition, Definition, GlobalSymbolId,
ImportDefinition, ImportFromDefinition,
};
use crate::{FileId, Name};
// FIXME: Figure out proper dead-lock free synchronisation now that this takes `&db` instead of `&mut db`.
/// Resolve the public-facing type for a symbol (the type seen by other scopes: other modules, or
/// nested functions). Because calls to nested functions and imports can occur anywhere in control
/// flow, this type must be conservative and consider all definitions of the symbol that could
/// possibly be seen by another scope. Currently we take the most conservative approach, which is
/// the union of all definitions. We may be able to narrow this in future to eliminate definitions
/// which can't possibly (or at least likely) be seen by any other scope, so that e.g. we could
/// infer `Literal["1"]` instead of `Literal[1] | Literal["1"]` for `x` in `x = x; x = str(x);`.
#[tracing::instrument(level = "trace", skip(db))]
pub fn infer_symbol_public_type(db: &dyn SemanticDb, symbol: GlobalSymbolId) -> QueryResult<Type> {
let index = semantic_index(db, symbol.file_id)?;
let defs = index.symbol_table().definitions(symbol.symbol_id).to_vec();
let jar: &SemanticJar = db.jar()?;
if let Some(ty) = jar.type_store.get_cached_symbol_public_type(symbol) {
return Ok(ty);
}
let ty = infer_type_from_definitions(db, symbol, defs.iter().cloned())?;
jar.type_store.cache_symbol_public_type(symbol, ty);
// TODO record dependencies
Ok(ty)
}
/// Infer type of a symbol as union of the given `Definitions`.
fn infer_type_from_definitions<T>(
db: &dyn SemanticDb,
symbol: GlobalSymbolId,
definitions: T,
) -> QueryResult<Type>
where
T: Debug + IntoIterator<Item = Definition>,
{
infer_type_from_constrained_definitions(
db,
symbol,
definitions
.into_iter()
.map(|definition| ConstrainedDefinition {
definition,
constraints: vec![],
}),
)
}
/// Infer type of a symbol as union of the given `ConstrainedDefinitions`.
fn infer_type_from_constrained_definitions<T>(
db: &dyn SemanticDb,
symbol: GlobalSymbolId,
constrained_definitions: T,
) -> QueryResult<Type>
where
T: IntoIterator<Item = ConstrainedDefinition>,
{
let jar: &SemanticJar = db.jar()?;
let mut tys = constrained_definitions
.into_iter()
.map(|def| infer_constrained_definition_type(db, symbol, def.clone()))
.peekable();
if let Some(first) = tys.next() {
if tys.peek().is_some() {
Ok(jar.type_store.add_union(
symbol.file_id,
&Iterator::chain(std::iter::once(first), tys).collect::<QueryResult<Vec<_>>>()?,
))
} else {
first
}
} else {
Ok(Type::Unknown)
}
}
/// Infer type for a ConstrainedDefinition (intersection of the definition type and the
/// constraints)
#[tracing::instrument(level = "trace", skip(db))]
pub fn infer_constrained_definition_type(
db: &dyn SemanticDb,
symbol: GlobalSymbolId,
constrained_definition: ConstrainedDefinition,
) -> QueryResult<Type> {
let ConstrainedDefinition {
definition,
constraints,
} = constrained_definition;
let index = semantic_index(db, symbol.file_id)?;
let parsed = parse(db.upcast(), symbol.file_id)?;
let mut intersected_types = vec![infer_definition_type(db, symbol, definition)?];
for constraint in constraints {
if let Some(constraint_type) = infer_constraint_type(
db,
symbol,
index.resolve_expression_id(parsed.syntax(), constraint),
)? {
intersected_types.push(constraint_type);
}
}
let jar: &SemanticJar = db.jar()?;
Ok(jar
.type_store
.add_intersection(symbol.file_id, &intersected_types, &[]))
}
/// Infer a type for a Definition
#[tracing::instrument(level = "trace", skip(db))]
pub fn infer_definition_type(
db: &dyn SemanticDb,
symbol: GlobalSymbolId,
definition: Definition,
) -> QueryResult<Type> {
let jar: &SemanticJar = db.jar()?;
let type_store = &jar.type_store;
let file_id = symbol.file_id;
match definition {
Definition::Unbound => Ok(Type::Unbound),
Definition::Import(ImportDefinition {
module: module_name,
}) => {
if let Some(module) = resolve_module(db, module_name.clone())? {
Ok(Type::Module(ModuleTypeId { module, file_id }))
} else {
Ok(Type::Unknown)
}
}
Definition::ImportFrom(ImportFromDefinition {
module,
name,
level,
}) => {
// TODO relative imports
assert!(matches!(level, 0));
let module_name = ModuleName::new(module.as_ref().expect("TODO relative imports"));
let Some(module) = resolve_module(db, module_name.clone())? else {
return Ok(Type::Unknown);
};
if let Some(remote_symbol) = resolve_global_symbol(db, module, &name)? {
infer_symbol_public_type(db, remote_symbol)
} else {
Ok(Type::Unknown)
}
}
Definition::ClassDef(node_key) => {
if let Some(ty) = type_store.get_cached_node_type(file_id, node_key.erased()) {
Ok(ty)
} else {
let parsed = parse(db.upcast(), file_id)?;
let ast = parsed.syntax();
let index = semantic_index(db, file_id)?;
let node = node_key.resolve_unwrap(ast.as_any_node_ref());
let mut bases = Vec::with_capacity(node.bases().len());
for base in node.bases() {
bases.push(infer_expr_type(db, file_id, base)?);
}
let scope_id = index.symbol_table().scope_id_for_node(node_key.erased());
let ty = type_store.add_class(file_id, &node.name.id, scope_id, bases);
type_store.cache_node_type(file_id, *node_key.erased(), ty);
Ok(ty)
}
}
Definition::FunctionDef(node_key) => {
if let Some(ty) = type_store.get_cached_node_type(file_id, node_key.erased()) {
Ok(ty)
} else {
let parsed = parse(db.upcast(), file_id)?;
let ast = parsed.syntax();
let index = semantic_index(db, file_id)?;
let node = node_key
.resolve(ast.as_any_node_ref())
.expect("node key should resolve");
let decorator_tys = node
.decorator_list
.iter()
.map(|decorator| infer_expr_type(db, file_id, &decorator.expression))
.collect::<QueryResult<_>>()?;
let scope_id = index.symbol_table().scope_id_for_node(node_key.erased());
let ty = type_store.add_function(
file_id,
&node.name.id,
symbol.symbol_id,
scope_id,
decorator_tys,
);
type_store.cache_node_type(file_id, *node_key.erased(), ty);
Ok(ty)
}
}
Definition::Assignment(node_key) => {
let parsed = parse(db.upcast(), file_id)?;
let ast = parsed.syntax();
let node = node_key.resolve_unwrap(ast.as_any_node_ref());
// TODO handle unpacking assignment
infer_expr_type(db, file_id, &node.value)
}
Definition::AnnotatedAssignment(node_key) => {
let parsed = parse(db.upcast(), file_id)?;
let ast = parsed.syntax();
let node = node_key.resolve_unwrap(ast.as_any_node_ref());
// TODO actually look at the annotation
let Some(value) = &node.value else {
return Ok(Type::Unknown);
};
// TODO handle unpacking assignment
infer_expr_type(db, file_id, value)
}
Definition::NamedExpr(node_key) => {
let parsed = parse(db.upcast(), file_id)?;
let ast = parsed.syntax();
let node = node_key.resolve_unwrap(ast.as_any_node_ref());
infer_expr_type(db, file_id, &node.value)
}
}
}
/// Return the type that the given constraint (an expression from a control-flow test) requires the
/// given symbol to have. For example, returns the Type "~None" as the constraint type if given the
/// symbol ID for x and the expression ID for `x is not None`. Returns (Rust) None if the given
/// expression applies no constraints on the given symbol.
#[tracing::instrument(level = "trace", skip(db))]
fn infer_constraint_type(
db: &dyn SemanticDb,
symbol_id: GlobalSymbolId,
// TODO this should preferably take an &ast::Expr instead of AnyNodeRef
expression: ast::AnyNodeRef,
) -> QueryResult<Option<Type>> {
let file_id = symbol_id.file_id;
let index = semantic_index(db, file_id)?;
let jar: &SemanticJar = db.jar()?;
let symbol_name = symbol_id.symbol_id.symbol(&index.symbol_table).name();
// TODO narrowing attributes
// TODO narrowing dict keys
// TODO isinstance, ==/!=, type(...), literals, bools...
match expression {
ast::AnyNodeRef::ExprCompare(ast::ExprCompare {
left,
ops,
comparators,
..
}) => {
// TODO chained comparisons
match left.as_ref() {
ast::Expr::Name(ast::ExprName { id, .. }) if id == symbol_name => match ops[0] {
ast::CmpOp::Is | ast::CmpOp::IsNot => {
Ok(match infer_expr_type(db, file_id, &comparators[0])? {
Type::None => Some(Type::None),
_ => None,
}
.map(|ty| {
if matches!(ops[0], ast::CmpOp::IsNot) {
jar.type_store.add_intersection(file_id, &[], &[ty])
} else {
ty
}
}))
}
_ => Ok(None),
},
_ => Ok(None),
}
}
_ => Ok(None),
}
}
/// Infer type of the given expression.
fn infer_expr_type(db: &dyn SemanticDb, file_id: FileId, expr: &ast::Expr) -> QueryResult<Type> {
// TODO cache the resolution of the type on the node
let index = semantic_index(db, file_id)?;
match expr {
ast::Expr::NoneLiteral(_) => Ok(Type::None),
ast::Expr::NumberLiteral(ast::ExprNumberLiteral { value, .. }) => {
match value {
ast::Number::Int(n) => {
// TODO support big int literals
Ok(n.as_i64().map(Type::IntLiteral).unwrap_or(Type::Unknown))
}
// TODO builtins.float or builtins.complex
_ => Ok(Type::Unknown),
}
}
ast::Expr::Name(name) => {
// TODO look up in the correct scope, don't assume global
if let Some(symbol_id) = index.symbol_table().root_symbol_id_by_name(&name.id) {
infer_type_from_constrained_definitions(
db,
GlobalSymbolId { file_id, symbol_id },
index.reachable_definitions(symbol_id, expr),
)
} else {
Ok(Type::Unknown)
}
}
ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) => {
let value_type = infer_expr_type(db, file_id, value)?;
let attr_name = &Name::new(&attr.id);
value_type
.get_member(db, attr_name)
.map(|ty| ty.unwrap_or(Type::Unknown))
}
ast::Expr::BinOp(ast::ExprBinOp {
left, op, right, ..
}) => {
let left_ty = infer_expr_type(db, file_id, left)?;
let right_ty = infer_expr_type(db, file_id, right)?;
// TODO add reverse bin op support if right <: left
left_ty.resolve_bin_op(db, *op, right_ty)
}
ast::Expr::Named(ast::ExprNamed { value, .. }) => infer_expr_type(db, file_id, value),
ast::Expr::If(ast::ExprIf { body, orelse, .. }) => {
// TODO detect statically known truthy or falsy test
let body_ty = infer_expr_type(db, file_id, body)?;
let else_ty = infer_expr_type(db, file_id, orelse)?;
let jar: &SemanticJar = db.jar()?;
Ok(jar.type_store.add_union(file_id, &[body_ty, else_ty]))
}
_ => todo!("expression type resolution for {:?}", expr),
}
}
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use crate::db::tests::TestDb;
use crate::db::{HasJar, SemanticJar};
use crate::module::{
resolve_module, set_module_search_paths, ModuleName, ModuleResolutionInputs,
};
use crate::semantic::{infer_symbol_public_type, resolve_global_symbol, Type};
use crate::Name;
// TODO with virtual filesystem we shouldn't have to write files to disk for these
// tests
struct TestCase {
temp_dir: tempfile::TempDir,
db: TestDb,
src: PathBuf,
}
fn create_test() -> std::io::Result<TestCase> {
let temp_dir = tempfile::tempdir()?;
let src = temp_dir.path().join("src");
std::fs::create_dir(&src)?;
let src = src.canonicalize()?;
let search_paths = ModuleResolutionInputs {
extra_paths: vec![],
workspace_root: src.clone(),
site_packages: None,
custom_typeshed: None,
};
let mut db = TestDb::default();
set_module_search_paths(&mut db, search_paths);
Ok(TestCase { temp_dir, db, src })
}
fn write_to_path(case: &TestCase, relative_path: &str, contents: &str) -> anyhow::Result<()> {
let path = case.src.join(relative_path);
std::fs::write(path, contents)?;
Ok(())
}
fn get_public_type(
case: &TestCase,
module_name: &str,
variable_name: &str,
) -> anyhow::Result<Type> {
let db = &case.db;
let module = resolve_module(db, ModuleName::new(module_name))?.expect("Module to exist");
let symbol = resolve_global_symbol(db, module, variable_name)?.expect("symbol to exist");
Ok(infer_symbol_public_type(db, symbol)?)
}
fn assert_public_type(
case: &TestCase,
module_name: &str,
variable_name: &str,
type_name: &str,
) -> anyhow::Result<()> {
let ty = get_public_type(case, module_name, variable_name)?;
let jar = HasJar::<SemanticJar>::jar(&case.db)?;
assert_eq!(format!("{}", ty.display(&jar.type_store)), type_name);
Ok(())
}
#[test]
fn follow_import_to_class() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(&case, "a.py", "from b import C as D; E = D")?;
write_to_path(&case, "b.py", "class C: pass")?;
assert_public_type(&case, "a", "E", "Literal[C]")
}
#[test]
fn resolve_base_class_by_name() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"mod.py",
"
class Base: pass
class Sub(Base): pass
",
)?;
let ty = get_public_type(&case, "mod", "Sub")?;
let Type::Class(class_id) = ty else {
panic!("Sub is not a Class")
};
let jar = HasJar::<SemanticJar>::jar(&case.db)?;
let base_names: Vec<_> = jar
.type_store
.get_class(class_id)
.bases()
.iter()
.map(|base_ty| format!("{}", base_ty.display(&jar.type_store)))
.collect();
assert_eq!(base_names, vec!["Literal[Base]"]);
Ok(())
}
#[test]
fn resolve_method() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"mod.py",
"
class C:
def f(self): pass
",
)?;
let ty = get_public_type(&case, "mod", "C")?;
let Type::Class(class_id) = ty else {
panic!("C is not a Class");
};
let member_ty = class_id
.get_own_class_member(&case.db, &Name::new("f"))
.expect("C.f to resolve");
let Some(Type::Function(func_id)) = member_ty else {
panic!("C.f is not a Function");
};
let jar = HasJar::<SemanticJar>::jar(&case.db)?;
let function = jar.type_store.get_function(func_id);
assert_eq!(function.name(), "f");
Ok(())
}
#[test]
fn resolve_module_member() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(&case, "a.py", "import b; D = b.C")?;
write_to_path(&case, "b.py", "class C: pass")?;
assert_public_type(&case, "a", "D", "Literal[C]")
}
#[test]
fn resolve_literal() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(&case, "a.py", "x = 1")?;
assert_public_type(&case, "a", "x", "Literal[1]")
}
#[test]
fn resolve_union() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
if flag:
x = 1
else:
x = 2
",
)?;
assert_public_type(&case, "a", "x", "Literal[1, 2]")
}
#[test]
fn resolve_visible_def() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(&case, "a.py", "y = 1; y = 2; x = y")?;
assert_public_type(&case, "a", "x", "Literal[2]")
}
#[test]
fn join_paths() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
y = 1
y = 2
if flag:
y = 3
x = y
",
)?;
assert_public_type(&case, "a", "x", "Literal[2, 3]")
}
#[test]
fn maybe_unbound() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
if flag:
y = 1
x = y
",
)?;
assert_public_type(&case, "a", "x", "Literal[1] | Unbound")
}
#[test]
fn if_elif_else() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
y = 1
y = 2
if flag:
y = 3
elif flag2:
y = 4
else:
r = y
y = 5
s = y
x = y
",
)?;
assert_public_type(&case, "a", "x", "Literal[3, 4, 5]")?;
assert_public_type(&case, "a", "r", "Literal[2]")?;
assert_public_type(&case, "a", "s", "Literal[5]")
}
#[test]
fn if_elif() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
y = 1
y = 2
if flag:
y = 3
elif flag2:
y = 4
x = y
",
)?;
assert_public_type(&case, "a", "x", "Literal[2, 3, 4]")
}
#[test]
fn literal_int_arithmetic() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
a = 2 + 1
b = a - 4
c = a * b
d = c / 3
e = 5 % 3
",
)?;
assert_public_type(&case, "a", "a", "Literal[3]")?;
assert_public_type(&case, "a", "b", "Literal[-1]")?;
assert_public_type(&case, "a", "c", "Literal[-3]")?;
assert_public_type(&case, "a", "d", "Literal[-1]")?;
assert_public_type(&case, "a", "e", "Literal[2]")
}
#[test]
fn walrus() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
x = (y := 1) + 1
",
)?;
assert_public_type(&case, "a", "x", "Literal[2]")?;
assert_public_type(&case, "a", "y", "Literal[1]")
}
#[test]
fn ifexpr() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
x = 1 if flag else 2
",
)?;
assert_public_type(&case, "a", "x", "Literal[1, 2]")
}
#[test]
fn ifexpr_walrus() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
y = z = 0
x = (y := 1) if flag else (z := 2)
a = y
b = z
",
)?;
assert_public_type(&case, "a", "x", "Literal[1, 2]")?;
assert_public_type(&case, "a", "a", "Literal[0, 1]")?;
assert_public_type(&case, "a", "b", "Literal[0, 2]")
}
#[test]
fn ifexpr_walrus_2() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
y = 0
(y := 1) if flag else (y := 2)
a = y
",
)?;
assert_public_type(&case, "a", "a", "Literal[1, 2]")
}
#[test]
fn ifexpr_nested() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
x = 1 if flag else 2 if flag2 else 3
",
)?;
assert_public_type(&case, "a", "x", "Literal[1, 2, 3]")
}
#[test]
fn none() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
x = 1 if flag else None
",
)?;
assert_public_type(&case, "a", "x", "Literal[1] | None")
}
#[test]
fn narrow_none() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
x = 1 if flag else None
y = 0
if x is not None:
y = x
z = y
",
)?;
// TODO normalization of unions and intersections: this type is technically correct but
// begging for normalization
assert_public_type(&case, "a", "z", "Literal[0] | Literal[1] | None & ~None")
}
}

View File

@@ -1,105 +0,0 @@
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use ruff_notebook::Notebook;
use ruff_python_ast::PySourceType;
use crate::cache::KeyValueCache;
use crate::db::{QueryResult, SourceDb};
use crate::files::FileId;
#[tracing::instrument(level = "debug", skip(db))]
pub(crate) fn source_text(db: &dyn SourceDb, file_id: FileId) -> QueryResult<Source> {
let jar = db.jar()?;
let sources = &jar.sources;
sources.get(&file_id, |file_id| {
let path = db.file_path(*file_id);
let source_text = std::fs::read_to_string(&path).unwrap_or_else(|err| {
tracing::error!("Failed to read file '{path:?}: {err}'. Falling back to empty text");
String::new()
});
let python_ty = PySourceType::from(&path);
let kind = match python_ty {
PySourceType::Python => {
SourceKind::Python(Arc::from(source_text))
}
PySourceType::Stub => SourceKind::Stub(Arc::from(source_text)),
PySourceType::Ipynb => {
let notebook = Notebook::from_source_code(&source_text).unwrap_or_else(|err| {
// TODO should this be changed to never fail?
// or should we instead add a diagnostic somewhere? But what would we return in this case?
tracing::error!(
"Failed to parse notebook '{path:?}: {err}'. Falling back to an empty notebook"
);
Notebook::from_source_code("").unwrap()
});
SourceKind::IpyNotebook(Arc::new(notebook))
}
};
Ok(Source { kind })
})
}
#[derive(Debug, Clone, PartialEq)]
pub enum SourceKind {
Python(Arc<str>),
Stub(Arc<str>),
IpyNotebook(Arc<Notebook>),
}
impl<'a> From<&'a SourceKind> for PySourceType {
fn from(value: &'a SourceKind) -> Self {
match value {
SourceKind::Python(_) => PySourceType::Python,
SourceKind::Stub(_) => PySourceType::Stub,
SourceKind::IpyNotebook(_) => PySourceType::Ipynb,
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct Source {
kind: SourceKind,
}
impl Source {
pub fn python<T: Into<Arc<str>>>(source: T) -> Self {
Self {
kind: SourceKind::Python(source.into()),
}
}
pub fn kind(&self) -> &SourceKind {
&self.kind
}
pub fn text(&self) -> &str {
match &self.kind {
SourceKind::Python(text) => text,
SourceKind::Stub(text) => text,
SourceKind::IpyNotebook(notebook) => notebook.source_code(),
}
}
}
#[derive(Debug, Default)]
pub struct SourceStorage(pub(crate) KeyValueCache<FileId, Source>);
impl Deref for SourceStorage {
type Target = KeyValueCache<FileId, Source>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for SourceStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

View File

@@ -0,0 +1,48 @@
/// Enumeration of all supported Python versions
///
/// TODO: unify with the `PythonVersion` enum in the linter/formatter crates?
#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq, PartialOrd, Ord, Default, clap::ValueEnum)]
pub enum TargetVersion {
Py37,
#[default]
Py38,
Py39,
Py310,
Py311,
Py312,
Py313,
}
impl TargetVersion {
const fn as_str(self) -> &'static str {
match self {
Self::Py37 => "py37",
Self::Py38 => "py38",
Self::Py39 => "py39",
Self::Py310 => "py310",
Self::Py311 => "py311",
Self::Py312 => "py312",
Self::Py313 => "py313",
}
}
}
impl std::fmt::Display for TargetVersion {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}
impl From<TargetVersion> for red_knot_python_semantic::PythonVersion {
fn from(value: TargetVersion) -> Self {
match value {
TargetVersion::Py37 => Self::PY37,
TargetVersion::Py38 => Self::PY38,
TargetVersion::Py39 => Self::PY39,
TargetVersion::Py310 => Self::PY310,
TargetVersion::Py311 => Self::PY311,
TargetVersion::Py312 => Self::PY312,
TargetVersion::Py313 => Self::PY313,
}
}
}

View File

@@ -0,0 +1 @@

View File

@@ -1,77 +0,0 @@
use std::path::Path;
use anyhow::Context;
use notify::event::{CreateKind, RemoveKind};
use notify::{recommended_watcher, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
use crate::program::{FileChangeKind, FileWatcherChange};
pub struct FileWatcher {
watcher: RecommendedWatcher,
}
pub trait EventHandler: Send + 'static {
fn handle(&self, changes: Vec<FileWatcherChange>);
}
impl<F> EventHandler for F
where
F: Fn(Vec<FileWatcherChange>) + Send + 'static,
{
fn handle(&self, changes: Vec<FileWatcherChange>) {
let f = self;
f(changes);
}
}
impl FileWatcher {
pub fn new<E>(handler: E) -> anyhow::Result<Self>
where
E: EventHandler,
{
Self::from_handler(Box::new(handler))
}
fn from_handler(handler: Box<dyn EventHandler>) -> anyhow::Result<Self> {
let watcher = recommended_watcher(move |changes: notify::Result<Event>| {
match changes {
Ok(event) => {
// TODO verify that this handles all events correctly
let change_kind = match event.kind {
EventKind::Create(CreateKind::File) => FileChangeKind::Created,
EventKind::Modify(_) => FileChangeKind::Modified,
EventKind::Remove(RemoveKind::File) => FileChangeKind::Deleted,
_ => {
return;
}
};
let mut changes = Vec::new();
for path in event.paths {
if path.is_file() {
changes.push(FileWatcherChange::new(path, change_kind));
}
}
if !changes.is_empty() {
handler.handle(changes);
}
}
// TODO proper error handling
Err(err) => {
panic!("Error: {err}");
}
}
})
.context("Failed to create file watcher.")?;
Ok(Self { watcher })
}
pub fn watch_folder(&mut self, path: &Path) -> anyhow::Result<()> {
self.watcher.watch(path, RecursiveMode::Recursive)?;
Ok(())
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,35 +0,0 @@
[package]
name = "red_knot_module_resolver"
version = "0.0.0"
publish = false
authors = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
homepage = { workspace = true }
documentation = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
[dependencies]
ruff_db = { workspace = true }
ruff_python_stdlib = { workspace = true }
rustc-hash = { workspace = true }
salsa = { workspace = true }
smol_str = { workspace = true }
tracing = { workspace = true }
zip = { workspace = true }
[build-dependencies]
path-slash = { workspace = true }
walkdir = { workspace = true }
zip = { workspace = true }
[dev-dependencies]
anyhow = { workspace = true }
insta = { workspace = true }
tempfile = { workspace = true }
walkdir = { workspace = true }
[lints]
workspace = true

View File

@@ -1,156 +0,0 @@
use ruff_db::Upcast;
use crate::resolver::{
file_to_module,
internal::{ModuleNameIngredient, ModuleResolverSearchPaths},
resolve_module_query,
};
#[salsa::jar(db=Db)]
pub struct Jar(
ModuleNameIngredient<'_>,
ModuleResolverSearchPaths,
resolve_module_query,
file_to_module,
);
pub trait Db: salsa::DbWithJar<Jar> + ruff_db::Db + Upcast<dyn ruff_db::Db> {}
pub(crate) mod tests {
use std::sync;
use salsa::DebugWithDb;
use ruff_db::file_system::{FileSystem, MemoryFileSystem, OsFileSystem};
use ruff_db::vfs::Vfs;
use super::*;
#[salsa::db(Jar, ruff_db::Jar)]
pub(crate) struct TestDb {
storage: salsa::Storage<Self>,
file_system: TestFileSystem,
events: sync::Arc<sync::Mutex<Vec<salsa::Event>>>,
vfs: Vfs,
}
impl TestDb {
#[allow(unused)]
pub(crate) fn new() -> Self {
Self {
storage: salsa::Storage::default(),
file_system: TestFileSystem::Memory(MemoryFileSystem::default()),
events: sync::Arc::default(),
vfs: Vfs::with_stubbed_vendored(),
}
}
/// Returns the memory file system.
///
/// ## Panics
/// If this test db isn't using a memory file system.
#[allow(unused)]
pub(crate) fn memory_file_system(&self) -> &MemoryFileSystem {
if let TestFileSystem::Memory(fs) = &self.file_system {
fs
} else {
panic!("The test db is not using a memory file system");
}
}
/// Uses the real file system instead of the memory file system.
///
/// This useful for testing advanced file system features like permissions, symlinks, etc.
///
/// Note that any files written to the memory file system won't be copied over.
#[allow(unused)]
pub(crate) fn with_os_file_system(&mut self) {
self.file_system = TestFileSystem::Os(OsFileSystem);
}
#[allow(unused)]
pub(crate) fn vfs_mut(&mut self) -> &mut Vfs {
&mut self.vfs
}
/// Takes the salsa events.
///
/// ## Panics
/// If there are any pending salsa snapshots.
#[allow(unused)]
pub(crate) fn take_salsa_events(&mut self) -> Vec<salsa::Event> {
let inner = sync::Arc::get_mut(&mut self.events).expect("no pending salsa snapshots");
let events = inner.get_mut().unwrap();
std::mem::take(&mut *events)
}
/// Clears the salsa events.
///
/// ## Panics
/// If there are any pending salsa snapshots.
#[allow(unused)]
pub(crate) fn clear_salsa_events(&mut self) {
self.take_salsa_events();
}
}
impl Upcast<dyn ruff_db::Db> for TestDb {
fn upcast(&self) -> &(dyn ruff_db::Db + 'static) {
self
}
}
impl ruff_db::Db for TestDb {
fn file_system(&self) -> &dyn ruff_db::file_system::FileSystem {
self.file_system.inner()
}
fn vfs(&self) -> &ruff_db::vfs::Vfs {
&self.vfs
}
}
impl Db for TestDb {}
impl salsa::Database for TestDb {
fn salsa_event(&self, event: salsa::Event) {
tracing::trace!("event: {:?}", event.debug(self));
let mut events = self.events.lock().unwrap();
events.push(event);
}
}
impl salsa::ParallelDatabase for TestDb {
fn snapshot(&self) -> salsa::Snapshot<Self> {
salsa::Snapshot::new(Self {
storage: self.storage.snapshot(),
file_system: self.file_system.snapshot(),
events: self.events.clone(),
vfs: self.vfs.snapshot(),
})
}
}
enum TestFileSystem {
Memory(MemoryFileSystem),
#[allow(unused)]
Os(OsFileSystem),
}
impl TestFileSystem {
fn inner(&self) -> &dyn FileSystem {
match self {
Self::Memory(inner) => inner,
Self::Os(inner) => inner,
}
}
fn snapshot(&self) -> Self {
match self {
Self::Memory(inner) => Self::Memory(inner.snapshot()),
Self::Os(inner) => Self::Os(inner.snapshot()),
}
}
}
}

View File

@@ -1,9 +0,0 @@
mod db;
mod module;
mod resolver;
mod typeshed;
pub use db::{Db, Jar};
pub use module::{ModuleKind, ModuleName};
pub use resolver::{resolve_module, set_module_resolution_settings, ModuleResolutionSettings};
pub use typeshed::versions::TypeshedVersions;

View File

@@ -1,346 +0,0 @@
use std::fmt::Formatter;
use std::ops::Deref;
use std::sync::Arc;
use ruff_db::file_system::FileSystemPath;
use ruff_db::vfs::{VfsFile, VfsPath};
use ruff_python_stdlib::identifiers::is_identifier;
use crate::Db;
/// A module name, e.g. `foo.bar`.
///
/// Always normalized to the absolute form (never a relative module name, i.e., never `.foo`).
#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct ModuleName(smol_str::SmolStr);
impl ModuleName {
/// Creates a new module name for `name`. Returns `Some` if `name` is a valid, absolute
/// module name and `None` otherwise.
///
/// The module name is invalid if:
///
/// * The name is empty
/// * The name is relative
/// * The name ends with a `.`
/// * The name contains a sequence of multiple dots
/// * A component of a name (the part between two dots) isn't a valid python identifier.
#[inline]
pub fn new(name: &str) -> Option<Self> {
Self::new_from_smol(smol_str::SmolStr::new(name))
}
/// Creates a new module name for `name` where `name` is a static string.
/// Returns `Some` if `name` is a valid, absolute module name and `None` otherwise.
///
/// The module name is invalid if:
///
/// * The name is empty
/// * The name is relative
/// * The name ends with a `.`
/// * The name contains a sequence of multiple dots
/// * A component of a name (the part between two dots) isn't a valid python identifier.
///
/// ## Examples
///
/// ```
/// use red_knot_module_resolver::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar").as_deref(), Some("foo.bar"));
/// assert_eq!(ModuleName::new_static(""), None);
/// assert_eq!(ModuleName::new_static("..foo"), None);
/// assert_eq!(ModuleName::new_static(".foo"), None);
/// assert_eq!(ModuleName::new_static("foo."), None);
/// assert_eq!(ModuleName::new_static("foo..bar"), None);
/// assert_eq!(ModuleName::new_static("2000"), None);
/// ```
#[inline]
pub fn new_static(name: &'static str) -> Option<Self> {
Self::new_from_smol(smol_str::SmolStr::new_static(name))
}
fn new_from_smol(name: smol_str::SmolStr) -> Option<Self> {
if name.is_empty() {
return None;
}
if name.split('.').all(is_identifier) {
Some(Self(name))
} else {
None
}
}
/// An iterator over the components of the module name:
///
/// # Examples
///
/// ```
/// use red_knot_module_resolver::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().components().collect::<Vec<_>>(), vec!["foo", "bar", "baz"]);
/// ```
pub fn components(&self) -> impl DoubleEndedIterator<Item = &str> {
self.0.split('.')
}
/// The name of this module's immediate parent, if it has a parent.
///
/// # Examples
///
/// ```
/// use red_knot_module_resolver::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar").unwrap().parent(), Some(ModuleName::new_static("foo").unwrap()));
/// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().parent(), Some(ModuleName::new_static("foo.bar").unwrap()));
/// assert_eq!(ModuleName::new_static("root").unwrap().parent(), None);
/// ```
pub fn parent(&self) -> Option<ModuleName> {
let (parent, _) = self.0.rsplit_once('.')?;
Some(Self(smol_str::SmolStr::new(parent)))
}
/// Returns `true` if the name starts with `other`.
///
/// This is equivalent to checking if `self` is a sub-module of `other`.
///
/// # Examples
///
/// ```
/// use red_knot_module_resolver::ModuleName;
///
/// assert!(ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap()));
///
/// assert!(!ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("bar").unwrap()));
/// assert!(!ModuleName::new_static("foo_bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap()));
/// ```
pub fn starts_with(&self, other: &ModuleName) -> bool {
let mut self_components = self.components();
let other_components = other.components();
for other_component in other_components {
if self_components.next() != Some(other_component) {
return false;
}
}
true
}
#[inline]
pub fn as_str(&self) -> &str {
&self.0
}
pub(crate) fn from_relative_path(path: &FileSystemPath) -> Option<Self> {
let path = if path.ends_with("__init__.py") || path.ends_with("__init__.pyi") {
path.parent()?
} else {
path
};
let name = if let Some(parent) = path.parent() {
let mut name = String::with_capacity(path.as_str().len());
for component in parent.components() {
name.push_str(component.as_os_str().to_str()?);
name.push('.');
}
// SAFETY: Unwrap is safe here or `parent` would have returned `None`.
name.push_str(path.file_stem().unwrap());
smol_str::SmolStr::from(name)
} else {
smol_str::SmolStr::new(path.file_stem()?)
};
Some(Self(name))
}
}
impl Deref for ModuleName {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl PartialEq<str> for ModuleName {
fn eq(&self, other: &str) -> bool {
self.as_str() == other
}
}
impl PartialEq<ModuleName> for str {
fn eq(&self, other: &ModuleName) -> bool {
self == other.as_str()
}
}
impl std::fmt::Display for ModuleName {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.0)
}
}
/// Representation of a Python module.
#[derive(Clone, PartialEq, Eq)]
pub struct Module {
inner: Arc<ModuleInner>,
}
impl Module {
pub(crate) fn new(
name: ModuleName,
kind: ModuleKind,
search_path: ModuleSearchPath,
file: VfsFile,
) -> Self {
Self {
inner: Arc::new(ModuleInner {
name,
kind,
search_path,
file,
}),
}
}
/// The absolute name of the module (e.g. `foo.bar`)
pub fn name(&self) -> &ModuleName {
&self.inner.name
}
/// The file to the source code that defines this module
pub fn file(&self) -> VfsFile {
self.inner.file
}
/// The search path from which the module was resolved.
pub fn search_path(&self) -> &ModuleSearchPath {
&self.inner.search_path
}
/// Determine whether this module is a single-file module or a package
pub fn kind(&self) -> ModuleKind {
self.inner.kind
}
}
impl std::fmt::Debug for Module {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Module")
.field("name", &self.name())
.field("kind", &self.kind())
.field("file", &self.file())
.field("search_path", &self.search_path())
.finish()
}
}
impl salsa::DebugWithDb<dyn Db> for Module {
fn fmt(&self, f: &mut Formatter<'_>, db: &dyn Db) -> std::fmt::Result {
f.debug_struct("Module")
.field("name", &self.name())
.field("kind", &self.kind())
.field("file", &self.file().debug(db.upcast()))
.field("search_path", &self.search_path())
.finish()
}
}
#[derive(PartialEq, Eq)]
struct ModuleInner {
name: ModuleName,
kind: ModuleKind,
search_path: ModuleSearchPath,
file: VfsFile,
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ModuleKind {
/// A single-file module (e.g. `foo.py` or `foo.pyi`)
Module,
/// A python package (`foo/__init__.py` or `foo/__init__.pyi`)
Package,
}
/// A search path in which to search modules.
/// Corresponds to a path in [`sys.path`](https://docs.python.org/3/library/sys_path_init.html) at runtime.
///
/// Cloning a search path is cheap because it's an `Arc`.
#[derive(Clone, PartialEq, Eq)]
pub struct ModuleSearchPath {
inner: Arc<ModuleSearchPathInner>,
}
impl ModuleSearchPath {
pub fn new<P>(path: P, kind: ModuleSearchPathKind) -> Self
where
P: Into<VfsPath>,
{
Self {
inner: Arc::new(ModuleSearchPathInner {
path: path.into(),
kind,
}),
}
}
/// Determine whether this is a first-party, third-party or standard-library search path
pub fn kind(&self) -> ModuleSearchPathKind {
self.inner.kind
}
/// Return the location of the search path on the file system
pub fn path(&self) -> &VfsPath {
&self.inner.path
}
}
impl std::fmt::Debug for ModuleSearchPath {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ModuleSearchPath")
.field("path", &self.inner.path)
.field("kind", &self.kind())
.finish()
}
}
#[derive(Eq, PartialEq)]
struct ModuleSearchPathInner {
path: VfsPath,
kind: ModuleSearchPathKind,
}
/// Enumeration of the different kinds of search paths type checkers are expected to support.
///
/// N.B. Although we don't implement `Ord` for this enum, they are ordered in terms of the
/// priority that we want to give these modules when resolving them.
/// This is roughly [the order given in the typing spec], but typeshed's stubs
/// for the standard library are moved higher up to match Python's semantics at runtime.
///
/// [the order given in the typing spec]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ModuleSearchPathKind {
/// "Extra" paths provided by the user in a config file, env var or CLI flag.
/// E.g. mypy's `MYPYPATH` env var, or pyright's `stubPath` configuration setting
Extra,
/// Files in the project we're directly being invoked on
FirstParty,
/// The `stdlib` directory of typeshed (either vendored or custom)
StandardLibrary,
/// Stubs or runtime modules installed in site-packages
SitePackagesThirdParty,
/// Vendored third-party stubs from typeshed
VendoredThirdParty,
}

View File

@@ -1,938 +0,0 @@
use salsa::DebugWithDb;
use std::ops::Deref;
use ruff_db::file_system::{FileSystem, FileSystemPath, FileSystemPathBuf};
use ruff_db::vfs::{system_path_to_file, vfs_path_to_file, VfsFile, VfsPath};
use crate::module::{Module, ModuleKind, ModuleName, ModuleSearchPath, ModuleSearchPathKind};
use crate::resolver::internal::ModuleResolverSearchPaths;
use crate::Db;
const TYPESHED_STDLIB_DIRECTORY: &str = "stdlib";
/// Configures the module search paths for the module resolver.
///
/// Must be called before calling any other module resolution functions.
pub fn set_module_resolution_settings(db: &mut dyn Db, config: ModuleResolutionSettings) {
// There's no concurrency issue here because we hold a `&mut dyn Db` reference. No other
// thread can mutate the `Db` while we're in this call, so using `try_get` to test if
// the settings have already been set is safe.
if let Some(existing) = ModuleResolverSearchPaths::try_get(db) {
existing
.set_search_paths(db)
.to(config.into_ordered_search_paths());
} else {
ModuleResolverSearchPaths::new(db, config.into_ordered_search_paths());
}
}
/// Resolves a module name to a module.
pub fn resolve_module(db: &dyn Db, module_name: ModuleName) -> Option<Module> {
let interned_name = internal::ModuleNameIngredient::new(db, module_name);
resolve_module_query(db, interned_name)
}
/// Salsa query that resolves an interned [`ModuleNameIngredient`] to a module.
///
/// This query should not be called directly. Instead, use [`resolve_module`]. It only exists
/// because Salsa requires the module name to be an ingredient.
#[salsa::tracked]
pub(crate) fn resolve_module_query<'db>(
db: &'db dyn Db,
module_name: internal::ModuleNameIngredient<'db>,
) -> Option<Module> {
let _ = tracing::trace_span!("resolve_module", module_name = ?module_name.debug(db)).enter();
let name = module_name.name(db);
let (search_path, module_file, kind) = resolve_name(db, name)?;
let module = Module::new(name.clone(), kind, search_path, module_file);
Some(module)
}
/// Resolves the module for the given path.
///
/// Returns `None` if the path is not a module locatable via `sys.path`.
#[tracing::instrument(level = "debug", skip(db))]
pub fn path_to_module(db: &dyn Db, path: &VfsPath) -> Option<Module> {
// It's not entirely clear on first sight why this method calls `file_to_module` instead of
// it being the other way round, considering that the first thing that `file_to_module` does
// is to retrieve the file's path.
//
// The reason is that `file_to_module` is a tracked Salsa query and salsa queries require that
// all arguments are Salsa ingredients (something stored in Salsa). `Path`s aren't salsa ingredients but
// `VfsFile` is. So what we do here is to retrieve the `path`'s `VfsFile` so that we can make
// use of Salsa's caching and invalidation.
let file = vfs_path_to_file(db.upcast(), path)?;
file_to_module(db, file)
}
/// Resolves the module for the file with the given id.
///
/// Returns `None` if the file is not a module locatable via `sys.path`.
#[salsa::tracked]
#[allow(unused)]
pub(crate) fn file_to_module(db: &dyn Db, file: VfsFile) -> Option<Module> {
let _ = tracing::trace_span!("file_to_module", file = ?file.debug(db.upcast())).enter();
let path = file.path(db.upcast());
let search_paths = module_search_paths(db);
let relative_path = search_paths
.iter()
.find_map(|root| match (root.path(), path) {
(VfsPath::FileSystem(root_path), VfsPath::FileSystem(path)) => {
let relative_path = path.strip_prefix(root_path).ok()?;
Some(relative_path)
}
(VfsPath::Vendored(_), VfsPath::Vendored(_)) => {
todo!("Add support for vendored modules")
}
(VfsPath::Vendored(_), VfsPath::FileSystem(_))
| (VfsPath::FileSystem(_), VfsPath::Vendored(_)) => None,
})?;
let module_name = ModuleName::from_relative_path(relative_path)?;
// Resolve the module name to see if Python would resolve the name to the same path.
// If it doesn't, then that means that multiple modules have the same name in different
// root paths, but that the module corresponding to `path` is in a lower priority search path,
// in which case we ignore it.
let module = resolve_module(db, module_name)?;
if file == module.file() {
Some(module)
} else {
// This path is for a module with the same name but with a different precedence. For example:
// ```
// src/foo.py
// src/foo/__init__.py
// ```
// The module name of `src/foo.py` is `foo`, but the module loaded by Python is `src/foo/__init__.py`.
// That means we need to ignore `src/foo.py` even though it resolves to the same module name.
None
}
}
/// Configures the search paths that are used to resolve modules.
#[derive(Eq, PartialEq, Debug)]
pub struct ModuleResolutionSettings {
/// List of user-provided paths that should take first priority in the module resolution.
/// Examples in other type checkers are mypy's MYPYPATH environment variable,
/// or pyright's stubPath configuration setting.
pub extra_paths: Vec<FileSystemPathBuf>,
/// The root of the workspace, used for finding first-party modules.
pub workspace_root: FileSystemPathBuf,
/// The path to the user's `site-packages` directory, where third-party packages from ``PyPI`` are installed.
pub site_packages: Option<FileSystemPathBuf>,
/// Optional path to standard-library typeshed stubs.
/// Currently this has to be a directory that exists on disk.
///
/// (TODO: fall back to vendored stubs if no custom directory is provided.)
pub custom_typeshed: Option<FileSystemPathBuf>,
}
impl ModuleResolutionSettings {
/// Implementation of PEP 561's module resolution order
/// (with some small, deliberate, differences)
fn into_ordered_search_paths(self) -> OrderedSearchPaths {
let ModuleResolutionSettings {
extra_paths,
workspace_root,
site_packages,
custom_typeshed,
} = self;
let mut paths: Vec<_> = extra_paths
.into_iter()
.map(|path| ModuleSearchPath::new(path, ModuleSearchPathKind::Extra))
.collect();
paths.push(ModuleSearchPath::new(
workspace_root,
ModuleSearchPathKind::FirstParty,
));
// TODO fallback to vendored typeshed stubs if no custom typeshed directory is provided by the user
if let Some(custom_typeshed) = custom_typeshed {
paths.push(ModuleSearchPath::new(
custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY),
ModuleSearchPathKind::StandardLibrary,
));
}
// TODO vendor typeshed's third-party stubs as well as the stdlib and fallback to them as a final step
if let Some(site_packages) = site_packages {
paths.push(ModuleSearchPath::new(
site_packages,
ModuleSearchPathKind::SitePackagesThirdParty,
));
}
OrderedSearchPaths(paths)
}
}
/// A resolved module resolution order, implementing PEP 561
/// (with some small, deliberate differences)
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub(crate) struct OrderedSearchPaths(Vec<ModuleSearchPath>);
impl Deref for OrderedSearchPaths {
type Target = [ModuleSearchPath];
fn deref(&self) -> &Self::Target {
&self.0
}
}
// The singleton methods generated by salsa are all `pub` instead of `pub(crate)` which triggers
// `unreachable_pub`. Work around this by creating a module and allow `unreachable_pub` for it.
// Salsa also generates uses to `_db` variables for `interned` which triggers `clippy::used_underscore_binding`. Suppress that too
// TODO(micha): Contribute a fix for this upstream where the singleton methods have the same visibility as the struct.
#[allow(unreachable_pub, clippy::used_underscore_binding)]
pub(crate) mod internal {
use crate::module::ModuleName;
use crate::resolver::OrderedSearchPaths;
#[salsa::input(singleton)]
pub(crate) struct ModuleResolverSearchPaths {
#[return_ref]
pub(super) search_paths: OrderedSearchPaths,
}
/// A thin wrapper around `ModuleName` to make it a Salsa ingredient.
///
/// This is needed because Salsa requires that all query arguments are salsa ingredients.
#[salsa::interned]
pub(crate) struct ModuleNameIngredient<'db> {
#[return_ref]
pub(super) name: ModuleName,
}
}
fn module_search_paths(db: &dyn Db) -> &[ModuleSearchPath] {
ModuleResolverSearchPaths::get(db).search_paths(db)
}
/// Given a module name and a list of search paths in which to lookup modules,
/// attempt to resolve the module name
fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(ModuleSearchPath, VfsFile, ModuleKind)> {
let search_paths = module_search_paths(db);
for search_path in search_paths {
let mut components = name.components();
let module_name = components.next_back()?;
let VfsPath::FileSystem(fs_search_path) = search_path.path() else {
todo!("Vendored search paths are not yet supported");
};
match resolve_package(db.file_system(), fs_search_path, components) {
Ok(resolved_package) => {
let mut package_path = resolved_package.path;
package_path.push(module_name);
// Must be a `__init__.pyi` or `__init__.py` or it isn't a package.
let kind = if db.file_system().is_directory(&package_path) {
package_path.push("__init__");
ModuleKind::Package
} else {
ModuleKind::Module
};
// TODO Implement full https://peps.python.org/pep-0561/#type-checker-module-resolution-order resolution
let stub = package_path.with_extension("pyi");
if let Some(stub) = system_path_to_file(db.upcast(), &stub) {
return Some((search_path.clone(), stub, kind));
}
let module = package_path.with_extension("py");
if let Some(module) = system_path_to_file(db.upcast(), &module) {
return Some((search_path.clone(), module, kind));
}
// For regular packages, don't search the next search path. All files of that
// package must be in the same location
if resolved_package.kind.is_regular_package() {
return None;
}
}
Err(parent_kind) => {
if parent_kind.is_regular_package() {
// For regular packages, don't search the next search path.
return None;
}
}
}
}
None
}
fn resolve_package<'a, I>(
fs: &dyn FileSystem,
module_search_path: &FileSystemPath,
components: I,
) -> Result<ResolvedPackage, PackageKind>
where
I: Iterator<Item = &'a str>,
{
let mut package_path = module_search_path.to_path_buf();
// `true` if inside a folder that is a namespace package (has no `__init__.py`).
// Namespace packages are special because they can be spread across multiple search paths.
// https://peps.python.org/pep-0420/
let mut in_namespace_package = false;
// `true` if resolving a sub-package. For example, `true` when resolving `bar` of `foo.bar`.
let mut in_sub_package = false;
// For `foo.bar.baz`, test that `foo` and `baz` both contain a `__init__.py`.
for folder in components {
package_path.push(folder);
let has_init_py = fs.is_file(&package_path.join("__init__.py"))
|| fs.is_file(&package_path.join("__init__.pyi"));
if has_init_py {
in_namespace_package = false;
} else if fs.is_directory(&package_path) {
// A directory without an `__init__.py` is a namespace package, continue with the next folder.
in_namespace_package = true;
} else if in_namespace_package {
// Package not found but it is part of a namespace package.
return Err(PackageKind::Namespace);
} else if in_sub_package {
// A regular sub package wasn't found.
return Err(PackageKind::Regular);
} else {
// We couldn't find `foo` for `foo.bar.baz`, search the next search path.
return Err(PackageKind::Root);
}
in_sub_package = true;
}
let kind = if in_namespace_package {
PackageKind::Namespace
} else if in_sub_package {
PackageKind::Regular
} else {
PackageKind::Root
};
Ok(ResolvedPackage {
kind,
path: package_path,
})
}
#[derive(Debug)]
struct ResolvedPackage {
path: FileSystemPathBuf,
kind: PackageKind,
}
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum PackageKind {
/// A root package or module. E.g. `foo` in `foo.bar.baz` or just `foo`.
Root,
/// A regular sub-package where the parent contains an `__init__.py`.
///
/// For example, `bar` in `foo.bar` when the `foo` directory contains an `__init__.py`.
Regular,
/// A sub-package in a namespace package. A namespace package is a package without an `__init__.py`.
///
/// For example, `bar` in `foo.bar` if the `foo` directory contains no `__init__.py`.
Namespace,
}
impl PackageKind {
const fn is_regular_package(self) -> bool {
matches!(self, PackageKind::Regular)
}
}
#[cfg(test)]
mod tests {
use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf};
use ruff_db::vfs::{system_path_to_file, VfsFile, VfsPath};
use crate::db::tests::TestDb;
use crate::module::{ModuleKind, ModuleName};
use super::{
path_to_module, resolve_module, set_module_resolution_settings, ModuleResolutionSettings,
TYPESHED_STDLIB_DIRECTORY,
};
struct TestCase {
db: TestDb,
src: FileSystemPathBuf,
custom_typeshed: FileSystemPathBuf,
site_packages: FileSystemPathBuf,
}
fn create_resolver() -> std::io::Result<TestCase> {
let mut db = TestDb::new();
let src = FileSystemPath::new("src").to_path_buf();
let site_packages = FileSystemPath::new("site_packages").to_path_buf();
let custom_typeshed = FileSystemPath::new("typeshed").to_path_buf();
let fs = db.memory_file_system();
fs.create_directory_all(&src)?;
fs.create_directory_all(&site_packages)?;
fs.create_directory_all(&custom_typeshed)?;
let settings = ModuleResolutionSettings {
extra_paths: vec![],
workspace_root: src.clone(),
site_packages: Some(site_packages.clone()),
custom_typeshed: Some(custom_typeshed.clone()),
};
set_module_resolution_settings(&mut db, settings);
Ok(TestCase {
db,
src,
custom_typeshed,
site_packages,
})
}
#[test]
fn first_party_module() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo_module_name = ModuleName::new_static("foo").unwrap();
let foo_path = src.join("foo.py");
db.memory_file_system()
.write_file(&foo_path, "print('Hello, world!')")?;
let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap();
assert_eq!(
Some(&foo_module),
resolve_module(&db, foo_module_name.clone()).as_ref()
);
assert_eq!("foo", foo_module.name());
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(ModuleKind::Module, foo_module.kind());
assert_eq!(&foo_path, foo_module.file().path(&db));
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_path))
);
Ok(())
}
#[test]
fn stdlib() -> anyhow::Result<()> {
let TestCase {
db,
custom_typeshed,
..
} = create_resolver()?;
let stdlib_dir = custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY);
let functools_path = stdlib_dir.join("functools.py");
db.memory_file_system()
.write_file(&functools_path, "def update_wrapper(): ...")?;
let functools_module_name = ModuleName::new_static("functools").unwrap();
let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap();
assert_eq!(
Some(&functools_module),
resolve_module(&db, functools_module_name).as_ref()
);
assert_eq!(&stdlib_dir, functools_module.search_path().path());
assert_eq!(ModuleKind::Module, functools_module.kind());
assert_eq!(&functools_path.clone(), functools_module.file().path(&db));
assert_eq!(
Some(functools_module),
path_to_module(&db, &VfsPath::FileSystem(functools_path))
);
Ok(())
}
#[test]
fn first_party_precedence_over_stdlib() -> anyhow::Result<()> {
let TestCase {
db,
src,
custom_typeshed,
..
} = create_resolver()?;
let stdlib_dir = custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY);
let stdlib_functools_path = stdlib_dir.join("functools.py");
let first_party_functools_path = src.join("functools.py");
db.memory_file_system().write_files([
(&stdlib_functools_path, "def update_wrapper(): ..."),
(&first_party_functools_path, "def update_wrapper(): ..."),
])?;
let functools_module_name = ModuleName::new_static("functools").unwrap();
let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap();
assert_eq!(
Some(&functools_module),
resolve_module(&db, functools_module_name).as_ref()
);
assert_eq!(&src, functools_module.search_path().path());
assert_eq!(ModuleKind::Module, functools_module.kind());
assert_eq!(
&first_party_functools_path.clone(),
functools_module.file().path(&db)
);
assert_eq!(
Some(functools_module),
path_to_module(&db, &VfsPath::FileSystem(first_party_functools_path))
);
Ok(())
}
// TODO: Port typeshed test case. Porting isn't possible at the moment because the vendored zip
// is part of the red knot crate
// #[test]
// fn typeshed_zip_created_at_build_time() -> anyhow::Result<()> {
// // The file path here is hardcoded in this crate's `build.rs` script.
// // Luckily this crate will fail to build if this file isn't available at build time.
// const TYPESHED_ZIP_BYTES: &[u8] =
// include_bytes!(concat!(env!("OUT_DIR"), "/zipped_typeshed.zip"));
// assert!(!TYPESHED_ZIP_BYTES.is_empty());
// let mut typeshed_zip_archive = ZipArchive::new(Cursor::new(TYPESHED_ZIP_BYTES))?;
//
// let path_to_functools = Path::new("stdlib").join("functools.pyi");
// let mut functools_module_stub = typeshed_zip_archive
// .by_name(path_to_functools.to_str().unwrap())
// .unwrap();
// assert!(functools_module_stub.is_file());
//
// let mut functools_module_stub_source = String::new();
// functools_module_stub.read_to_string(&mut functools_module_stub_source)?;
//
// assert!(functools_module_stub_source.contains("def update_wrapper("));
// Ok(())
// }
#[test]
fn resolve_package() -> anyhow::Result<()> {
let TestCase { src, db, .. } = create_resolver()?;
let foo_dir = src.join("foo");
let foo_path = foo_dir.join("__init__.py");
db.memory_file_system()
.write_file(&foo_path, "print('Hello, world!')")?;
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!("foo", foo_module.name());
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo_path, foo_module.file().path(&db));
assert_eq!(
Some(&foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_path)).as_ref()
);
// Resolving by directory doesn't resolve to the init file.
assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_dir)));
Ok(())
}
#[test]
fn package_priority_over_module() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo_dir = src.join("foo");
let foo_init = foo_dir.join("__init__.py");
db.memory_file_system()
.write_file(&foo_init, "print('Hello, world!')")?;
let foo_py = src.join("foo.py");
db.memory_file_system()
.write_file(&foo_py, "print('Hello, world!')")?;
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo_init, foo_module.file().path(&db));
assert_eq!(ModuleKind::Package, foo_module.kind());
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_init))
);
assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_py)));
Ok(())
}
#[test]
fn typing_stub_over_module() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo_stub = src.join("foo.pyi");
let foo_py = src.join("foo.py");
db.memory_file_system()
.write_files([(&foo_stub, "x: int"), (&foo_py, "print('Hello, world!')")])?;
let foo = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!(&src, foo.search_path().path());
assert_eq!(&foo_stub, foo.file().path(&db));
assert_eq!(
Some(foo),
path_to_module(&db, &VfsPath::FileSystem(foo_stub))
);
assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_py)));
Ok(())
}
#[test]
fn sub_packages() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo = src.join("foo");
let bar = foo.join("bar");
let baz = bar.join("baz.py");
db.memory_file_system().write_files([
(&foo.join("__init__.py"), ""),
(&bar.join("__init__.py"), ""),
(&baz, "print('Hello, world!')"),
])?;
let baz_module =
resolve_module(&db, ModuleName::new_static("foo.bar.baz").unwrap()).unwrap();
assert_eq!(&src, baz_module.search_path().path());
assert_eq!(&baz, baz_module.file().path(&db));
assert_eq!(
Some(baz_module),
path_to_module(&db, &VfsPath::FileSystem(baz))
);
Ok(())
}
#[test]
fn namespace_package() -> anyhow::Result<()> {
let TestCase {
db,
src,
site_packages,
..
} = create_resolver()?;
// From [PEP420](https://peps.python.org/pep-0420/#nested-namespace-packages).
// But uses `src` for `project1` and `site_packages2` for `project2`.
// ```
// src
// parent
// child
// one.py
// site_packages
// parent
// child
// two.py
// ```
let parent1 = src.join("parent");
let child1 = parent1.join("child");
let one = child1.join("one.py");
let parent2 = site_packages.join("parent");
let child2 = parent2.join("child");
let two = child2.join("two.py");
db.memory_file_system().write_files([
(&one, "print('Hello, world!')"),
(&two, "print('Hello, world!')"),
])?;
let one_module =
resolve_module(&db, ModuleName::new_static("parent.child.one").unwrap()).unwrap();
assert_eq!(
Some(one_module),
path_to_module(&db, &VfsPath::FileSystem(one))
);
let two_module =
resolve_module(&db, ModuleName::new_static("parent.child.two").unwrap()).unwrap();
assert_eq!(
Some(two_module),
path_to_module(&db, &VfsPath::FileSystem(two))
);
Ok(())
}
#[test]
fn regular_package_in_namespace_package() -> anyhow::Result<()> {
let TestCase {
db,
src,
site_packages,
..
} = create_resolver()?;
// Adopted test case from the [PEP420 examples](https://peps.python.org/pep-0420/#nested-namespace-packages).
// The `src/parent/child` package is a regular package. Therefore, `site_packages/parent/child/two.py` should not be resolved.
// ```
// src
// parent
// child
// one.py
// site_packages
// parent
// child
// two.py
// ```
let parent1 = src.join("parent");
let child1 = parent1.join("child");
let one = child1.join("one.py");
let parent2 = site_packages.join("parent");
let child2 = parent2.join("child");
let two = child2.join("two.py");
db.memory_file_system().write_files([
(&child1.join("__init__.py"), "print('Hello, world!')"),
(&one, "print('Hello, world!')"),
(&two, "print('Hello, world!')"),
])?;
let one_module =
resolve_module(&db, ModuleName::new_static("parent.child.one").unwrap()).unwrap();
assert_eq!(
Some(one_module),
path_to_module(&db, &VfsPath::FileSystem(one))
);
assert_eq!(
None,
resolve_module(&db, ModuleName::new_static("parent.child.two").unwrap())
);
Ok(())
}
#[test]
fn module_search_path_priority() -> anyhow::Result<()> {
let TestCase {
db,
src,
site_packages,
..
} = create_resolver()?;
let foo_src = src.join("foo.py");
let foo_site_packages = site_packages.join("foo.py");
db.memory_file_system()
.write_files([(&foo_src, ""), (&foo_site_packages, "")])?;
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo_src, foo_module.file().path(&db));
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_src))
);
assert_eq!(
None,
path_to_module(&db, &VfsPath::FileSystem(foo_site_packages))
);
Ok(())
}
#[test]
#[cfg(target_family = "unix")]
fn symlink() -> anyhow::Result<()> {
let TestCase {
mut db,
src,
site_packages,
custom_typeshed,
} = create_resolver()?;
db.with_os_file_system();
let temp_dir = tempfile::tempdir()?;
let root = FileSystemPath::from_std_path(temp_dir.path()).unwrap();
let src = root.join(src);
let site_packages = root.join(site_packages);
let custom_typeshed = root.join(custom_typeshed);
let foo = src.join("foo.py");
let bar = src.join("bar.py");
std::fs::create_dir_all(src.as_std_path())?;
std::fs::create_dir_all(site_packages.as_std_path())?;
std::fs::create_dir_all(custom_typeshed.as_std_path())?;
std::fs::write(foo.as_std_path(), "")?;
std::os::unix::fs::symlink(foo.as_std_path(), bar.as_std_path())?;
let settings = ModuleResolutionSettings {
extra_paths: vec![],
workspace_root: src.clone(),
site_packages: Some(site_packages),
custom_typeshed: Some(custom_typeshed),
};
set_module_resolution_settings(&mut db, settings);
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
let bar_module = resolve_module(&db, ModuleName::new_static("bar").unwrap()).unwrap();
assert_ne!(foo_module, bar_module);
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo, foo_module.file().path(&db));
// `foo` and `bar` shouldn't resolve to the same file
assert_eq!(&src, bar_module.search_path().path());
assert_eq!(&bar, bar_module.file().path(&db));
assert_eq!(&foo, foo_module.file().path(&db));
assert_ne!(&foo_module, &bar_module);
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo))
);
assert_eq!(
Some(bar_module),
path_to_module(&db, &VfsPath::FileSystem(bar))
);
Ok(())
}
#[test]
fn deleting_an_unrealted_file_doesnt_change_module_resolution() -> anyhow::Result<()> {
let TestCase { mut db, src, .. } = create_resolver()?;
let foo_path = src.join("foo.py");
let bar_path = src.join("bar.py");
db.memory_file_system()
.write_files([(&foo_path, "x = 1"), (&bar_path, "y = 2")])?;
let foo_module_name = ModuleName::new_static("foo").unwrap();
let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap();
let bar = system_path_to_file(&db, &bar_path).expect("bar.py to exist");
db.clear_salsa_events();
// Delete `bar.py`
db.memory_file_system().remove_file(&bar_path)?;
bar.touch(&mut db);
// Re-query the foo module. The foo module should still be cached because `bar.py` isn't relevant
// for resolving `foo`.
let foo_module2 = resolve_module(&db, foo_module_name);
assert!(!db
.take_salsa_events()
.iter()
.any(|event| { matches!(event.kind, salsa::EventKind::WillExecute { .. }) }));
assert_eq!(Some(foo_module), foo_module2);
Ok(())
}
#[test]
fn adding_a_file_on_which_the_module_resolution_depends_on_invalidates_the_query(
) -> anyhow::Result<()> {
let TestCase { mut db, src, .. } = create_resolver()?;
let foo_path = src.join("foo.py");
let foo_module_name = ModuleName::new_static("foo").unwrap();
assert_eq!(resolve_module(&db, foo_module_name.clone()), None);
// Now write the foo file
db.memory_file_system().write_file(&foo_path, "x = 1")?;
VfsFile::touch_path(&mut db, &VfsPath::FileSystem(foo_path.clone()));
let foo_file = system_path_to_file(&db, &foo_path).expect("foo.py to exist");
let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve");
assert_eq!(foo_file, foo_module.file());
Ok(())
}
#[test]
fn removing_a_file_that_the_module_resolution_depends_on_invalidates_the_query(
) -> anyhow::Result<()> {
let TestCase { mut db, src, .. } = create_resolver()?;
let foo_path = src.join("foo.py");
let foo_init_path = src.join("foo/__init__.py");
db.memory_file_system()
.write_files([(&foo_path, "x = 1"), (&foo_init_path, "x = 2")])?;
let foo_module_name = ModuleName::new_static("foo").unwrap();
let foo_module = resolve_module(&db, foo_module_name.clone()).expect("foo module to exist");
assert_eq!(&foo_init_path, foo_module.file().path(&db));
// Delete `foo/__init__.py` and the `foo` folder. `foo` should now resolve to `foo.py`
db.memory_file_system().remove_file(&foo_init_path)?;
db.memory_file_system()
.remove_directory(foo_init_path.parent().unwrap())?;
VfsFile::touch_path(&mut db, &VfsPath::FileSystem(foo_init_path.clone()));
let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve");
assert_eq!(&foo_path, foo_module.file().path(&db));
Ok(())
}
}

View File

@@ -1 +0,0 @@
114409d49b43ba62a179ebb856fa70a5161f751e

View File

@@ -1,117 +0,0 @@
import sys
from typing import Literal
SF_APPEND: Literal[0x00040000]
SF_ARCHIVED: Literal[0x00010000]
SF_IMMUTABLE: Literal[0x00020000]
SF_NOUNLINK: Literal[0x00100000]
SF_SNAPSHOT: Literal[0x00200000]
ST_MODE: Literal[0]
ST_INO: Literal[1]
ST_DEV: Literal[2]
ST_NLINK: Literal[3]
ST_UID: Literal[4]
ST_GID: Literal[5]
ST_SIZE: Literal[6]
ST_ATIME: Literal[7]
ST_MTIME: Literal[8]
ST_CTIME: Literal[9]
S_IFIFO: Literal[0o010000]
S_IFLNK: Literal[0o120000]
S_IFREG: Literal[0o100000]
S_IFSOCK: Literal[0o140000]
S_IFBLK: Literal[0o060000]
S_IFCHR: Literal[0o020000]
S_IFDIR: Literal[0o040000]
# These are 0 on systems that don't support the specific kind of file.
# Example: Linux doesn't support door files, so S_IFDOOR is 0 on linux.
S_IFDOOR: int
S_IFPORT: int
S_IFWHT: int
S_ISUID: Literal[0o4000]
S_ISGID: Literal[0o2000]
S_ISVTX: Literal[0o1000]
S_IRWXU: Literal[0o0700]
S_IRUSR: Literal[0o0400]
S_IWUSR: Literal[0o0200]
S_IXUSR: Literal[0o0100]
S_IRWXG: Literal[0o0070]
S_IRGRP: Literal[0o0040]
S_IWGRP: Literal[0o0020]
S_IXGRP: Literal[0o0010]
S_IRWXO: Literal[0o0007]
S_IROTH: Literal[0o0004]
S_IWOTH: Literal[0o0002]
S_IXOTH: Literal[0o0001]
S_ENFMT: Literal[0o2000]
S_IREAD: Literal[0o0400]
S_IWRITE: Literal[0o0200]
S_IEXEC: Literal[0o0100]
UF_APPEND: Literal[0x00000004]
UF_COMPRESSED: Literal[0x00000020] # OS X 10.6+ only
UF_HIDDEN: Literal[0x00008000] # OX X 10.5+ only
UF_IMMUTABLE: Literal[0x00000002]
UF_NODUMP: Literal[0x00000001]
UF_NOUNLINK: Literal[0x00000010]
UF_OPAQUE: Literal[0x00000008]
def S_IMODE(mode: int, /) -> int: ...
def S_IFMT(mode: int, /) -> int: ...
def S_ISBLK(mode: int, /) -> bool: ...
def S_ISCHR(mode: int, /) -> bool: ...
def S_ISDIR(mode: int, /) -> bool: ...
def S_ISDOOR(mode: int, /) -> bool: ...
def S_ISFIFO(mode: int, /) -> bool: ...
def S_ISLNK(mode: int, /) -> bool: ...
def S_ISPORT(mode: int, /) -> bool: ...
def S_ISREG(mode: int, /) -> bool: ...
def S_ISSOCK(mode: int, /) -> bool: ...
def S_ISWHT(mode: int, /) -> bool: ...
def filemode(mode: int, /) -> str: ...
if sys.platform == "win32":
IO_REPARSE_TAG_SYMLINK: int
IO_REPARSE_TAG_MOUNT_POINT: int
IO_REPARSE_TAG_APPEXECLINK: int
if sys.platform == "win32":
FILE_ATTRIBUTE_ARCHIVE: Literal[32]
FILE_ATTRIBUTE_COMPRESSED: Literal[2048]
FILE_ATTRIBUTE_DEVICE: Literal[64]
FILE_ATTRIBUTE_DIRECTORY: Literal[16]
FILE_ATTRIBUTE_ENCRYPTED: Literal[16384]
FILE_ATTRIBUTE_HIDDEN: Literal[2]
FILE_ATTRIBUTE_INTEGRITY_STREAM: Literal[32768]
FILE_ATTRIBUTE_NORMAL: Literal[128]
FILE_ATTRIBUTE_NOT_CONTENT_INDEXED: Literal[8192]
FILE_ATTRIBUTE_NO_SCRUB_DATA: Literal[131072]
FILE_ATTRIBUTE_OFFLINE: Literal[4096]
FILE_ATTRIBUTE_READONLY: Literal[1]
FILE_ATTRIBUTE_REPARSE_POINT: Literal[1024]
FILE_ATTRIBUTE_SPARSE_FILE: Literal[512]
FILE_ATTRIBUTE_SYSTEM: Literal[4]
FILE_ATTRIBUTE_TEMPORARY: Literal[256]
FILE_ATTRIBUTE_VIRTUAL: Literal[65536]
if sys.version_info >= (3, 13):
SF_SETTABLE: Literal[0x3FFF0000]
# https://github.com/python/cpython/issues/114081#issuecomment-2119017790
# SF_RESTRICTED: Literal[0x00080000]
SF_FIRMLINK: Literal[0x00800000]
SF_DATALESS: Literal[0x40000000]
SF_SUPPORTED: Literal[0x9F0000]
SF_SYNTHETIC: Literal[0xC0000000]
UF_TRACKED: Literal[0x00000040]
UF_DATAVAULT: Literal[0x00000080]
UF_SETTABLE: Literal[0x0000FFFF]

View File

@@ -1,20 +0,0 @@
import enum
import sys
from typing import Literal
LOG_THRESHOLD_FOR_CONNLOST_WRITES: Literal[5]
ACCEPT_RETRY_DELAY: Literal[1]
DEBUG_STACK_DEPTH: Literal[10]
SSL_HANDSHAKE_TIMEOUT: float
SENDFILE_FALLBACK_READBUFFER_SIZE: Literal[262144]
if sys.version_info >= (3, 11):
SSL_SHUTDOWN_TIMEOUT: float
FLOW_CONTROL_HIGH_WATER_SSL_READ: Literal[256]
FLOW_CONTROL_HIGH_WATER_SSL_WRITE: Literal[512]
if sys.version_info >= (3, 12):
THREAD_JOIN_TIMEOUT: Literal[300]
class _SendfileMode(enum.Enum):
UNSUPPORTED = 1
TRY_NATIVE = 2
FALLBACK = 3

View File

@@ -1,20 +0,0 @@
import functools
import traceback
from collections.abc import Iterable
from types import FrameType, FunctionType
from typing import Any, overload
from typing_extensions import TypeAlias
class _HasWrapper:
__wrapper__: _HasWrapper | FunctionType
_FuncType: TypeAlias = FunctionType | _HasWrapper | functools.partial[Any] | functools.partialmethod[Any]
@overload
def _get_function_source(func: _FuncType) -> tuple[str, int]: ...
@overload
def _get_function_source(func: object) -> tuple[str, int] | None: ...
def _format_callback_source(func: object, args: Iterable[Any]) -> str: ...
def _format_args_and_kwargs(args: Iterable[Any], kwargs: dict[str, Any]) -> str: ...
def _format_callback(func: object, args: Iterable[Any], kwargs: dict[str, Any], suffix: str = "") -> str: ...
def extract_stack(f: FrameType | None = None, limit: int | None = None) -> traceback.StackSummary: ...

View File

@@ -1,196 +0,0 @@
import sys
import types
from abc import ABCMeta, abstractmethod
from collections.abc import Callable
from typing import Literal
from typing_extensions import Self, TypeVarTuple, Unpack, deprecated
from .events import AbstractEventLoop, BaseDefaultEventLoopPolicy
from .selector_events import BaseSelectorEventLoop
_Ts = TypeVarTuple("_Ts")
# This is also technically not available on Win,
# but other parts of typeshed need this definition.
# So, it is special cased.
if sys.version_info >= (3, 12):
@deprecated("Deprecated as of Python 3.12; will be removed in Python 3.14")
class AbstractChildWatcher:
@abstractmethod
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
@abstractmethod
def remove_child_handler(self, pid: int) -> bool: ...
@abstractmethod
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
@abstractmethod
def close(self) -> None: ...
@abstractmethod
def __enter__(self) -> Self: ...
@abstractmethod
def __exit__(
self, typ: type[BaseException] | None, exc: BaseException | None, tb: types.TracebackType | None
) -> None: ...
@abstractmethod
def is_active(self) -> bool: ...
else:
class AbstractChildWatcher:
@abstractmethod
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
@abstractmethod
def remove_child_handler(self, pid: int) -> bool: ...
@abstractmethod
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
@abstractmethod
def close(self) -> None: ...
@abstractmethod
def __enter__(self) -> Self: ...
@abstractmethod
def __exit__(
self, typ: type[BaseException] | None, exc: BaseException | None, tb: types.TracebackType | None
) -> None: ...
@abstractmethod
def is_active(self) -> bool: ...
if sys.platform != "win32":
if sys.version_info >= (3, 9):
__all__ = (
"SelectorEventLoop",
"AbstractChildWatcher",
"SafeChildWatcher",
"FastChildWatcher",
"PidfdChildWatcher",
"MultiLoopChildWatcher",
"ThreadedChildWatcher",
"DefaultEventLoopPolicy",
)
else:
__all__ = (
"SelectorEventLoop",
"AbstractChildWatcher",
"SafeChildWatcher",
"FastChildWatcher",
"MultiLoopChildWatcher",
"ThreadedChildWatcher",
"DefaultEventLoopPolicy",
)
# Doesn't actually have ABCMeta metaclass at runtime, but mypy complains if we don't have it in the stub.
# See discussion in #7412
class BaseChildWatcher(AbstractChildWatcher, metaclass=ABCMeta):
def close(self) -> None: ...
def is_active(self) -> bool: ...
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
if sys.version_info >= (3, 12):
@deprecated("Deprecated as of Python 3.12; will be removed in Python 3.14")
class SafeChildWatcher(BaseChildWatcher):
def __enter__(self) -> Self: ...
def __exit__(self, a: type[BaseException] | None, b: BaseException | None, c: types.TracebackType | None) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
@deprecated("Deprecated as of Python 3.12; will be removed in Python 3.14")
class FastChildWatcher(BaseChildWatcher):
def __enter__(self) -> Self: ...
def __exit__(self, a: type[BaseException] | None, b: BaseException | None, c: types.TracebackType | None) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
else:
class SafeChildWatcher(BaseChildWatcher):
def __enter__(self) -> Self: ...
def __exit__(self, a: type[BaseException] | None, b: BaseException | None, c: types.TracebackType | None) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
class FastChildWatcher(BaseChildWatcher):
def __enter__(self) -> Self: ...
def __exit__(self, a: type[BaseException] | None, b: BaseException | None, c: types.TracebackType | None) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
class _UnixSelectorEventLoop(BaseSelectorEventLoop): ...
class _UnixDefaultEventLoopPolicy(BaseDefaultEventLoopPolicy):
if sys.version_info >= (3, 12):
@deprecated("Deprecated as of Python 3.12; will be removed in Python 3.14")
def get_child_watcher(self) -> AbstractChildWatcher: ...
@deprecated("Deprecated as of Python 3.12; will be removed in Python 3.14")
def set_child_watcher(self, watcher: AbstractChildWatcher | None) -> None: ...
else:
def get_child_watcher(self) -> AbstractChildWatcher: ...
def set_child_watcher(self, watcher: AbstractChildWatcher | None) -> None: ...
SelectorEventLoop = _UnixSelectorEventLoop
DefaultEventLoopPolicy = _UnixDefaultEventLoopPolicy
if sys.version_info >= (3, 12):
@deprecated("Deprecated as of Python 3.12; will be removed in Python 3.14")
class MultiLoopChildWatcher(AbstractChildWatcher):
def is_active(self) -> bool: ...
def close(self) -> None: ...
def __enter__(self) -> Self: ...
def __exit__(
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: types.TracebackType | None
) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
else:
class MultiLoopChildWatcher(AbstractChildWatcher):
def is_active(self) -> bool: ...
def close(self) -> None: ...
def __enter__(self) -> Self: ...
def __exit__(
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: types.TracebackType | None
) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
class ThreadedChildWatcher(AbstractChildWatcher):
def is_active(self) -> Literal[True]: ...
def close(self) -> None: ...
def __enter__(self) -> Self: ...
def __exit__(
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: types.TracebackType | None
) -> None: ...
def __del__(self) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
if sys.version_info >= (3, 9):
class PidfdChildWatcher(AbstractChildWatcher):
def __enter__(self) -> Self: ...
def __exit__(
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: types.TracebackType | None
) -> None: ...
def is_active(self) -> bool: ...
def close(self) -> None: ...
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...

View File

@@ -1,32 +0,0 @@
from ._base import (
ALL_COMPLETED as ALL_COMPLETED,
FIRST_COMPLETED as FIRST_COMPLETED,
FIRST_EXCEPTION as FIRST_EXCEPTION,
BrokenExecutor as BrokenExecutor,
CancelledError as CancelledError,
Executor as Executor,
Future as Future,
InvalidStateError as InvalidStateError,
TimeoutError as TimeoutError,
as_completed as as_completed,
wait as wait,
)
from .process import ProcessPoolExecutor as ProcessPoolExecutor
from .thread import ThreadPoolExecutor as ThreadPoolExecutor
__all__ = (
"FIRST_COMPLETED",
"FIRST_EXCEPTION",
"ALL_COMPLETED",
"CancelledError",
"TimeoutError",
"BrokenExecutor",
"Future",
"Executor",
"wait",
"as_completed",
"ProcessPoolExecutor",
"ThreadPoolExecutor",
)
def __dir__() -> tuple[str, ...]: ...

View File

@@ -1,16 +0,0 @@
from typing import Any, TypeVar
__all__ = ["Error", "copy", "deepcopy"]
_T = TypeVar("_T")
# None in CPython but non-None in Jython
PyStringMap: Any
# Note: memo and _nil are internal kwargs.
def deepcopy(x: _T, memo: dict[int, Any] | None = None, _nil: Any = []) -> _T: ...
def copy(x: _T) -> _T: ...
class Error(Exception): ...
error = Error

View File

@@ -1,25 +0,0 @@
from typing import Any
from ..cmd import Command
def show_formats() -> None: ...
class bdist(Command):
description: str
user_options: Any
boolean_options: Any
help_options: Any
no_format_option: Any
default_format: Any
format_commands: Any
format_command: Any
bdist_base: Any
plat_name: Any
formats: Any
dist_dir: Any
skip_build: int
group: Any
owner: Any
def initialize_options(self) -> None: ...
def finalize_options(self) -> None: ...
def run(self) -> None: ...

View File

@@ -1,67 +0,0 @@
ENDMARKER: int
NAME: int
NUMBER: int
STRING: int
NEWLINE: int
INDENT: int
DEDENT: int
LPAR: int
RPAR: int
LSQB: int
RSQB: int
COLON: int
COMMA: int
SEMI: int
PLUS: int
MINUS: int
STAR: int
SLASH: int
VBAR: int
AMPER: int
LESS: int
GREATER: int
EQUAL: int
DOT: int
PERCENT: int
BACKQUOTE: int
LBRACE: int
RBRACE: int
EQEQUAL: int
NOTEQUAL: int
LESSEQUAL: int
GREATEREQUAL: int
TILDE: int
CIRCUMFLEX: int
LEFTSHIFT: int
RIGHTSHIFT: int
DOUBLESTAR: int
PLUSEQUAL: int
MINEQUAL: int
STAREQUAL: int
SLASHEQUAL: int
PERCENTEQUAL: int
AMPEREQUAL: int
VBAREQUAL: int
CIRCUMFLEXEQUAL: int
LEFTSHIFTEQUAL: int
RIGHTSHIFTEQUAL: int
DOUBLESTAREQUAL: int
DOUBLESLASH: int
DOUBLESLASHEQUAL: int
OP: int
COMMENT: int
NL: int
RARROW: int
AT: int
ATEQUAL: int
AWAIT: int
ASYNC: int
ERRORTOKEN: int
COLONEQUAL: int
N_TOKENS: int
NT_OFFSET: int
tok_name: dict[int, str]
def ISTERMINAL(x: int) -> bool: ...
def ISNONTERMINAL(x: int) -> bool: ...
def ISEOF(x: int) -> bool: ...

View File

@@ -1,55 +0,0 @@
import sys
from typing import Literal, overload
if sys.platform != "win32":
LOG_ALERT: Literal[1]
LOG_AUTH: Literal[32]
LOG_AUTHPRIV: Literal[80]
LOG_CONS: Literal[2]
LOG_CRIT: Literal[2]
LOG_CRON: Literal[72]
LOG_DAEMON: Literal[24]
LOG_DEBUG: Literal[7]
LOG_EMERG: Literal[0]
LOG_ERR: Literal[3]
LOG_INFO: Literal[6]
LOG_KERN: Literal[0]
LOG_LOCAL0: Literal[128]
LOG_LOCAL1: Literal[136]
LOG_LOCAL2: Literal[144]
LOG_LOCAL3: Literal[152]
LOG_LOCAL4: Literal[160]
LOG_LOCAL5: Literal[168]
LOG_LOCAL6: Literal[176]
LOG_LOCAL7: Literal[184]
LOG_LPR: Literal[48]
LOG_MAIL: Literal[16]
LOG_NDELAY: Literal[8]
LOG_NEWS: Literal[56]
LOG_NOTICE: Literal[5]
LOG_NOWAIT: Literal[16]
LOG_ODELAY: Literal[4]
LOG_PERROR: Literal[32]
LOG_PID: Literal[1]
LOG_SYSLOG: Literal[40]
LOG_USER: Literal[8]
LOG_UUCP: Literal[64]
LOG_WARNING: Literal[4]
if sys.version_info >= (3, 13):
LOG_FTP: Literal[88]
LOG_INSTALL: Literal[112]
LOG_LAUNCHD: Literal[192]
LOG_NETINFO: Literal[96]
LOG_RAS: Literal[120]
LOG_REMOTEAUTH: Literal[104]
def LOG_MASK(pri: int, /) -> int: ...
def LOG_UPTO(pri: int, /) -> int: ...
def closelog() -> None: ...
def openlog(ident: str = ..., logoption: int = ..., facility: int = ...) -> None: ...
def setlogmask(maskpri: int, /) -> int: ...
@overload
def syslog(priority: int, message: str) -> None: ...
@overload
def syslog(message: str) -> None: ...

View File

@@ -1,80 +0,0 @@
from typing import Literal
# These are not actually bools. See #4669
NO: bool
YES: bool
TRUE: bool
FALSE: bool
ON: bool
OFF: bool
N: Literal["n"]
S: Literal["s"]
W: Literal["w"]
E: Literal["e"]
NW: Literal["nw"]
SW: Literal["sw"]
NE: Literal["ne"]
SE: Literal["se"]
NS: Literal["ns"]
EW: Literal["ew"]
NSEW: Literal["nsew"]
CENTER: Literal["center"]
NONE: Literal["none"]
X: Literal["x"]
Y: Literal["y"]
BOTH: Literal["both"]
LEFT: Literal["left"]
TOP: Literal["top"]
RIGHT: Literal["right"]
BOTTOM: Literal["bottom"]
RAISED: Literal["raised"]
SUNKEN: Literal["sunken"]
FLAT: Literal["flat"]
RIDGE: Literal["ridge"]
GROOVE: Literal["groove"]
SOLID: Literal["solid"]
HORIZONTAL: Literal["horizontal"]
VERTICAL: Literal["vertical"]
NUMERIC: Literal["numeric"]
CHAR: Literal["char"]
WORD: Literal["word"]
BASELINE: Literal["baseline"]
INSIDE: Literal["inside"]
OUTSIDE: Literal["outside"]
SEL: Literal["sel"]
SEL_FIRST: Literal["sel.first"]
SEL_LAST: Literal["sel.last"]
END: Literal["end"]
INSERT: Literal["insert"]
CURRENT: Literal["current"]
ANCHOR: Literal["anchor"]
ALL: Literal["all"]
NORMAL: Literal["normal"]
DISABLED: Literal["disabled"]
ACTIVE: Literal["active"]
HIDDEN: Literal["hidden"]
CASCADE: Literal["cascade"]
CHECKBUTTON: Literal["checkbutton"]
COMMAND: Literal["command"]
RADIOBUTTON: Literal["radiobutton"]
SEPARATOR: Literal["separator"]
SINGLE: Literal["single"]
BROWSE: Literal["browse"]
MULTIPLE: Literal["multiple"]
EXTENDED: Literal["extended"]
DOTBOX: Literal["dotbox"]
UNDERLINE: Literal["underline"]
PIESLICE: Literal["pieslice"]
CHORD: Literal["chord"]
ARC: Literal["arc"]
FIRST: Literal["first"]
LAST: Literal["last"]
BUTT: Literal["butt"]
PROJECTING: Literal["projecting"]
ROUND: Literal["round"]
BEVEL: Literal["bevel"]
MITER: Literal["miter"]
MOVETO: Literal["moveto"]
SCROLL: Literal["scroll"]
UNITS: Literal["units"]
PAGES: Literal["pages"]

View File

@@ -1,28 +0,0 @@
import sys
from _typeshed import ReadableBuffer
from typing import Literal, overload
if sys.platform == "win32":
SND_APPLICATION: Literal[128]
SND_FILENAME: Literal[131072]
SND_ALIAS: Literal[65536]
SND_LOOP: Literal[8]
SND_MEMORY: Literal[4]
SND_PURGE: Literal[64]
SND_ASYNC: Literal[1]
SND_NODEFAULT: Literal[2]
SND_NOSTOP: Literal[16]
SND_NOWAIT: Literal[8192]
MB_ICONASTERISK: Literal[64]
MB_ICONEXCLAMATION: Literal[48]
MB_ICONHAND: Literal[16]
MB_ICONQUESTION: Literal[32]
MB_OK: Literal[0]
def Beep(frequency: int, duration: int) -> None: ...
# Can actually accept anything ORed with 4, and if not it's definitely str, but that's inexpressible
@overload
def PlaySound(sound: ReadableBuffer | None, flags: Literal[4]) -> None: ...
@overload
def PlaySound(sound: str | ReadableBuffer | None, flags: int) -> None: ...
def MessageBeep(type: int = 0) -> None: ...

View File

@@ -11,25 +11,38 @@ repository = { workspace = true }
license = { workspace = true }
[dependencies]
red_knot_module_resolver = { workspace = true }
ruff_db = { workspace = true }
ruff_index = { workspace = true }
ruff_python_ast = { workspace = true }
ruff_python_stdlib = { workspace = true }
ruff_text_size = { workspace = true }
bitflags = { workspace = true }
indexmap = { workspace = true }
camino = { workspace = true }
compact_str = { workspace = true }
countme = { workspace = true }
once_cell = { workspace = true }
ordermap = { workspace = true }
salsa = { workspace = true }
smallvec = { workspace = true }
smol_str = { workspace = true }
tracing = { workspace = true }
rustc-hash = { workspace = true }
hashbrown = { workspace = true }
[build-dependencies]
path-slash = { workspace = true }
walkdir = { workspace = true }
zip = { workspace = true, features = ["zstd", "deflate"] }
[dev-dependencies]
anyhow = { workspace = true }
ruff_db = { workspace = true, features = ["os", "testing"]}
ruff_python_parser = { workspace = true }
anyhow = { workspace = true }
insta = { workspace = true }
tempfile = { workspace = true }
walkdir = { workspace = true }
zip = { workspace = true }
[lints]
workspace = true

View File

@@ -1,9 +1,9 @@
# Red Knot
A work-in-progress multifile module resolver for Ruff.
Semantic analysis for the red-knot project.
## Vendored types for the stdlib
This crate vendors [typeshed](https://github.com/python/typeshed)'s stubs for the standard library. The vendored stubs can be found in `crates/red_knot_module_resolver/vendor/typeshed`. The file `crates/red_knot_module_resolver/vendor/typeshed/source_commit.txt` tells you the typeshed commit that our vendored stdlib stubs currently correspond to.
This crate vendors [typeshed](https://github.com/python/typeshed)'s stubs for the standard library. The vendored stubs can be found in `crates/red_knot_python_semantic/vendor/typeshed`. The file `crates/red_knot_python_semantic/vendor/typeshed/source_commit.txt` tells you the typeshed commit that our vendored stdlib stubs currently correspond to.
The typeshed stubs are updated every two weeks via an automated PR using the `sync_typeshed.yaml` workflow in the `.github/workflows` directory. This workflow can also be triggered at any time via [workflow dispatch](https://docs.github.com/en/actions/using-workflows/manually-running-a-workflow#running-a-workflow).

View File

@@ -3,7 +3,7 @@
//!
//! This script should be automatically run at build time
//! whenever the script itself changes, or whenever any files
//! in `crates/red_knot_module_resolver/vendor/typeshed` change.
//! in `crates/red_knot_python_semantic/vendor/typeshed` change.
use std::fs::File;
use std::path::Path;
@@ -23,8 +23,21 @@ const TYPESHED_ZIP_LOCATION: &str = "/zipped_typeshed.zip";
fn zip_dir(directory_path: &str, writer: File) -> ZipResult<File> {
let mut zip = ZipWriter::new(writer);
// Use deflated compression for WASM builds because compiling `zstd-sys` requires clang
// [source](https://github.com/gyscos/zstd-rs/wiki/Compile-for-WASM) which complicates the build
// by a lot. Deflated compression is slower but it shouldn't matter much for the WASM use case
// (WASM itself is already slower than a native build for a specific platform).
// We can't use `#[cfg(...)]` here because the target-arch in a build script is the
// architecture of the system running the build script and not the architecture of the build-target.
// That's why we use the `TARGET` environment variable here.
let method = if std::env::var("TARGET").unwrap().contains("wasm32") {
CompressionMethod::Deflated
} else {
CompressionMethod::Zstd
};
let options = FileOptions::default()
.compression_method(CompressionMethod::Zstd)
.compression_method(method)
.unix_permissions(0o644);
for entry in walkdir::WalkDir::new(directory_path) {

View File

@@ -27,12 +27,13 @@ pub struct AstNodeRef<T> {
#[allow(unsafe_code)]
impl<T> AstNodeRef<T> {
/// Creates a new `AstNodeRef` that reference `node`. The `parsed` is the [`ParsedModule`] to which
/// the `AstNodeRef` belongs.
/// Creates a new `AstNodeRef` that reference `node`. The `parsed` is the [`ParsedModule`] to
/// which the `AstNodeRef` belongs.
///
/// ## Safety
/// Dereferencing the `node` can result in undefined behavior if `parsed` isn't the [`ParsedModule`] to
/// which `node` belongs. It's the caller's responsibility to ensure that the invariant `node belongs to parsed` is upheld.
/// Dereferencing the `node` can result in undefined behavior if `parsed` isn't the
/// [`ParsedModule`] to which `node` belongs. It's the caller's responsibility to ensure that
/// the invariant `node belongs to parsed` is upheld.
pub(super) unsafe fn new(parsed: ParsedModule, node: &T) -> Self {
Self {
@@ -43,8 +44,8 @@ impl<T> AstNodeRef<T> {
/// Returns a reference to the wrapped node.
pub fn node(&self) -> &T {
// SAFETY: Holding on to `parsed` ensures that the AST to which `node` belongs is still alive
// and not moved.
// SAFETY: Holding on to `parsed` ensures that the AST to which `node` belongs is still
// alive and not moved.
unsafe { self.node.as_ref() }
}
}

View File

@@ -0,0 +1,16 @@
use crate::module_name::ModuleName;
use crate::module_resolver::resolve_module;
use crate::semantic_index::global_scope;
use crate::semantic_index::symbol::ScopeId;
use crate::Db;
/// Salsa query to get the builtins scope.
///
/// Can return None if a custom typeshed is used that is missing `builtins.pyi`.
#[salsa::tracked]
pub(crate) fn builtins_scope(db: &dyn Db) -> Option<ScopeId<'_>> {
let builtins_name =
ModuleName::new_static("builtins").expect("Expected 'builtins' to be a valid module name");
let builtins_file = resolve_module(db, builtins_name)?.file();
Some(global_scope(db, builtins_file))
}

View File

@@ -1,55 +1,27 @@
use salsa::DbWithJar;
use ruff_db::{Db as SourceDb, Upcast};
use red_knot_module_resolver::Db as ResolverDb;
use crate::semantic_index::symbol::{public_symbols_map, scopes_map, PublicSymbolId, ScopeId};
use crate::semantic_index::{root_scope, semantic_index, symbol_table};
use crate::types::{infer_types, public_symbol_ty};
#[salsa::jar(db=Db)]
pub struct Jar(
ScopeId<'_>,
PublicSymbolId<'_>,
symbol_table,
scopes_map,
root_scope,
semantic_index,
infer_types,
public_symbol_ty,
public_symbols_map,
);
/// Database giving access to semantic information about a Python program.
pub trait Db:
SourceDb + ResolverDb + DbWithJar<Jar> + Upcast<dyn SourceDb> + Upcast<dyn ResolverDb>
{
}
#[salsa::db]
pub trait Db: SourceDb + Upcast<dyn SourceDb> {}
#[cfg(test)]
pub(crate) mod tests {
use std::fmt::Formatter;
use std::marker::PhantomData;
use std::sync::Arc;
use salsa::id::AsId;
use salsa::ingredient::Ingredient;
use salsa::storage::HasIngredientsFor;
use salsa::DebugWithDb;
use crate::module_resolver::vendored_typeshed_stubs;
use ruff_db::files::Files;
use ruff_db::system::{DbWithTestSystem, System, TestSystem};
use ruff_db::vendored::VendoredFileSystem;
use ruff_db::{Db as SourceDb, Upcast};
use red_knot_module_resolver::{Db as ResolverDb, Jar as ResolverJar};
use ruff_db::file_system::{FileSystem, MemoryFileSystem, OsFileSystem};
use ruff_db::vfs::Vfs;
use ruff_db::{Db as SourceDb, Jar as SourceJar, Upcast};
use super::Db;
use super::{Db, Jar};
#[salsa::db(Jar, ResolverJar, SourceJar)]
#[salsa::db]
pub(crate) struct TestDb {
storage: salsa::Storage<Self>,
vfs: Vfs,
file_system: TestFileSystem,
files: Files,
system: TestSystem,
vendored: VendoredFileSystem,
events: std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>>,
}
@@ -57,29 +29,13 @@ pub(crate) mod tests {
pub(crate) fn new() -> Self {
Self {
storage: salsa::Storage::default(),
file_system: TestFileSystem::Memory(MemoryFileSystem::default()),
system: TestSystem::default(),
vendored: vendored_typeshed_stubs().clone(),
events: std::sync::Arc::default(),
vfs: Vfs::with_stubbed_vendored(),
files: Files::default(),
}
}
/// Returns the memory file system.
///
/// ## Panics
/// If this test db isn't using a memory file system.
pub(crate) fn memory_file_system(&self) -> &MemoryFileSystem {
if let TestFileSystem::Memory(fs) = &self.file_system {
fs
} else {
panic!("The test db is not using a memory file system");
}
}
#[allow(unused)]
pub(crate) fn vfs_mut(&mut self) -> &mut Vfs {
&mut self.vfs
}
/// Takes the salsa events.
///
/// ## Panics
@@ -100,16 +56,28 @@ pub(crate) mod tests {
}
}
impl SourceDb for TestDb {
fn file_system(&self) -> &dyn FileSystem {
match &self.file_system {
TestFileSystem::Memory(fs) => fs,
TestFileSystem::Os(fs) => fs,
}
impl DbWithTestSystem for TestDb {
fn test_system(&self) -> &TestSystem {
&self.system
}
fn vfs(&self) -> &Vfs {
&self.vfs
fn test_system_mut(&mut self) -> &mut TestSystem {
&mut self.system
}
}
#[salsa::db]
impl SourceDb for TestDb {
fn vendored(&self) -> &VendoredFileSystem {
&self.vendored
}
fn system(&self) -> &dyn System {
&self.system
}
fn files(&self) -> &Files {
&self.files
}
}
@@ -117,144 +85,21 @@ pub(crate) mod tests {
fn upcast(&self) -> &(dyn SourceDb + 'static) {
self
}
}
impl Upcast<dyn ResolverDb> for TestDb {
fn upcast(&self) -> &(dyn ResolverDb + 'static) {
fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
self
}
}
impl red_knot_module_resolver::Db for TestDb {}
#[salsa::db]
impl Db for TestDb {}
#[salsa::db]
impl salsa::Database for TestDb {
fn salsa_event(&self, event: salsa::Event) {
tracing::trace!("event: {:?}", event.debug(self));
fn salsa_event(&self, event: &dyn Fn() -> salsa::Event) {
let event = event();
tracing::trace!("event: {event:?}");
let mut events = self.events.lock().unwrap();
events.push(event);
}
}
impl salsa::ParallelDatabase for TestDb {
fn snapshot(&self) -> salsa::Snapshot<Self> {
salsa::Snapshot::new(Self {
storage: self.storage.snapshot(),
vfs: self.vfs.snapshot(),
file_system: match &self.file_system {
TestFileSystem::Memory(memory) => TestFileSystem::Memory(memory.snapshot()),
TestFileSystem::Os(fs) => TestFileSystem::Os(fs.snapshot()),
},
events: self.events.clone(),
})
}
}
enum TestFileSystem {
Memory(MemoryFileSystem),
#[allow(dead_code)]
Os(OsFileSystem),
}
pub(crate) fn assert_will_run_function_query<'db, C, Db, Jar>(
db: &'db Db,
to_function: impl FnOnce(&C) -> &salsa::function::FunctionIngredient<C>,
input: &C::Input<'db>,
events: &[salsa::Event],
) where
C: salsa::function::Configuration<Jar = Jar>
+ salsa::storage::IngredientsFor<Jar = Jar, Ingredients = C>,
Jar: HasIngredientsFor<C>,
Db: salsa::DbWithJar<Jar>,
C::Input<'db>: AsId,
{
will_run_function_query(db, to_function, input, events, true);
}
pub(crate) fn assert_will_not_run_function_query<'db, C, Db, Jar>(
db: &'db Db,
to_function: impl FnOnce(&C) -> &salsa::function::FunctionIngredient<C>,
input: &C::Input<'db>,
events: &[salsa::Event],
) where
C: salsa::function::Configuration<Jar = Jar>
+ salsa::storage::IngredientsFor<Jar = Jar, Ingredients = C>,
Jar: HasIngredientsFor<C>,
Db: salsa::DbWithJar<Jar>,
C::Input<'db>: AsId,
{
will_run_function_query(db, to_function, input, events, false);
}
fn will_run_function_query<'db, C, Db, Jar>(
db: &'db Db,
to_function: impl FnOnce(&C) -> &salsa::function::FunctionIngredient<C>,
input: &C::Input<'db>,
events: &[salsa::Event],
should_run: bool,
) where
C: salsa::function::Configuration<Jar = Jar>
+ salsa::storage::IngredientsFor<Jar = Jar, Ingredients = C>,
Jar: HasIngredientsFor<C>,
Db: salsa::DbWithJar<Jar>,
C::Input<'db>: AsId,
{
let (jar, _) =
<_ as salsa::storage::HasJar<<C as salsa::storage::IngredientsFor>::Jar>>::jar(db);
let ingredient = jar.ingredient();
let function_ingredient = to_function(ingredient);
let ingredient_index =
<salsa::function::FunctionIngredient<C> as Ingredient<Db>>::ingredient_index(
function_ingredient,
);
let did_run = events.iter().any(|event| {
if let salsa::EventKind::WillExecute { database_key } = event.kind {
database_key.ingredient_index() == ingredient_index
&& database_key.key_index() == input.as_id()
} else {
false
}
});
if should_run && !did_run {
panic!(
"Expected query {:?} to run but it didn't",
DebugIdx {
db: PhantomData::<Db>,
value_id: input.as_id(),
ingredient: function_ingredient,
}
);
} else if !should_run && did_run {
panic!(
"Expected query {:?} not to run but it did",
DebugIdx {
db: PhantomData::<Db>,
value_id: input.as_id(),
ingredient: function_ingredient,
}
);
}
}
struct DebugIdx<'a, I, Db>
where
I: Ingredient<Db>,
{
value_id: salsa::Id,
ingredient: &'a I,
db: PhantomData<Db>,
}
impl<'a, I, Db> std::fmt::Debug for DebugIdx<'a, I, Db>
where
I: Ingredient<Db>,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.ingredient.fmt_index(Some(self.value_id), f)
}
}
}

View File

@@ -1,12 +1,24 @@
use std::hash::BuildHasherDefault;
use rustc_hash::FxHasher;
pub use db::Db;
pub use module_name::ModuleName;
pub use module_resolver::{resolve_module, system_module_search_paths, vendored_typeshed_stubs};
pub use program::{Program, ProgramSettings, SearchPathSettings};
pub use python_version::PythonVersion;
pub use semantic_model::{HasTy, SemanticModel};
pub mod ast_node_ref;
mod builtins;
mod db;
pub mod name;
mod module_name;
mod module_resolver;
mod node_key;
mod program;
mod python_version;
pub mod semantic_index;
mod semantic_model;
pub mod types;
type FxIndexSet<V> = indexmap::set::IndexSet<V, BuildHasherDefault<FxHasher>>;
pub use db::{Db, Jar};
use rustc_hash::FxHasher;
use std::hash::BuildHasherDefault;
type FxOrderSet<V> = ordermap::set::OrderSet<V, BuildHasherDefault<FxHasher>>;

View File

@@ -1,10 +0,0 @@
use std::hash::BuildHasherDefault;
use rustc_hash::FxHasher;
pub mod ast_node_ref;
mod node_key;
pub mod semantic_index;
pub mod types;
pub(crate) type FxIndexSet<V> = indexmap::set::IndexSet<V, BuildHasherDefault<FxHasher>>;

View File

@@ -0,0 +1,198 @@
use std::fmt;
use std::ops::Deref;
use compact_str::{CompactString, ToCompactString};
use ruff_python_stdlib::identifiers::is_identifier;
/// A module name, e.g. `foo.bar`.
///
/// Always normalized to the absolute form (never a relative module name, i.e., never `.foo`).
#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct ModuleName(compact_str::CompactString);
impl ModuleName {
/// Creates a new module name for `name`. Returns `Some` if `name` is a valid, absolute
/// module name and `None` otherwise.
///
/// The module name is invalid if:
///
/// * The name is empty
/// * The name is relative
/// * The name ends with a `.`
/// * The name contains a sequence of multiple dots
/// * A component of a name (the part between two dots) isn't a valid python identifier.
#[inline]
#[must_use]
pub fn new(name: &str) -> Option<Self> {
Self::is_valid_name(name).then(|| Self(CompactString::from(name)))
}
/// Creates a new module name for `name` where `name` is a static string.
/// Returns `Some` if `name` is a valid, absolute module name and `None` otherwise.
///
/// The module name is invalid if:
///
/// * The name is empty
/// * The name is relative
/// * The name ends with a `.`
/// * The name contains a sequence of multiple dots
/// * A component of a name (the part between two dots) isn't a valid python identifier.
///
/// ## Examples
///
/// ```
/// use red_knot_python_semantic::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar").as_deref(), Some("foo.bar"));
/// assert_eq!(ModuleName::new_static(""), None);
/// assert_eq!(ModuleName::new_static("..foo"), None);
/// assert_eq!(ModuleName::new_static(".foo"), None);
/// assert_eq!(ModuleName::new_static("foo."), None);
/// assert_eq!(ModuleName::new_static("foo..bar"), None);
/// assert_eq!(ModuleName::new_static("2000"), None);
/// ```
#[inline]
#[must_use]
pub fn new_static(name: &'static str) -> Option<Self> {
Self::is_valid_name(name).then(|| Self(CompactString::const_new(name)))
}
#[must_use]
fn is_valid_name(name: &str) -> bool {
!name.is_empty() && name.split('.').all(is_identifier)
}
/// An iterator over the components of the module name:
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().components().collect::<Vec<_>>(), vec!["foo", "bar", "baz"]);
/// ```
#[must_use]
pub fn components(&self) -> impl DoubleEndedIterator<Item = &str> {
self.0.split('.')
}
/// The name of this module's immediate parent, if it has a parent.
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar").unwrap().parent(), Some(ModuleName::new_static("foo").unwrap()));
/// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().parent(), Some(ModuleName::new_static("foo.bar").unwrap()));
/// assert_eq!(ModuleName::new_static("root").unwrap().parent(), None);
/// ```
#[must_use]
pub fn parent(&self) -> Option<ModuleName> {
let (parent, _) = self.0.rsplit_once('.')?;
Some(Self(parent.to_compact_string()))
}
/// Returns `true` if the name starts with `other`.
///
/// This is equivalent to checking if `self` is a sub-module of `other`.
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::ModuleName;
///
/// assert!(ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap()));
///
/// assert!(!ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("bar").unwrap()));
/// assert!(!ModuleName::new_static("foo_bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap()));
/// ```
#[must_use]
pub fn starts_with(&self, other: &ModuleName) -> bool {
let mut self_components = self.components();
let other_components = other.components();
for other_component in other_components {
if self_components.next() != Some(other_component) {
return false;
}
}
true
}
#[must_use]
#[inline]
pub fn as_str(&self) -> &str {
&self.0
}
/// Construct a [`ModuleName`] from a sequence of parts.
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::ModuleName;
///
/// assert_eq!(&*ModuleName::from_components(["a"]).unwrap(), "a");
/// assert_eq!(&*ModuleName::from_components(["a", "b"]).unwrap(), "a.b");
/// assert_eq!(&*ModuleName::from_components(["a", "b", "c"]).unwrap(), "a.b.c");
///
/// assert_eq!(ModuleName::from_components(["a-b"]), None);
/// assert_eq!(ModuleName::from_components(["a", "a-b"]), None);
/// assert_eq!(ModuleName::from_components(["a", "b", "a-b-c"]), None);
/// ```
#[must_use]
pub fn from_components<'a>(components: impl IntoIterator<Item = &'a str>) -> Option<Self> {
let mut components = components.into_iter();
let first_part = components.next()?;
if !is_identifier(first_part) {
return None;
}
let name = if let Some(second_part) = components.next() {
if !is_identifier(second_part) {
return None;
}
let mut name = format!("{first_part}.{second_part}");
for part in components {
if !is_identifier(part) {
return None;
}
name.push('.');
name.push_str(part);
}
CompactString::from(&name)
} else {
CompactString::from(first_part)
};
Some(Self(name))
}
}
impl Deref for ModuleName {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl PartialEq<str> for ModuleName {
fn eq(&self, other: &str) -> bool {
self.as_str() == other
}
}
impl PartialEq<ModuleName> for str {
fn eq(&self, other: &ModuleName) -> bool {
self == other.as_str()
}
}
impl std::fmt::Display for ModuleName {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.0)
}
}

View File

@@ -0,0 +1,45 @@
use std::iter::FusedIterator;
pub(crate) use module::Module;
pub use resolver::resolve_module;
use ruff_db::system::SystemPath;
pub use typeshed::vendored_typeshed_stubs;
use crate::Db;
use resolver::{module_resolution_settings, SearchPathIterator};
mod module;
mod path;
mod resolver;
mod state;
mod typeshed;
#[cfg(test)]
mod testing;
/// Returns an iterator over all search paths pointing to a system path
pub fn system_module_search_paths(db: &dyn Db) -> SystemModuleSearchPathsIter {
SystemModuleSearchPathsIter {
inner: module_resolution_settings(db).search_paths(db),
}
}
pub struct SystemModuleSearchPathsIter<'db> {
inner: SearchPathIterator<'db>,
}
impl<'db> Iterator for SystemModuleSearchPathsIter<'db> {
type Item = &'db SystemPath;
fn next(&mut self) -> Option<Self::Item> {
loop {
let next = self.inner.next()?;
if let Some(system_path) = next.as_system_path() {
return Some(system_path);
}
}
}
}
impl FusedIterator for SystemModuleSearchPathsIter<'_> {}

View File

@@ -0,0 +1,79 @@
use std::fmt::Formatter;
use std::sync::Arc;
use ruff_db::files::File;
use super::path::SearchPath;
use crate::module_name::ModuleName;
/// Representation of a Python module.
#[derive(Clone, PartialEq, Eq)]
pub struct Module {
inner: Arc<ModuleInner>,
}
impl Module {
pub(crate) fn new(
name: ModuleName,
kind: ModuleKind,
search_path: SearchPath,
file: File,
) -> Self {
Self {
inner: Arc::new(ModuleInner {
name,
kind,
search_path,
file,
}),
}
}
/// The absolute name of the module (e.g. `foo.bar`)
pub fn name(&self) -> &ModuleName {
&self.inner.name
}
/// The file to the source code that defines this module
pub fn file(&self) -> File {
self.inner.file
}
/// The search path from which the module was resolved.
pub(crate) fn search_path(&self) -> &SearchPath {
&self.inner.search_path
}
/// Determine whether this module is a single-file module or a package
pub fn kind(&self) -> ModuleKind {
self.inner.kind
}
}
impl std::fmt::Debug for Module {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Module")
.field("name", &self.name())
.field("kind", &self.kind())
.field("file", &self.file())
.field("search_path", &self.search_path())
.finish()
}
}
#[derive(PartialEq, Eq)]
struct ModuleInner {
name: ModuleName,
kind: ModuleKind,
search_path: SearchPath,
file: File,
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ModuleKind {
/// A single-file module (e.g. `foo.py` or `foo.pyi`)
Module,
/// A python package (`foo/__init__.py` or `foo/__init__.pyi`)
Package,
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,25 @@
use ruff_db::vendored::VendoredFileSystem;
use super::typeshed::LazyTypeshedVersions;
use crate::db::Db;
use crate::python_version::PythonVersion;
pub(crate) struct ResolverState<'db> {
pub(crate) db: &'db dyn Db,
pub(crate) typeshed_versions: LazyTypeshedVersions<'db>,
pub(crate) target_version: PythonVersion,
}
impl<'db> ResolverState<'db> {
pub(crate) fn new(db: &'db dyn Db, target_version: PythonVersion) -> Self {
Self {
db,
typeshed_versions: LazyTypeshedVersions::new(),
target_version,
}
}
pub(crate) fn vendored(&self) -> &VendoredFileSystem {
self.db.vendored()
}
}

View File

@@ -0,0 +1,295 @@
use ruff_db::system::{DbWithTestSystem, SystemPath, SystemPathBuf};
use ruff_db::vendored::VendoredPathBuf;
use crate::db::tests::TestDb;
use crate::program::{Program, SearchPathSettings};
use crate::python_version::PythonVersion;
/// A test case for the module resolver.
///
/// You generally shouldn't construct instances of this struct directly;
/// instead, use the [`TestCaseBuilder`].
pub(crate) struct TestCase<T> {
pub(crate) db: TestDb,
pub(crate) src: SystemPathBuf,
pub(crate) stdlib: T,
// Most test cases only ever need a single `site-packages` directory,
// so this is a single directory instead of a `Vec` of directories,
// like it is in `ruff_db::Program`.
pub(crate) site_packages: SystemPathBuf,
pub(crate) target_version: PythonVersion,
}
/// A `(file_name, file_contents)` tuple
pub(crate) type FileSpec = (&'static str, &'static str);
/// Specification for a typeshed mock to be created as part of a test
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct MockedTypeshed {
/// The stdlib files to be created in the typeshed mock
pub(crate) stdlib_files: &'static [FileSpec],
/// The contents of the `stdlib/VERSIONS` file
/// to be created in the typeshed mock
pub(crate) versions: &'static str,
}
#[derive(Debug)]
pub(crate) struct VendoredTypeshed;
#[derive(Debug)]
pub(crate) struct UnspecifiedTypeshed;
/// A builder for a module-resolver test case.
///
/// The builder takes care of creating a [`TestDb`]
/// instance, applying the module resolver settings,
/// and creating mock directories for the stdlib, `site-packages`,
/// first-party code, etc.
///
/// For simple tests that do not involve typeshed,
/// test cases can be created as follows:
///
/// ```rs
/// let test_case = TestCaseBuilder::new()
/// .with_src_files(...)
/// .build();
///
/// let test_case2 = TestCaseBuilder::new()
/// .with_site_packages_files(...)
/// .build();
/// ```
///
/// Any tests can specify the target Python version that should be used
/// in the module resolver settings:
///
/// ```rs
/// let test_case = TestCaseBuilder::new()
/// .with_src_files(...)
/// .with_target_version(...)
/// .build();
/// ```
///
/// For tests checking that standard-library module resolution is working
/// correctly, you should usually create a [`MockedTypeshed`] instance
/// and pass it to the [`TestCaseBuilder::with_custom_typeshed`] method.
/// If you need to check something that involves the vendored typeshed stubs
/// we include as part of the binary, you can instead use the
/// [`TestCaseBuilder::with_vendored_typeshed`] method.
/// For either of these, you should almost always try to be explicit
/// about the Python version you want to be specified in the module-resolver
/// settings for the test:
///
/// ```rs
/// const TYPESHED = MockedTypeshed { ... };
///
/// let test_case = resolver_test_case()
/// .with_custom_typeshed(TYPESHED)
/// .with_target_version(...)
/// .build();
///
/// let test_case2 = resolver_test_case()
/// .with_vendored_typeshed()
/// .with_target_version(...)
/// .build();
/// ```
///
/// If you have not called one of those options, the `stdlib` field
/// on the [`TestCase`] instance created from `.build()` will be set
/// to `()`.
pub(crate) struct TestCaseBuilder<T> {
typeshed_option: T,
target_version: PythonVersion,
first_party_files: Vec<FileSpec>,
site_packages_files: Vec<FileSpec>,
}
impl<T> TestCaseBuilder<T> {
/// Specify files to be created in the `src` mock directory
pub(crate) fn with_src_files(mut self, files: &[FileSpec]) -> Self {
self.first_party_files.extend(files.iter().copied());
self
}
/// Specify files to be created in the `site-packages` mock directory
pub(crate) fn with_site_packages_files(mut self, files: &[FileSpec]) -> Self {
self.site_packages_files.extend(files.iter().copied());
self
}
/// Specify the target Python version the module resolver should assume
pub(crate) fn with_target_version(mut self, target_version: PythonVersion) -> Self {
self.target_version = target_version;
self
}
fn write_mock_directory(
db: &mut TestDb,
location: impl AsRef<SystemPath>,
files: impl IntoIterator<Item = FileSpec>,
) -> SystemPathBuf {
let root = location.as_ref().to_path_buf();
// Make sure to create the directory even if the list of files is empty:
db.memory_file_system().create_directory_all(&root).unwrap();
db.write_files(
files
.into_iter()
.map(|(relative_path, contents)| (root.join(relative_path), contents)),
)
.unwrap();
root
}
}
impl TestCaseBuilder<UnspecifiedTypeshed> {
pub(crate) fn new() -> TestCaseBuilder<UnspecifiedTypeshed> {
Self {
typeshed_option: UnspecifiedTypeshed,
target_version: PythonVersion::default(),
first_party_files: vec![],
site_packages_files: vec![],
}
}
/// Use the vendored stdlib stubs included in the Ruff binary for this test case
pub(crate) fn with_vendored_typeshed(self) -> TestCaseBuilder<VendoredTypeshed> {
let TestCaseBuilder {
typeshed_option: _,
target_version,
first_party_files,
site_packages_files,
} = self;
TestCaseBuilder {
typeshed_option: VendoredTypeshed,
target_version,
first_party_files,
site_packages_files,
}
}
/// Use a mock typeshed directory for this test case
pub(crate) fn with_custom_typeshed(
self,
typeshed: MockedTypeshed,
) -> TestCaseBuilder<MockedTypeshed> {
let TestCaseBuilder {
typeshed_option: _,
target_version,
first_party_files,
site_packages_files,
} = self;
TestCaseBuilder {
typeshed_option: typeshed,
target_version,
first_party_files,
site_packages_files,
}
}
pub(crate) fn build(self) -> TestCase<()> {
let TestCase {
db,
src,
stdlib: _,
site_packages,
target_version,
} = self.with_custom_typeshed(MockedTypeshed::default()).build();
TestCase {
db,
src,
stdlib: (),
site_packages,
target_version,
}
}
}
impl TestCaseBuilder<MockedTypeshed> {
pub(crate) fn build(self) -> TestCase<SystemPathBuf> {
let TestCaseBuilder {
typeshed_option,
target_version,
first_party_files,
site_packages_files,
} = self;
let mut db = TestDb::new();
let site_packages =
Self::write_mock_directory(&mut db, "/site-packages", site_packages_files);
let src = Self::write_mock_directory(&mut db, "/src", first_party_files);
let typeshed = Self::build_typeshed_mock(&mut db, &typeshed_option);
Program::new(
&db,
target_version,
SearchPathSettings {
extra_paths: vec![],
src_root: src.clone(),
custom_typeshed: Some(typeshed.clone()),
site_packages: vec![site_packages.clone()],
},
);
TestCase {
db,
src,
stdlib: typeshed.join("stdlib"),
site_packages,
target_version,
}
}
fn build_typeshed_mock(db: &mut TestDb, typeshed_to_build: &MockedTypeshed) -> SystemPathBuf {
let typeshed = SystemPathBuf::from("/typeshed");
let MockedTypeshed {
stdlib_files,
versions,
} = typeshed_to_build;
Self::write_mock_directory(
db,
typeshed.join("stdlib"),
stdlib_files
.iter()
.copied()
.chain(std::iter::once(("VERSIONS", *versions))),
);
typeshed
}
}
impl TestCaseBuilder<VendoredTypeshed> {
pub(crate) fn build(self) -> TestCase<VendoredPathBuf> {
let TestCaseBuilder {
typeshed_option: VendoredTypeshed,
target_version,
first_party_files,
site_packages_files,
} = self;
let mut db = TestDb::new();
let site_packages =
Self::write_mock_directory(&mut db, "/site-packages", site_packages_files);
let src = Self::write_mock_directory(&mut db, "/src", first_party_files);
Program::new(
&db,
target_version,
SearchPathSettings {
extra_paths: vec![],
src_root: src.clone(),
custom_typeshed: None,
site_packages: vec![site_packages.clone()],
},
);
TestCase {
db,
src,
stdlib: VendoredPathBuf::from("stdlib"),
site_packages,
target_version,
}
}
}

View File

@@ -0,0 +1,8 @@
pub use self::vendored::vendored_typeshed_stubs;
pub(super) use self::versions::{
parse_typeshed_versions, LazyTypeshedVersions, TypeshedVersionsParseError,
TypeshedVersionsQueryResult,
};
mod vendored;
mod versions;

View File

@@ -1,17 +1,25 @@
pub(crate) mod versions;
use once_cell::sync::Lazy;
use ruff_db::vendored::VendoredFileSystem;
// The file path here is hardcoded in this crate's `build.rs` script.
// Luckily this crate will fail to build if this file isn't available at build time.
static TYPESHED_ZIP_BYTES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/zipped_typeshed.zip"));
pub fn vendored_typeshed_stubs() -> &'static VendoredFileSystem {
static VENDORED_TYPESHED_STUBS: Lazy<VendoredFileSystem> =
Lazy::new(|| VendoredFileSystem::new_static(TYPESHED_ZIP_BYTES).unwrap());
&VENDORED_TYPESHED_STUBS
}
#[cfg(test)]
mod tests {
use std::io::{self, Read};
use std::path::Path;
use ruff_db::vendored::VendoredFileSystem;
use ruff_db::vfs::VendoredPath;
use ruff_db::vendored::VendoredPath;
// The file path here is hardcoded in this crate's `build.rs` script.
// Luckily this crate will fail to build if this file isn't available at build time.
const TYPESHED_ZIP_BYTES: &[u8] =
include_bytes!(concat!(env!("OUT_DIR"), "/zipped_typeshed.zip"));
use super::*;
#[test]
fn typeshed_zip_created_at_build_time() {
@@ -34,7 +42,7 @@ mod tests {
#[test]
fn typeshed_vfs_consistent_with_vendored_stubs() {
let vendored_typeshed_dir = Path::new("vendor/typeshed").canonicalize().unwrap();
let vendored_typeshed_stubs = VendoredFileSystem::new(TYPESHED_ZIP_BYTES).unwrap();
let vendored_typeshed_stubs = vendored_typeshed_stubs();
let mut empty_iterator = true;
for entry in walkdir::WalkDir::new(&vendored_typeshed_dir).min_depth(1) {
@@ -64,7 +72,7 @@ mod tests {
let vendored_path_kind = vendored_typeshed_stubs
.metadata(vendored_path)
.unwrap_or_else(|| {
.unwrap_or_else(|_| {
panic!(
"Expected metadata for {vendored_path:?} to be retrievable from the `VendoredFileSystem!

View File

@@ -1,16 +1,96 @@
use std::cell::OnceCell;
use std::collections::BTreeMap;
use std::fmt;
use std::num::{NonZeroU16, NonZeroUsize};
use std::ops::{RangeFrom, RangeInclusive};
use std::str::FromStr;
use once_cell::sync::Lazy;
use ruff_db::system::SystemPath;
use rustc_hash::FxHashMap;
use crate::module::ModuleName;
use ruff_db::files::{system_path_to_file, File};
#[derive(Debug, PartialEq, Eq)]
pub struct TypeshedVersionsParseError {
line_number: NonZeroU16,
use super::vendored::vendored_typeshed_stubs;
use crate::db::Db;
use crate::module_name::ModuleName;
use crate::python_version::PythonVersion;
#[derive(Debug)]
pub(crate) struct LazyTypeshedVersions<'db>(OnceCell<&'db TypeshedVersions>);
impl<'db> LazyTypeshedVersions<'db> {
#[must_use]
pub(crate) fn new() -> Self {
Self(OnceCell::new())
}
/// Query whether a module exists at runtime in the stdlib on a certain Python version.
///
/// Simply probing whether a file exists in typeshed is insufficient for this question,
/// as a module in the stdlib may have been added in Python 3.10, but the typeshed stub
/// will still be available (either in a custom typeshed dir or in our vendored copy)
/// even if the user specified Python 3.8 as the target version.
///
/// For top-level modules and packages, the VERSIONS file can always provide an unambiguous answer
/// as to whether the module exists on the specified target version. However, VERSIONS does not
/// provide comprehensive information on all submodules, meaning that this method sometimes
/// returns [`TypeshedVersionsQueryResult::MaybeExists`].
/// See [`TypeshedVersionsQueryResult`] for more details.
#[must_use]
pub(crate) fn query_module(
&self,
db: &'db dyn Db,
module: &ModuleName,
stdlib_root: Option<&SystemPath>,
target_version: PythonVersion,
) -> TypeshedVersionsQueryResult {
let versions = self.0.get_or_init(|| {
let versions_path = if let Some(system_path) = stdlib_root {
system_path.join("VERSIONS")
} else {
return &VENDORED_VERSIONS;
};
let Ok(versions_file) = system_path_to_file(db.upcast(), &versions_path) else {
todo!(
"Still need to figure out how to handle VERSIONS files being deleted \
from custom typeshed directories! Expected a file to exist at {versions_path}"
)
};
// TODO(Alex/Micha): If VERSIONS is invalid,
// this should invalidate not just the specific module resolution we're currently attempting,
// but all type inference that depends on any standard-library types.
// Unwrapping here is not correct...
parse_typeshed_versions(db, versions_file).as_ref().unwrap()
});
versions.query_module(module, target_version)
}
}
#[salsa::tracked(return_ref)]
pub(crate) fn parse_typeshed_versions(
db: &dyn Db,
versions_file: File,
) -> Result<TypeshedVersions, TypeshedVersionsParseError> {
// TODO: Handle IO errors
let file_content = versions_file
.read_to_string(db.upcast())
.unwrap_or_default();
file_content.parse()
}
static VENDORED_VERSIONS: Lazy<TypeshedVersions> = Lazy::new(|| {
TypeshedVersions::from_str(
&vendored_typeshed_stubs()
.read_to_string("stdlib/VERSIONS")
.unwrap(),
)
.unwrap()
});
#[derive(Debug, PartialEq, Eq, Clone)]
pub(crate) struct TypeshedVersionsParseError {
line_number: Option<NonZeroU16>,
reason: TypeshedVersionsParseErrorKind,
}
@@ -20,10 +100,14 @@ impl fmt::Display for TypeshedVersionsParseError {
line_number,
reason,
} = self;
write!(
f,
"Error while parsing line {line_number} of typeshed's VERSIONS file: {reason}"
)
if let Some(line_number) = line_number {
write!(
f,
"Error while parsing line {line_number} of typeshed's VERSIONS file: {reason}"
)
} else {
write!(f, "Error while parsing typeshed's VERSIONS file: {reason}")
}
}
}
@@ -37,8 +121,8 @@ impl std::error::Error for TypeshedVersionsParseError {
}
}
#[derive(Debug, PartialEq, Eq)]
pub enum TypeshedVersionsParseErrorKind {
#[derive(Debug, PartialEq, Eq, Clone)]
pub(super) enum TypeshedVersionsParseErrorKind {
TooManyLines(NonZeroUsize),
UnexpectedNumberOfColons,
InvalidModuleName(String),
@@ -81,38 +165,94 @@ impl fmt::Display for TypeshedVersionsParseErrorKind {
}
#[derive(Debug, PartialEq, Eq)]
pub struct TypeshedVersions(FxHashMap<ModuleName, PyVersionRange>);
pub(crate) struct TypeshedVersions(FxHashMap<ModuleName, PyVersionRange>);
impl TypeshedVersions {
pub fn len(&self) -> usize {
self.0.len()
#[must_use]
fn exact(&self, module_name: &ModuleName) -> Option<&PyVersionRange> {
self.0.get(module_name)
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn contains_module(&self, module_name: &ModuleName) -> bool {
self.0.contains_key(module_name)
}
pub fn module_exists_on_version(
#[must_use]
fn query_module(
&self,
module: ModuleName,
version: impl Into<PyVersion>,
) -> bool {
let version = version.into();
let mut module: Option<ModuleName> = Some(module);
while let Some(module_to_try) = module {
if let Some(range) = self.0.get(&module_to_try) {
return range.contains(version);
module: &ModuleName,
target_version: PythonVersion,
) -> TypeshedVersionsQueryResult {
if let Some(range) = self.exact(module) {
if range.contains(target_version) {
TypeshedVersionsQueryResult::Exists
} else {
TypeshedVersionsQueryResult::DoesNotExist
}
module = module_to_try.parent();
} else {
let mut module = module.parent();
while let Some(module_to_try) = module {
if let Some(range) = self.exact(&module_to_try) {
return {
if range.contains(target_version) {
TypeshedVersionsQueryResult::MaybeExists
} else {
TypeshedVersionsQueryResult::DoesNotExist
}
};
}
module = module_to_try.parent();
}
TypeshedVersionsQueryResult::DoesNotExist
}
false
}
}
/// Possible answers [`LazyTypeshedVersions::query_module()`] could give to the question:
/// "Does this module exist in the stdlib at runtime on a certain target version?"
#[derive(Debug, Copy, PartialEq, Eq, Clone, Hash)]
pub(crate) enum TypeshedVersionsQueryResult {
/// The module definitely exists in the stdlib at runtime on the user-specified target version.
///
/// For example:
/// - The target version is Python 3.8
/// - We're querying whether the `asyncio.tasks` module exists in the stdlib
/// - The VERSIONS file contains the line `asyncio.tasks: 3.8-`
Exists,
/// The module definitely does not exist in the stdlib on the user-specified target version.
///
/// For example:
/// - We're querying whether the `foo` module exists in the stdlib
/// - There is no top-level `foo` module in VERSIONS
///
/// OR:
/// - The target version is Python 3.8
/// - We're querying whether the module `importlib.abc` exists in the stdlib
/// - The VERSIONS file contains the line `importlib.abc: 3.10-`,
/// indicating that the module was added in 3.10
///
/// OR:
/// - The target version is Python 3.8
/// - We're querying whether the module `collections.abc` exists in the stdlib
/// - The VERSIONS file does not contain any information about the `collections.abc` submodule,
/// but *does* contain the line `collections: 3.10-`,
/// indicating that the entire `collections` package was added in Python 3.10.
DoesNotExist,
/// The module potentially exists in the stdlib and, if it does,
/// it definitely exists on the user-specified target version.
///
/// This variant is only relevant for submodules,
/// for which the typeshed VERSIONS file does not provide comprehensive information.
/// (The VERSIONS file is guaranteed to provide information about all top-level stdlib modules and packages,
/// but not necessarily about all submodules within each top-level package.)
///
/// For example:
/// - The target version is Python 3.8
/// - We're querying whether the `asyncio.staggered` module exists in the stdlib
/// - The typeshed VERSIONS file contains the line `asyncio: 3.8`,
/// indicating that the `asyncio` package was added in Python 3.8,
/// but does not contain any explicit information about the `asyncio.staggered` submodule.
MaybeExists,
}
impl FromStr for TypeshedVersions {
type Err = TypeshedVersionsParseError;
@@ -125,7 +265,7 @@ impl FromStr for TypeshedVersions {
let Ok(line_number) = NonZeroU16::try_from(line_number) else {
return Err(TypeshedVersionsParseError {
line_number: NonZeroU16::MAX,
line_number: None,
reason: TypeshedVersionsParseErrorKind::TooManyLines(line_number),
});
};
@@ -141,14 +281,14 @@ impl FromStr for TypeshedVersions {
let (Some(module_name), Some(rest), None) = (parts.next(), parts.next(), parts.next())
else {
return Err(TypeshedVersionsParseError {
line_number,
line_number: Some(line_number),
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfColons,
});
};
let Some(module_name) = ModuleName::new(module_name) else {
return Err(TypeshedVersionsParseError {
line_number,
line_number: Some(line_number),
reason: TypeshedVersionsParseErrorKind::InvalidModuleName(
module_name.to_string(),
),
@@ -159,7 +299,7 @@ impl FromStr for TypeshedVersions {
Ok(version) => map.insert(module_name, version),
Err(reason) => {
return Err(TypeshedVersionsParseError {
line_number,
line_number: Some(line_number),
reason,
})
}
@@ -180,14 +320,15 @@ impl fmt::Display for TypeshedVersions {
}
}
#[derive(Debug, Clone, Eq, PartialEq)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
enum PyVersionRange {
AvailableFrom(RangeFrom<PyVersion>),
AvailableWithin(RangeInclusive<PyVersion>),
AvailableFrom(RangeFrom<PythonVersion>),
AvailableWithin(RangeInclusive<PythonVersion>),
}
impl PyVersionRange {
fn contains(&self, version: PyVersion) -> bool {
#[must_use]
fn contains(&self, version: PythonVersion) -> bool {
match self {
Self::AvailableFrom(inner) => inner.contains(&version),
Self::AvailableWithin(inner) => inner.contains(&version),
@@ -201,9 +342,14 @@ impl FromStr for PyVersionRange {
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut parts = s.split('-').map(str::trim);
match (parts.next(), parts.next(), parts.next()) {
(Some(lower), Some(""), None) => Ok(Self::AvailableFrom((lower.parse()?)..)),
(Some(lower), Some(""), None) => {
let lower = PythonVersion::from_versions_file_string(lower)?;
Ok(Self::AvailableFrom(lower..))
}
(Some(lower), Some(upper), None) => {
Ok(Self::AvailableWithin((lower.parse()?)..=(upper.parse()?)))
let lower = PythonVersion::from_versions_file_string(lower)?;
let upper = PythonVersion::from_versions_file_string(upper)?;
Ok(Self::AvailableWithin(lower..=upper))
}
_ => Err(TypeshedVersionsParseErrorKind::UnexpectedNumberOfHyphens),
}
@@ -221,87 +367,20 @@ impl fmt::Display for PyVersionRange {
}
}
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct PyVersion {
major: u8,
minor: u8,
}
impl FromStr for PyVersion {
type Err = TypeshedVersionsParseErrorKind;
fn from_str(s: &str) -> Result<Self, Self::Err> {
impl PythonVersion {
fn from_versions_file_string(s: &str) -> Result<Self, TypeshedVersionsParseErrorKind> {
let mut parts = s.split('.').map(str::trim);
let (Some(major), Some(minor), None) = (parts.next(), parts.next(), parts.next()) else {
return Err(TypeshedVersionsParseErrorKind::UnexpectedNumberOfPeriods(
s.to_string(),
));
};
let major = match u8::from_str(major) {
Ok(major) => major,
Err(err) => {
return Err(TypeshedVersionsParseErrorKind::IntegerParsingFailure {
version: s.to_string(),
err,
})
PythonVersion::try_from((major, minor)).map_err(|int_parse_error| {
TypeshedVersionsParseErrorKind::IntegerParsingFailure {
version: s.to_string(),
err: int_parse_error,
}
};
let minor = match u8::from_str(minor) {
Ok(minor) => minor,
Err(err) => {
return Err(TypeshedVersionsParseErrorKind::IntegerParsingFailure {
version: s.to_string(),
err,
})
}
};
Ok(Self { major, minor })
}
}
impl fmt::Display for PyVersion {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let PyVersion { major, minor } = self;
write!(f, "{major}.{minor}")
}
}
// TODO: unify with the PythonVersion enum in the linter/formatter crates?
#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum SupportedPyVersion {
Py37,
#[default]
Py38,
Py39,
Py310,
Py311,
Py312,
Py313,
}
impl From<SupportedPyVersion> for PyVersion {
fn from(value: SupportedPyVersion) -> Self {
match value {
SupportedPyVersion::Py37 => PyVersion { major: 3, minor: 7 },
SupportedPyVersion::Py38 => PyVersion { major: 3, minor: 8 },
SupportedPyVersion::Py39 => PyVersion { major: 3, minor: 9 },
SupportedPyVersion::Py310 => PyVersion {
major: 3,
minor: 10,
},
SupportedPyVersion::Py311 => PyVersion {
major: 3,
minor: 11,
},
SupportedPyVersion::Py312 => PyVersion {
major: 3,
minor: 12,
},
SupportedPyVersion::Py313 => PyVersion {
major: 3,
minor: 13,
},
}
})
}
}
@@ -310,14 +389,26 @@ mod tests {
use std::num::{IntErrorKind, NonZeroU16};
use std::path::Path;
use super::*;
use insta::assert_snapshot;
use super::*;
const TYPESHED_STDLIB_DIR: &str = "stdlib";
#[allow(unsafe_code)]
const ONE: NonZeroU16 = unsafe { NonZeroU16::new_unchecked(1) };
const ONE: Option<NonZeroU16> = Some(unsafe { NonZeroU16::new_unchecked(1) });
impl TypeshedVersions {
#[must_use]
fn contains_exact(&self, module: &ModuleName) -> bool {
self.exact(module).is_some()
}
#[must_use]
fn len(&self) -> usize {
self.0.len()
}
}
#[test]
fn can_parse_vendored_versions_file() {
@@ -334,23 +425,36 @@ mod tests {
let asyncio_staggered = ModuleName::new_static("asyncio.staggered").unwrap();
let audioop = ModuleName::new_static("audioop").unwrap();
assert!(versions.contains_module(&asyncio));
assert!(versions.module_exists_on_version(asyncio, SupportedPyVersion::Py310));
assert!(versions.contains_module(&asyncio_staggered));
assert!(
versions.module_exists_on_version(asyncio_staggered.clone(), SupportedPyVersion::Py38)
assert!(versions.contains_exact(&asyncio));
assert_eq!(
versions.query_module(&asyncio, PythonVersion::PY310),
TypeshedVersionsQueryResult::Exists
);
assert!(!versions.module_exists_on_version(asyncio_staggered, SupportedPyVersion::Py37));
assert!(versions.contains_module(&audioop));
assert!(versions.module_exists_on_version(audioop.clone(), SupportedPyVersion::Py312));
assert!(!versions.module_exists_on_version(audioop, SupportedPyVersion::Py313));
assert!(versions.contains_exact(&asyncio_staggered));
assert_eq!(
versions.query_module(&asyncio_staggered, PythonVersion::PY38),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
versions.query_module(&asyncio_staggered, PythonVersion::PY37),
TypeshedVersionsQueryResult::DoesNotExist
);
assert!(versions.contains_exact(&audioop));
assert_eq!(
versions.query_module(&audioop, PythonVersion::PY312),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
versions.query_module(&audioop, PythonVersion::PY313),
TypeshedVersionsQueryResult::DoesNotExist
);
}
#[test]
fn typeshed_versions_consistent_with_vendored_stubs() {
const VERSIONS_DATA: &str = include_str!("../../vendor/typeshed/stdlib/VERSIONS");
const VERSIONS_DATA: &str = include_str!("../../../vendor/typeshed/stdlib/VERSIONS");
let vendored_typeshed_dir = Path::new("vendor/typeshed").canonicalize().unwrap();
let vendored_typeshed_versions = TypeshedVersions::from_str(VERSIONS_DATA).unwrap();
@@ -393,7 +497,7 @@ mod tests {
let top_level_module = ModuleName::new(top_level_module)
.unwrap_or_else(|| panic!("{top_level_module:?} was not a valid module name!"));
assert!(vendored_typeshed_versions.contains_module(&top_level_module));
assert!(vendored_typeshed_versions.contains_exact(&top_level_module));
}
assert!(
@@ -426,30 +530,102 @@ foo: 3.8- # trailing comment
foo: 3.8-
"###
);
}
let foo = ModuleName::new_static("foo").unwrap();
#[test]
fn version_within_range_parsed_correctly() {
let parsed_versions = TypeshedVersions::from_str("bar: 2.7-3.10").unwrap();
let bar = ModuleName::new_static("bar").unwrap();
assert!(parsed_versions.contains_exact(&bar));
assert_eq!(
parsed_versions.query_module(&bar, PythonVersion::PY37),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
parsed_versions.query_module(&bar, PythonVersion::PY310),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
parsed_versions.query_module(&bar, PythonVersion::PY311),
TypeshedVersionsQueryResult::DoesNotExist
);
}
#[test]
fn version_from_range_parsed_correctly() {
let parsed_versions = TypeshedVersions::from_str("foo: 3.8-").unwrap();
let foo = ModuleName::new_static("foo").unwrap();
assert!(parsed_versions.contains_exact(&foo));
assert_eq!(
parsed_versions.query_module(&foo, PythonVersion::PY37),
TypeshedVersionsQueryResult::DoesNotExist
);
assert_eq!(
parsed_versions.query_module(&foo, PythonVersion::PY38),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
parsed_versions.query_module(&foo, PythonVersion::PY311),
TypeshedVersionsQueryResult::Exists
);
}
#[test]
fn explicit_submodule_parsed_correctly() {
let parsed_versions = TypeshedVersions::from_str("bar.baz: 3.1-3.9").unwrap();
let bar_baz = ModuleName::new_static("bar.baz").unwrap();
assert!(parsed_versions.contains_exact(&bar_baz));
assert_eq!(
parsed_versions.query_module(&bar_baz, PythonVersion::PY37),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
parsed_versions.query_module(&bar_baz, PythonVersion::PY39),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
parsed_versions.query_module(&bar_baz, PythonVersion::PY310),
TypeshedVersionsQueryResult::DoesNotExist
);
}
#[test]
fn implicit_submodule_queried_correctly() {
let parsed_versions = TypeshedVersions::from_str("bar: 2.7-3.10").unwrap();
let bar_eggs = ModuleName::new_static("bar.eggs").unwrap();
assert!(!parsed_versions.contains_exact(&bar_eggs));
assert_eq!(
parsed_versions.query_module(&bar_eggs, PythonVersion::PY37),
TypeshedVersionsQueryResult::MaybeExists
);
assert_eq!(
parsed_versions.query_module(&bar_eggs, PythonVersion::PY310),
TypeshedVersionsQueryResult::MaybeExists
);
assert_eq!(
parsed_versions.query_module(&bar_eggs, PythonVersion::PY311),
TypeshedVersionsQueryResult::DoesNotExist
);
}
#[test]
fn nonexistent_module_queried_correctly() {
let parsed_versions = TypeshedVersions::from_str("eggs: 3.8-").unwrap();
let spam = ModuleName::new_static("spam").unwrap();
assert!(parsed_versions.contains_module(&foo));
assert!(!parsed_versions.module_exists_on_version(foo.clone(), SupportedPyVersion::Py37));
assert!(parsed_versions.module_exists_on_version(foo.clone(), SupportedPyVersion::Py38));
assert!(parsed_versions.module_exists_on_version(foo, SupportedPyVersion::Py311));
assert!(parsed_versions.contains_module(&bar));
assert!(parsed_versions.module_exists_on_version(bar.clone(), SupportedPyVersion::Py37));
assert!(parsed_versions.module_exists_on_version(bar.clone(), SupportedPyVersion::Py310));
assert!(!parsed_versions.module_exists_on_version(bar, SupportedPyVersion::Py311));
assert!(parsed_versions.contains_module(&bar_baz));
assert!(parsed_versions.module_exists_on_version(bar_baz.clone(), SupportedPyVersion::Py37));
assert!(parsed_versions.module_exists_on_version(bar_baz.clone(), SupportedPyVersion::Py39));
assert!(!parsed_versions.module_exists_on_version(bar_baz, SupportedPyVersion::Py310));
assert!(!parsed_versions.contains_module(&spam));
assert!(!parsed_versions.module_exists_on_version(spam.clone(), SupportedPyVersion::Py37));
assert!(!parsed_versions.module_exists_on_version(spam, SupportedPyVersion::Py313));
assert!(!parsed_versions.contains_exact(&spam));
assert_eq!(
parsed_versions.query_module(&spam, PythonVersion::PY37),
TypeshedVersionsQueryResult::DoesNotExist
);
assert_eq!(
parsed_versions.query_module(&spam, PythonVersion::PY313),
TypeshedVersionsQueryResult::DoesNotExist
);
}
#[test]
@@ -465,7 +641,7 @@ foo: 3.8- # trailing comment
assert_eq!(
TypeshedVersions::from_str(&massive_versions_file),
Err(TypeshedVersionsParseError {
line_number: NonZeroU16::MAX,
line_number: None,
reason: TypeshedVersionsParseErrorKind::TooManyLines(
NonZeroUsize::new(too_many + 1 - offset).unwrap()
)

View File

@@ -1,56 +0,0 @@
use std::ops::Deref;
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Name(smol_str::SmolStr);
impl Name {
#[inline]
pub fn new(name: &str) -> Self {
Self(smol_str::SmolStr::new(name))
}
#[inline]
pub fn new_static(name: &'static str) -> Self {
Self(smol_str::SmolStr::new_static(name))
}
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}
impl Deref for Name {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl<T> From<T> for Name
where
T: Into<smol_str::SmolStr>,
{
fn from(value: T) -> Self {
Self(value.into())
}
}
impl std::fmt::Display for Name {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}
impl PartialEq<str> for Name {
fn eq(&self, other: &str) -> bool {
self.as_str() == other
}
}
impl PartialEq<Name> for str {
fn eq(&self, other: &Name) -> bool {
other == self
}
}

View File

@@ -0,0 +1,46 @@
use crate::python_version::PythonVersion;
use crate::Db;
use ruff_db::system::SystemPathBuf;
use salsa::Durability;
#[salsa::input(singleton)]
pub struct Program {
pub target_version: PythonVersion,
#[return_ref]
pub search_paths: SearchPathSettings,
}
impl Program {
pub fn from_settings(db: &dyn Db, settings: ProgramSettings) -> Self {
Program::builder(settings.target_version, settings.search_paths)
.durability(Durability::HIGH)
.new(db)
}
}
#[derive(Debug, Eq, PartialEq)]
pub struct ProgramSettings {
pub target_version: PythonVersion,
pub search_paths: SearchPathSettings,
}
/// Configures the search paths for module resolution.
#[derive(Eq, PartialEq, Debug, Clone, Default)]
pub struct SearchPathSettings {
/// List of user-provided paths that should take first priority in the module resolution.
/// Examples in other type checkers are mypy's MYPYPATH environment variable,
/// or pyright's stubPath configuration setting.
pub extra_paths: Vec<SystemPathBuf>,
/// The root of the workspace, used for finding first-party modules.
pub src_root: SystemPathBuf,
/// Optional path to a "custom typeshed" directory on disk for us to use for standard-library types.
/// If this is not provided, we will fallback to our vendored typeshed stubs for the stdlib,
/// bundled as a zip file in the binary
pub custom_typeshed: Option<SystemPathBuf>,
/// The path to the user's `site-packages` directory, where third-party packages from ``PyPI`` are installed.
pub site_packages: Vec<SystemPathBuf>,
}

View File

@@ -0,0 +1,62 @@
use std::fmt;
/// Representation of a Python version.
///
/// Unlike the `TargetVersion` enums in the CLI crates,
/// this does not necessarily represent a Python version that we actually support.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct PythonVersion {
pub major: u8,
pub minor: u8,
}
impl PythonVersion {
pub const PY37: PythonVersion = PythonVersion { major: 3, minor: 7 };
pub const PY38: PythonVersion = PythonVersion { major: 3, minor: 8 };
pub const PY39: PythonVersion = PythonVersion { major: 3, minor: 9 };
pub const PY310: PythonVersion = PythonVersion {
major: 3,
minor: 10,
};
pub const PY311: PythonVersion = PythonVersion {
major: 3,
minor: 11,
};
pub const PY312: PythonVersion = PythonVersion {
major: 3,
minor: 12,
};
pub const PY313: PythonVersion = PythonVersion {
major: 3,
minor: 13,
};
pub fn free_threaded_build_available(self) -> bool {
self >= PythonVersion::PY313
}
}
impl Default for PythonVersion {
fn default() -> Self {
Self::PY38
}
}
impl TryFrom<(&str, &str)> for PythonVersion {
type Error = std::num::ParseIntError;
fn try_from(value: (&str, &str)) -> Result<Self, Self::Error> {
let (major, minor) = value;
Ok(Self {
major: major.parse()?,
minor: minor.parse()?,
})
}
}
impl fmt::Display for PythonVersion {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let PythonVersion { major, minor } = self;
write!(f, "{major}.{minor}")
}
}

View File

@@ -2,25 +2,30 @@ use std::iter::FusedIterator;
use std::sync::Arc;
use rustc_hash::FxHashMap;
use salsa::DebugWithDb;
use salsa::plumbing::AsId;
use ruff_db::files::File;
use ruff_db::parsed::parsed_module;
use ruff_db::vfs::VfsFile;
use ruff_index::{IndexSlice, IndexVec};
use ruff_python_ast as ast;
use crate::node_key::NodeKey;
use crate::semantic_index::ast_ids::{AstId, AstIds, ScopeClassId, ScopeFunctionId};
use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey;
use crate::semantic_index::ast_ids::AstIds;
use crate::semantic_index::builder::SemanticIndexBuilder;
use crate::semantic_index::definition::{Definition, DefinitionNodeKey};
use crate::semantic_index::expression::Expression;
use crate::semantic_index::symbol::{
FileScopeId, PublicSymbolId, Scope, ScopeId, ScopeKind, ScopedSymbolId, SymbolTable,
FileScopeId, NodeWithScopeKey, NodeWithScopeRef, Scope, ScopeId, ScopedSymbolId, SymbolTable,
};
use crate::Db;
pub(crate) use self::use_def::UseDefMap;
pub mod ast_ids;
mod builder;
pub mod definition;
pub mod expression;
pub mod symbol;
mod use_def;
type SymbolMap = hashbrown::HashMap<ScopedSymbolId, (), ()>;
@@ -28,12 +33,12 @@ type SymbolMap = hashbrown::HashMap<ScopedSymbolId, (), ()>;
///
/// Prefer using [`symbol_table`] when working with symbols from a single scope.
#[salsa::tracked(return_ref, no_eq)]
pub(crate) fn semantic_index(db: &dyn Db, file: VfsFile) -> SemanticIndex {
let _ = tracing::trace_span!("semantic_index", file = ?file.debug(db.upcast())).enter();
pub(crate) fn semantic_index(db: &dyn Db, file: File) -> SemanticIndex<'_> {
let _span = tracing::trace_span!("semantic_index", file = %file.path(db)).entered();
let parsed = parsed_module(db.upcast(), file);
SemanticIndexBuilder::new(parsed).build()
SemanticIndexBuilder::new(db, file, parsed).build()
}
/// Returns the symbol table for a specific `scope`.
@@ -43,58 +48,74 @@ pub(crate) fn semantic_index(db: &dyn Db, file: VfsFile) -> SemanticIndex {
/// is unchanged.
#[salsa::tracked]
pub(crate) fn symbol_table<'db>(db: &'db dyn Db, scope: ScopeId<'db>) -> Arc<SymbolTable> {
let _ = tracing::trace_span!("symbol_table", scope = ?scope.debug(db)).enter();
let index = semantic_index(db, scope.file(db));
let file = scope.file(db);
let _span =
tracing::trace_span!("symbol_table", scope=?scope.as_id(), file=%file.path(db)).entered();
let index = semantic_index(db, file);
index.symbol_table(scope.file_scope_id(db))
}
/// Returns the root scope of `file`.
/// Returns the use-def map for a specific `scope`.
///
/// Using [`use_def_map`] over [`semantic_index`] has the advantage that
/// Salsa can avoid invalidating dependent queries if this scope's use-def map
/// is unchanged.
#[salsa::tracked]
pub(crate) fn root_scope(db: &dyn Db, file: VfsFile) -> ScopeId<'_> {
let _ = tracing::trace_span!("root_scope", file = ?file.debug(db.upcast())).enter();
pub(crate) fn use_def_map<'db>(db: &'db dyn Db, scope: ScopeId<'db>) -> Arc<UseDefMap<'db>> {
let file = scope.file(db);
let _span =
tracing::trace_span!("use_def_map", scope=?scope.as_id(), file=%file.path(db)).entered();
let index = semantic_index(db, file);
FileScopeId::root().to_scope_id(db, file)
index.use_def_map(scope.file_scope_id(db))
}
/// Returns the symbol with the given name in `file`'s public scope or `None` if
/// no symbol with the given name exists.
pub fn public_symbol<'db>(
db: &'db dyn Db,
file: VfsFile,
name: &str,
) -> Option<PublicSymbolId<'db>> {
let root_scope = root_scope(db, file);
let symbol_table = symbol_table(db, root_scope);
let local = symbol_table.symbol_id_by_name(name)?;
Some(local.to_public_symbol(db, file))
/// Returns the module global scope of `file`.
#[salsa::tracked]
pub(crate) fn global_scope(db: &dyn Db, file: File) -> ScopeId<'_> {
let _span = tracing::trace_span!("global_scope", file = %file.path(db)).entered();
FileScopeId::global().to_scope_id(db, file)
}
/// The symbol tables for an entire file.
/// The symbol tables and use-def maps for all scopes in a file.
#[derive(Debug)]
pub struct SemanticIndex {
pub(crate) struct SemanticIndex<'db> {
/// List of all symbol tables in this file, indexed by scope.
symbol_tables: IndexVec<FileScopeId, Arc<SymbolTable>>,
/// List of all scopes in this file.
scopes: IndexVec<FileScopeId, Scope>,
/// Maps expressions to their corresponding scope.
/// Map expressions to their corresponding scope.
/// We can't use [`ExpressionId`] here, because the challenge is how to get from
/// an [`ast::Expr`] to an [`ExpressionId`] (which requires knowing the scope).
expression_scopes: FxHashMap<NodeKey, FileScopeId>,
scopes_by_expression: FxHashMap<ExpressionNodeKey, FileScopeId>,
/// Map from a node creating a definition to its definition.
definitions_by_node: FxHashMap<DefinitionNodeKey, Definition<'db>>,
/// Map from a standalone expression to its [`Expression`] ingredient.
expressions_by_node: FxHashMap<ExpressionNodeKey, Expression<'db>>,
/// Map from nodes that create a scope to the scope they create.
scopes_by_node: FxHashMap<NodeWithScopeKey, FileScopeId>,
/// Map from the file-local [`FileScopeId`] to the salsa-ingredient [`ScopeId`].
scope_ids_by_scope: IndexVec<FileScopeId, ScopeId<'db>>,
/// Use-def map for each scope in this file.
use_def_maps: IndexVec<FileScopeId, Arc<UseDefMap<'db>>>,
/// Lookup table to map between node ids and ast nodes.
///
/// Note: We should not depend on this map when analysing other files or
/// changing a file invalidates all dependents.
ast_ids: IndexVec<FileScopeId, AstIds>,
/// Map from scope to the node that introduces the scope.
scope_nodes: IndexVec<FileScopeId, NodeWithScopeId>,
}
impl SemanticIndex {
impl<'db> SemanticIndex<'db> {
/// Returns the symbol table for a specific scope.
///
/// Use the Salsa cached [`symbol_table`] query if you only need the
@@ -103,18 +124,29 @@ impl SemanticIndex {
self.symbol_tables[scope_id].clone()
}
/// Returns the use-def map for a specific scope.
///
/// Use the Salsa cached [`use_def_map`] query if you only need the
/// use-def map for a single scope.
pub(super) fn use_def_map(&self, scope_id: FileScopeId) -> Arc<UseDefMap> {
self.use_def_maps[scope_id].clone()
}
pub(crate) fn ast_ids(&self, scope_id: FileScopeId) -> &AstIds {
&self.ast_ids[scope_id]
}
/// Returns the ID of the `expression`'s enclosing scope.
pub(crate) fn expression_scope_id(&self, expression: &ast::Expr) -> FileScopeId {
self.expression_scopes[&NodeKey::from_node(expression)]
pub(crate) fn expression_scope_id(
&self,
expression: impl Into<ExpressionNodeKey>,
) -> FileScopeId {
self.scopes_by_expression[&expression.into()]
}
/// Returns the [`Scope`] of the `expression`'s enclosing scope.
#[allow(unused)]
pub(crate) fn expression_scope(&self, expression: &ast::Expr) -> &Scope {
pub(crate) fn expression_scope(&self, expression: impl Into<ExpressionNodeKey>) -> &Scope {
&self.scopes[self.expression_scope_id(expression)]
}
@@ -142,22 +174,43 @@ impl SemanticIndex {
}
/// Returns an iterator over the direct child scopes of `scope`.
#[allow(unused)]
pub(crate) fn child_scopes(&self, scope: FileScopeId) -> ChildrenIter {
ChildrenIter::new(self, scope)
}
/// Returns an iterator over all ancestors of `scope`, starting with `scope` itself.
#[allow(unused)]
pub(crate) fn ancestor_scopes(&self, scope: FileScopeId) -> AncestorsIter {
AncestorsIter::new(self, scope)
}
pub(crate) fn scope_node(&self, scope_id: FileScopeId) -> NodeWithScopeId {
self.scope_nodes[scope_id]
/// Returns the [`Definition`] salsa ingredient for `definition_key`.
pub(crate) fn definition(
&self,
definition_key: impl Into<DefinitionNodeKey>,
) -> Definition<'db> {
self.definitions_by_node[&definition_key.into()]
}
/// Returns the [`Expression`] ingredient for an expression node.
/// Panics if we have no expression ingredient for that node. We can only call this method for
/// standalone-inferable expressions, which we call `add_standalone_expression` for in
/// [`SemanticIndexBuilder`].
pub(crate) fn expression(
&self,
expression_key: impl Into<ExpressionNodeKey>,
) -> Expression<'db> {
self.expressions_by_node[&expression_key.into()]
}
/// Returns the id of the scope that `node` creates. This is different from [`Definition::scope`] which
/// returns the scope in which that definition is defined in.
pub(crate) fn node_scope(&self, node: NodeWithScopeRef) -> FileScopeId {
self.scopes_by_node[&node.node_key()]
}
}
/// ID that uniquely identifies an expression inside a [`Scope`].
pub struct AncestorsIter<'a> {
scopes: &'a IndexSlice<FileScopeId, Scope>,
next_id: Option<FileScopeId>,
@@ -248,152 +301,153 @@ impl<'a> Iterator for ChildrenIter<'a> {
}
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum NodeWithScopeId {
Module,
Class(AstId<ScopeClassId>),
ClassTypeParams(AstId<ScopeClassId>),
Function(AstId<ScopeFunctionId>),
FunctionTypeParams(AstId<ScopeFunctionId>),
}
impl NodeWithScopeId {
fn scope_kind(self) -> ScopeKind {
match self {
NodeWithScopeId::Module => ScopeKind::Module,
NodeWithScopeId::Class(_) => ScopeKind::Class,
NodeWithScopeId::Function(_) => ScopeKind::Function,
NodeWithScopeId::ClassTypeParams(_) | NodeWithScopeId::FunctionTypeParams(_) => {
ScopeKind::Annotation
}
}
}
}
impl FusedIterator for ChildrenIter<'_> {}
#[cfg(test)]
mod tests {
use ruff_db::files::{system_path_to_file, File};
use ruff_db::parsed::parsed_module;
use ruff_db::vfs::{system_path_to_file, VfsFile};
use ruff_db::system::DbWithTestSystem;
use ruff_python_ast as ast;
use crate::db::tests::TestDb;
use crate::semantic_index::symbol::{FileScopeId, ScopeKind, SymbolTable};
use crate::semantic_index::{root_scope, semantic_index, symbol_table};
use crate::semantic_index::ast_ids::HasScopedUseId;
use crate::semantic_index::definition::DefinitionKind;
use crate::semantic_index::symbol::{FileScopeId, Scope, ScopeKind, SymbolTable};
use crate::semantic_index::{global_scope, semantic_index, symbol_table, use_def_map};
use crate::Db;
struct TestCase {
db: TestDb,
file: VfsFile,
file: File,
}
fn test_case(content: impl ToString) -> TestCase {
let db = TestDb::new();
db.memory_file_system()
.write_file("test.py", content)
.unwrap();
let mut db = TestDb::new();
db.write_file("test.py", content).unwrap();
let file = system_path_to_file(&db, "test.py").unwrap();
TestCase { db, file }
}
fn names(table: &SymbolTable) -> Vec<&str> {
fn names(table: &SymbolTable) -> Vec<String> {
table
.symbols()
.map(|symbol| symbol.name().as_str())
.map(|symbol| symbol.name().to_string())
.collect()
}
#[test]
fn empty() {
let TestCase { db, file } = test_case("");
let root_table = symbol_table(&db, root_scope(&db, file));
let global_table = symbol_table(&db, global_scope(&db, file));
assert_eq!(names(&root_table), Vec::<&str>::new());
let global_names = names(&global_table);
assert_eq!(global_names, Vec::<&str>::new());
}
#[test]
fn simple() {
let TestCase { db, file } = test_case("x");
let root_table = symbol_table(&db, root_scope(&db, file));
let global_table = symbol_table(&db, global_scope(&db, file));
assert_eq!(names(&root_table), vec!["x"]);
assert_eq!(names(&global_table), vec!["x"]);
}
#[test]
fn annotation_only() {
let TestCase { db, file } = test_case("x: int");
let root_table = symbol_table(&db, root_scope(&db, file));
let global_table = symbol_table(&db, global_scope(&db, file));
assert_eq!(names(&root_table), vec!["int", "x"]);
assert_eq!(names(&global_table), vec!["int", "x"]);
// TODO record definition
}
#[test]
fn import() {
let TestCase { db, file } = test_case("import foo");
let root_table = symbol_table(&db, root_scope(&db, file));
let scope = global_scope(&db, file);
let global_table = symbol_table(&db, scope);
assert_eq!(names(&root_table), vec!["foo"]);
let foo = root_table.symbol_by_name("foo").unwrap();
assert_eq!(names(&global_table), vec!["foo"]);
let foo = global_table.symbol_id_by_name("foo").unwrap();
assert_eq!(foo.definitions().len(), 1);
let use_def = use_def_map(&db, scope);
let [definition] = use_def.public_definitions(foo) else {
panic!("expected one definition");
};
assert!(matches!(definition.node(&db), DefinitionKind::Import(_)));
}
#[test]
fn import_sub() {
let TestCase { db, file } = test_case("import foo.bar");
let root_table = symbol_table(&db, root_scope(&db, file));
let global_table = symbol_table(&db, global_scope(&db, file));
assert_eq!(names(&root_table), vec!["foo"]);
assert_eq!(names(&global_table), vec!["foo"]);
}
#[test]
fn import_as() {
let TestCase { db, file } = test_case("import foo.bar as baz");
let root_table = symbol_table(&db, root_scope(&db, file));
let global_table = symbol_table(&db, global_scope(&db, file));
assert_eq!(names(&root_table), vec!["baz"]);
assert_eq!(names(&global_table), vec!["baz"]);
}
#[test]
fn import_from() {
let TestCase { db, file } = test_case("from bar import foo");
let root_table = symbol_table(&db, root_scope(&db, file));
let scope = global_scope(&db, file);
let global_table = symbol_table(&db, scope);
assert_eq!(names(&root_table), vec!["foo"]);
assert_eq!(
root_table
.symbol_by_name("foo")
.unwrap()
.definitions()
.len(),
1
);
assert_eq!(names(&global_table), vec!["foo"]);
assert!(
root_table
global_table
.symbol_by_name("foo")
.is_some_and(|symbol| { symbol.is_defined() || !symbol.is_used() }),
.is_some_and(|symbol| { symbol.is_defined() && !symbol.is_used() }),
"symbols that are defined get the defined flag"
);
let use_def = use_def_map(&db, scope);
let [definition] = use_def.public_definitions(
global_table
.symbol_id_by_name("foo")
.expect("symbol to exist"),
) else {
panic!("expected one definition");
};
assert!(matches!(
definition.node(&db),
DefinitionKind::ImportFrom(_)
));
}
#[test]
fn assign() {
let TestCase { db, file } = test_case("x = foo");
let root_table = symbol_table(&db, root_scope(&db, file));
let scope = global_scope(&db, file);
let global_table = symbol_table(&db, scope);
assert_eq!(names(&root_table), vec!["foo", "x"]);
assert_eq!(
root_table.symbol_by_name("x").unwrap().definitions().len(),
1
);
assert_eq!(names(&global_table), vec!["foo", "x"]);
assert!(
root_table
global_table
.symbol_by_name("foo")
.is_some_and(|symbol| { !symbol.is_defined() && symbol.is_used() }),
"a symbol used but not defined in a scope should have only the used flag"
);
let use_def = use_def_map(&db, scope);
let [definition] =
use_def.public_definitions(global_table.symbol_id_by_name("x").expect("symbol exists"))
else {
panic!("expected one definition");
};
assert!(matches!(
definition.node(&db),
DefinitionKind::Assignment(_)
));
}
#[test]
@@ -405,25 +459,34 @@ class C:
y = 2
",
);
let root_table = symbol_table(&db, root_scope(&db, file));
let global_table = symbol_table(&db, global_scope(&db, file));
assert_eq!(names(&root_table), vec!["C", "y"]);
assert_eq!(names(&global_table), vec!["C", "y"]);
let index = semantic_index(&db, file);
let scopes: Vec<_> = index.child_scopes(FileScopeId::root()).collect();
assert_eq!(scopes.len(), 1);
let (class_scope_id, class_scope) = scopes[0];
let [(class_scope_id, class_scope)] = index
.child_scopes(FileScopeId::global())
.collect::<Vec<_>>()[..]
else {
panic!("expected one child scope")
};
assert_eq!(class_scope.kind(), ScopeKind::Class);
assert_eq!(class_scope.name(), "C");
assert_eq!(class_scope_id.to_scope_id(&db, file).name(&db), "C");
let class_table = index.symbol_table(class_scope_id);
assert_eq!(names(&class_table), vec!["x"]);
assert_eq!(
class_table.symbol_by_name("x").unwrap().definitions().len(),
1
);
let use_def = index.use_def_map(class_scope_id);
let [definition] =
use_def.public_definitions(class_table.symbol_id_by_name("x").expect("symbol exists"))
else {
panic!("expected one definition");
};
assert!(matches!(
definition.node(&db),
DefinitionKind::Assignment(_)
));
}
#[test]
@@ -436,27 +499,34 @@ y = 2
",
);
let index = semantic_index(&db, file);
let root_table = index.symbol_table(FileScopeId::root());
let global_table = index.symbol_table(FileScopeId::global());
assert_eq!(names(&root_table), vec!["func", "y"]);
assert_eq!(names(&global_table), vec!["func", "y"]);
let scopes = index.child_scopes(FileScopeId::root()).collect::<Vec<_>>();
assert_eq!(scopes.len(), 1);
let (function_scope_id, function_scope) = scopes[0];
let [(function_scope_id, function_scope)] = index
.child_scopes(FileScopeId::global())
.collect::<Vec<_>>()[..]
else {
panic!("expected one child scope")
};
assert_eq!(function_scope.kind(), ScopeKind::Function);
assert_eq!(function_scope.name(), "func");
assert_eq!(function_scope_id.to_scope_id(&db, file).name(&db), "func");
let function_table = index.symbol_table(function_scope_id);
assert_eq!(names(&function_table), vec!["x"]);
assert_eq!(
let use_def = index.use_def_map(function_scope_id);
let [definition] = use_def.public_definitions(
function_table
.symbol_by_name("x")
.unwrap()
.definitions()
.len(),
1
);
.symbol_id_by_name("x")
.expect("symbol exists"),
) else {
panic!("expected one definition");
};
assert!(matches!(
definition.node(&db),
DefinitionKind::Assignment(_)
));
}
#[test]
@@ -470,32 +540,36 @@ def func():
",
);
let index = semantic_index(&db, file);
let root_table = index.symbol_table(FileScopeId::root());
let global_table = index.symbol_table(FileScopeId::global());
assert_eq!(names(&root_table), vec!["func"]);
let scopes: Vec<_> = index.child_scopes(FileScopeId::root()).collect();
assert_eq!(scopes.len(), 2);
let (func_scope1_id, func_scope_1) = scopes[0];
let (func_scope2_id, func_scope_2) = scopes[1];
assert_eq!(names(&global_table), vec!["func"]);
let [(func_scope1_id, func_scope_1), (func_scope2_id, func_scope_2)] = index
.child_scopes(FileScopeId::global())
.collect::<Vec<_>>()[..]
else {
panic!("expected two child scopes");
};
assert_eq!(func_scope_1.kind(), ScopeKind::Function);
assert_eq!(func_scope_1.name(), "func");
assert_eq!(func_scope1_id.to_scope_id(&db, file).name(&db), "func");
assert_eq!(func_scope_2.kind(), ScopeKind::Function);
assert_eq!(func_scope_2.name(), "func");
assert_eq!(func_scope2_id.to_scope_id(&db, file).name(&db), "func");
let func1_table = index.symbol_table(func_scope1_id);
let func2_table = index.symbol_table(func_scope2_id);
assert_eq!(names(&func1_table), vec!["x"]);
assert_eq!(names(&func2_table), vec!["y"]);
assert_eq!(
root_table
.symbol_by_name("func")
.unwrap()
.definitions()
.len(),
2
);
let use_def = index.use_def_map(FileScopeId::global());
let [definition] = use_def.public_definitions(
global_table
.symbol_id_by_name("func")
.expect("symbol exists"),
) else {
panic!("expected one definition");
};
assert!(matches!(definition.node(&db), DefinitionKind::Function(_)));
}
#[test]
@@ -508,24 +582,29 @@ def func[T]():
);
let index = semantic_index(&db, file);
let root_table = index.symbol_table(FileScopeId::root());
let global_table = index.symbol_table(FileScopeId::global());
assert_eq!(names(&root_table), vec!["func"]);
assert_eq!(names(&global_table), vec!["func"]);
let scopes: Vec<_> = index.child_scopes(FileScopeId::root()).collect();
assert_eq!(scopes.len(), 1);
let (ann_scope_id, ann_scope) = scopes[0];
let [(ann_scope_id, ann_scope)] = index
.child_scopes(FileScopeId::global())
.collect::<Vec<_>>()[..]
else {
panic!("expected one child scope");
};
assert_eq!(ann_scope.kind(), ScopeKind::Annotation);
assert_eq!(ann_scope.name(), "func");
assert_eq!(ann_scope_id.to_scope_id(&db, file).name(&db), "func");
let ann_table = index.symbol_table(ann_scope_id);
assert_eq!(names(&ann_table), vec!["T"]);
let scopes: Vec<_> = index.child_scopes(ann_scope_id).collect();
assert_eq!(scopes.len(), 1);
let (func_scope_id, func_scope) = scopes[0];
let [(func_scope_id, func_scope)] =
index.child_scopes(ann_scope_id).collect::<Vec<_>>()[..]
else {
panic!("expected one child scope");
};
assert_eq!(func_scope.kind(), ScopeKind::Function);
assert_eq!(func_scope.name(), "func");
assert_eq!(func_scope_id.to_scope_id(&db, file).name(&db), "func");
let func_table = index.symbol_table(func_scope_id);
assert_eq!(names(&func_table), vec!["x"]);
}
@@ -540,16 +619,19 @@ class C[T]:
);
let index = semantic_index(&db, file);
let root_table = index.symbol_table(FileScopeId::root());
let global_table = index.symbol_table(FileScopeId::global());
assert_eq!(names(&root_table), vec!["C"]);
assert_eq!(names(&global_table), vec!["C"]);
let scopes: Vec<_> = index.child_scopes(FileScopeId::root()).collect();
let [(ann_scope_id, ann_scope)] = index
.child_scopes(FileScopeId::global())
.collect::<Vec<_>>()[..]
else {
panic!("expected one child scope");
};
assert_eq!(scopes.len(), 1);
let (ann_scope_id, ann_scope) = scopes[0];
assert_eq!(ann_scope.kind(), ScopeKind::Annotation);
assert_eq!(ann_scope.name(), "C");
assert_eq!(ann_scope_id.to_scope_id(&db, file).name(&db), "C");
let ann_table = index.symbol_table(ann_scope_id);
assert_eq!(names(&ann_table), vec!["T"]);
assert!(
@@ -559,48 +641,49 @@ class C[T]:
"type parameters are defined by the scope that introduces them"
);
let scopes: Vec<_> = index.child_scopes(ann_scope_id).collect();
assert_eq!(scopes.len(), 1);
let (func_scope_id, func_scope) = scopes[0];
let [(class_scope_id, class_scope)] =
index.child_scopes(ann_scope_id).collect::<Vec<_>>()[..]
else {
panic!("expected one child scope");
};
assert_eq!(func_scope.kind(), ScopeKind::Class);
assert_eq!(func_scope.name(), "C");
assert_eq!(names(&index.symbol_table(func_scope_id)), vec!["x"]);
assert_eq!(class_scope.kind(), ScopeKind::Class);
assert_eq!(class_scope_id.to_scope_id(&db, file).name(&db), "C");
assert_eq!(names(&index.symbol_table(class_scope_id)), vec!["x"]);
}
// TODO: After porting the control flow graph.
// #[test]
// fn reachability_trivial() {
// let parsed = parse("x = 1; x");
// let ast = parsed.syntax();
// let index = SemanticIndex::from_ast(ast);
// let table = &index.symbol_table;
// let x_sym = table
// .root_symbol_id_by_name("x")
// .expect("x symbol should exist");
// let ast::Stmt::Expr(ast::StmtExpr { value: x_use, .. }) = &ast.body[1] else {
// panic!("should be an expr")
// };
// let x_defs: Vec<_> = index
// .reachable_definitions(x_sym, x_use)
// .map(|constrained_definition| constrained_definition.definition)
// .collect();
// assert_eq!(x_defs.len(), 1);
// let Definition::Assignment(node_key) = &x_defs[0] else {
// panic!("def should be an assignment")
// };
// let Some(def_node) = node_key.resolve(ast.into()) else {
// panic!("node key should resolve")
// };
// let ast::Expr::NumberLiteral(ast::ExprNumberLiteral {
// value: ast::Number::Int(num),
// ..
// }) = &*def_node.value
// else {
// panic!("should be a number literal")
// };
// assert_eq!(*num, 1);
// }
#[test]
fn reachability_trivial() {
let TestCase { db, file } = test_case("x = 1; x");
let parsed = parsed_module(&db, file);
let scope = global_scope(&db, file);
let ast = parsed.syntax();
let ast::Stmt::Expr(ast::StmtExpr {
value: x_use_expr, ..
}) = &ast.body[1]
else {
panic!("should be an expr")
};
let ast::Expr::Name(x_use_expr_name) = x_use_expr.as_ref() else {
panic!("expected a Name");
};
let x_use_id = x_use_expr_name.scoped_use_id(&db, scope);
let use_def = use_def_map(&db, scope);
let [definition] = use_def.use_definitions(x_use_id) else {
panic!("expected one definition");
};
let DefinitionKind::Assignment(assignment) = definition.node(&db) else {
panic!("should be an assignment definition")
};
let ast::Expr::NumberLiteral(ast::ExprNumberLiteral {
value: ast::Number::Int(num),
..
}) = &*assignment.assignment().value
else {
panic!("should be a number literal")
};
assert_eq!(*num, 1);
}
#[test]
fn expression_scope() {
@@ -614,7 +697,7 @@ class C[T]:
let x = &x_stmt.targets[0];
assert_eq!(index.expression_scope(x).kind(), ScopeKind::Module);
assert_eq!(index.expression_scope_id(x), FileScopeId::root());
assert_eq!(index.expression_scope_id(x), FileScopeId::global());
let def = ast.body[1].as_function_def_stmt().unwrap();
let y_stmt = def.body[0].as_assign_stmt().unwrap();
@@ -625,6 +708,17 @@ class C[T]:
#[test]
fn scope_iterators() {
fn scope_names<'a>(
scopes: impl Iterator<Item = (FileScopeId, &'a Scope)>,
db: &'a dyn Db,
file: File,
) -> Vec<&'a str> {
scopes
.into_iter()
.map(|(scope_id, _)| scope_id.to_scope_id(db, file).name(db))
.collect()
}
let TestCase { db, file } = test_case(
r#"
class Test:
@@ -640,35 +734,32 @@ def x():
let index = semantic_index(&db, file);
let descendents: Vec<_> = index
.descendent_scopes(FileScopeId::root())
.map(|(_, scope)| scope.name().as_str())
.collect();
assert_eq!(descendents, vec!["Test", "foo", "bar", "baz", "x"]);
let descendents = index.descendent_scopes(FileScopeId::global());
assert_eq!(
scope_names(descendents, &db, file),
vec!["Test", "foo", "bar", "baz", "x"]
);
let children: Vec<_> = index
.child_scopes(FileScopeId::root())
.map(|(_, scope)| scope.name.as_str())
.collect();
assert_eq!(children, vec!["Test", "x"]);
let children = index.child_scopes(FileScopeId::global());
assert_eq!(scope_names(children, &db, file), vec!["Test", "x"]);
let test_class = index.child_scopes(FileScopeId::root()).next().unwrap().0;
let test_child_scopes: Vec<_> = index
.child_scopes(test_class)
.map(|(_, scope)| scope.name.as_str())
.collect();
assert_eq!(test_child_scopes, vec!["foo", "baz"]);
let test_class = index.child_scopes(FileScopeId::global()).next().unwrap().0;
let test_child_scopes = index.child_scopes(test_class);
assert_eq!(
scope_names(test_child_scopes, &db, file),
vec!["foo", "baz"]
);
let bar_scope = index
.descendent_scopes(FileScopeId::root())
.descendent_scopes(FileScopeId::global())
.nth(2)
.unwrap()
.0;
let ancestors: Vec<_> = index
.ancestor_scopes(bar_scope)
.map(|(_, scope)| scope.name())
.collect();
let ancestors = index.ancestor_scopes(bar_scope);
assert_eq!(ancestors, vec!["bar", "foo", "Test", "<module>"]);
assert_eq!(
scope_names(ancestors, &db, file),
vec!["bar", "foo", "Test", "<module>"]
);
}
}

View File

@@ -1,15 +1,12 @@
use rustc_hash::FxHashMap;
use ruff_db::parsed::ParsedModule;
use ruff_db::vfs::VfsFile;
use ruff_index::{newtype_index, IndexVec};
use ruff_index::newtype_index;
use ruff_python_ast as ast;
use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::ExpressionRef;
use crate::ast_node_ref::AstNodeRef;
use crate::node_key::NodeKey;
use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey;
use crate::semantic_index::semantic_index;
use crate::semantic_index::symbol::{FileScopeId, ScopeId};
use crate::semantic_index::symbol::ScopeId;
use crate::Db;
/// AST ids for a single scope.
@@ -27,41 +24,21 @@ use crate::Db;
///
/// x = foo()
/// ```
#[derive(Debug)]
pub(crate) struct AstIds {
/// Maps expression ids to their expressions.
expressions: IndexVec<ScopeExpressionId, AstNodeRef<ast::Expr>>,
/// Maps expressions to their expression id. Uses `NodeKey` because it avoids cloning [`Parsed`].
expressions_map: FxHashMap<NodeKey, ScopeExpressionId>,
statements: IndexVec<ScopeStatementId, AstNodeRef<ast::Stmt>>,
statements_map: FxHashMap<NodeKey, ScopeStatementId>,
expressions_map: FxHashMap<ExpressionNodeKey, ScopedExpressionId>,
/// Maps expressions which "use" a symbol (that is, [`ExprName`]) to a use id.
uses_map: FxHashMap<ExpressionNodeKey, ScopedUseId>,
}
impl AstIds {
fn statement_id<'a, N>(&self, node: N) -> ScopeStatementId
where
N: Into<AnyNodeRef<'a>>,
{
self.statements_map[&NodeKey::from_node(node.into())]
fn expression_id(&self, key: impl Into<ExpressionNodeKey>) -> ScopedExpressionId {
self.expressions_map[&key.into()]
}
fn expression_id<'a, N>(&self, node: N) -> ScopeExpressionId
where
N: Into<AnyNodeRef<'a>>,
{
self.expressions_map[&NodeKey::from_node(node.into())]
}
}
#[allow(clippy::missing_fields_in_debug)]
impl std::fmt::Debug for AstIds {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("AstIds")
.field("expressions", &self.expressions)
.field("statements", &self.statements)
.finish()
fn use_id(&self, key: impl Into<ExpressionNodeKey>) -> ScopedUseId {
self.uses_map[&key.into()]
}
}
@@ -69,325 +46,165 @@ fn ast_ids<'db>(db: &'db dyn Db, scope: ScopeId) -> &'db AstIds {
semantic_index(db, scope.file(db)).ast_ids(scope.file_scope_id(db))
}
/// Node that can be uniquely identified by an id in a [`FileScopeId`].
pub trait ScopeAstIdNode {
pub trait HasScopedUseId {
/// The type of the ID uniquely identifying the use.
type Id: Copy;
/// Returns the ID that uniquely identifies the use in `scope`.
fn scoped_use_id(&self, db: &dyn Db, scope: ScopeId) -> Self::Id;
}
/// Uniquely identifies a use of a name in a [`crate::semantic_index::symbol::FileScopeId`].
#[newtype_index]
pub struct ScopedUseId;
impl HasScopedUseId for ast::ExprName {
type Id = ScopedUseId;
fn scoped_use_id(&self, db: &dyn Db, scope: ScopeId) -> Self::Id {
let expression_ref = ExpressionRef::from(self);
expression_ref.scoped_use_id(db, scope)
}
}
impl HasScopedUseId for ast::ExpressionRef<'_> {
type Id = ScopedUseId;
fn scoped_use_id(&self, db: &dyn Db, scope: ScopeId) -> Self::Id {
let ast_ids = ast_ids(db, scope);
ast_ids.use_id(*self)
}
}
pub trait HasScopedAstId {
/// The type of the ID uniquely identifying the node.
type Id: Copy;
/// Returns the ID that uniquely identifies the node in `scope`.
///
/// ## Panics
/// Panics if the node doesn't belong to `file` or is outside `scope`.
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, scope: FileScopeId) -> Self::Id;
/// Looks up the AST node by its ID.
///
/// ## Panics
/// May panic if the `id` does not belong to the AST of `file`, or is outside `scope`.
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self
where
Self: Sized;
fn scoped_ast_id(&self, db: &dyn Db, scope: ScopeId) -> Self::Id;
}
/// Extension trait for AST nodes that can be resolved by an `AstId`.
pub trait AstIdNode {
type ScopeId: Copy;
/// Resolves the AST id of the node.
///
/// ## Panics
/// May panic if the node does not belongs to `file`'s AST or is outside of `scope`. It may also
/// return an incorrect node if that's the case.
fn ast_id(&self, db: &dyn Db, file: VfsFile, scope: FileScopeId) -> AstId<Self::ScopeId>;
/// Resolves the AST node for `id`.
///
/// ## Panics
/// May panic if the `id` does not belong to the AST of `file` or it returns an incorrect node.
fn lookup(db: &dyn Db, file: VfsFile, id: AstId<Self::ScopeId>) -> &Self
where
Self: Sized;
}
impl<T> AstIdNode for T
where
T: ScopeAstIdNode,
{
type ScopeId = T::Id;
fn ast_id(&self, db: &dyn Db, file: VfsFile, scope: FileScopeId) -> AstId<Self::ScopeId> {
let in_scope_id = self.scope_ast_id(db, file, scope);
AstId { scope, in_scope_id }
}
fn lookup(db: &dyn Db, file: VfsFile, id: AstId<Self::ScopeId>) -> &Self
where
Self: Sized,
{
let scope = id.scope;
Self::lookup_in_scope(db, file, scope, id.in_scope_id)
}
}
/// Uniquely identifies an AST node in a file.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct AstId<L: Copy> {
/// The node's scope.
scope: FileScopeId,
/// The ID of the node inside [`Self::scope`].
in_scope_id: L,
}
impl<L: Copy> AstId<L> {
pub(super) fn new(scope: FileScopeId, in_scope_id: L) -> Self {
Self { scope, in_scope_id }
}
pub(super) fn in_scope_id(self) -> L {
self.in_scope_id
}
}
/// Uniquely identifies an [`ast::Expr`] in a [`FileScopeId`].
/// Uniquely identifies an [`ast::Expr`] in a [`crate::semantic_index::symbol::FileScopeId`].
#[newtype_index]
pub struct ScopeExpressionId;
pub struct ScopedExpressionId;
impl ScopeAstIdNode for ast::Expr {
type Id = ScopeExpressionId;
macro_rules! impl_has_scoped_expression_id {
($ty: ty) => {
impl HasScopedAstId for $ty {
type Id = ScopedExpressionId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ast_ids.expressions_map[&NodeKey::from_node(self)]
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, file_scope: FileScopeId, id: Self::Id) -> &Self {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ast_ids.expressions[id].node()
}
fn scoped_ast_id(&self, db: &dyn Db, scope: ScopeId) -> Self::Id {
let expression_ref = ExpressionRef::from(self);
expression_ref.scoped_ast_id(db, scope)
}
}
};
}
/// Uniquely identifies an [`ast::Stmt`] in a [`FileScopeId`].
#[newtype_index]
pub struct ScopeStatementId;
impl_has_scoped_expression_id!(ast::ExprBoolOp);
impl_has_scoped_expression_id!(ast::ExprName);
impl_has_scoped_expression_id!(ast::ExprBinOp);
impl_has_scoped_expression_id!(ast::ExprUnaryOp);
impl_has_scoped_expression_id!(ast::ExprLambda);
impl_has_scoped_expression_id!(ast::ExprIf);
impl_has_scoped_expression_id!(ast::ExprDict);
impl_has_scoped_expression_id!(ast::ExprSet);
impl_has_scoped_expression_id!(ast::ExprListComp);
impl_has_scoped_expression_id!(ast::ExprSetComp);
impl_has_scoped_expression_id!(ast::ExprDictComp);
impl_has_scoped_expression_id!(ast::ExprGenerator);
impl_has_scoped_expression_id!(ast::ExprAwait);
impl_has_scoped_expression_id!(ast::ExprYield);
impl_has_scoped_expression_id!(ast::ExprYieldFrom);
impl_has_scoped_expression_id!(ast::ExprCompare);
impl_has_scoped_expression_id!(ast::ExprCall);
impl_has_scoped_expression_id!(ast::ExprFString);
impl_has_scoped_expression_id!(ast::ExprStringLiteral);
impl_has_scoped_expression_id!(ast::ExprBytesLiteral);
impl_has_scoped_expression_id!(ast::ExprNumberLiteral);
impl_has_scoped_expression_id!(ast::ExprBooleanLiteral);
impl_has_scoped_expression_id!(ast::ExprNoneLiteral);
impl_has_scoped_expression_id!(ast::ExprEllipsisLiteral);
impl_has_scoped_expression_id!(ast::ExprAttribute);
impl_has_scoped_expression_id!(ast::ExprSubscript);
impl_has_scoped_expression_id!(ast::ExprStarred);
impl_has_scoped_expression_id!(ast::ExprNamed);
impl_has_scoped_expression_id!(ast::ExprList);
impl_has_scoped_expression_id!(ast::ExprTuple);
impl_has_scoped_expression_id!(ast::ExprSlice);
impl_has_scoped_expression_id!(ast::ExprIpyEscapeCommand);
impl_has_scoped_expression_id!(ast::Expr);
impl ScopeAstIdNode for ast::Stmt {
type Id = ScopeStatementId;
impl HasScopedAstId for ast::ExpressionRef<'_> {
type Id = ScopedExpressionId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
fn scoped_ast_id(&self, db: &dyn Db, scope: ScopeId) -> Self::Id {
let ast_ids = ast_ids(db, scope);
ast_ids.statement_id(self)
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, file_scope: FileScopeId, id: Self::Id) -> &Self {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ast_ids.statements[id].node()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeFunctionId(pub(super) ScopeStatementId);
impl ScopeAstIdNode for ast::StmtFunctionDef {
type Id = ScopeFunctionId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeFunctionId(ast_ids.statement_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self {
ast::Stmt::lookup_in_scope(db, file, scope, id.0)
.as_function_def_stmt()
.unwrap()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeClassId(pub(super) ScopeStatementId);
impl ScopeAstIdNode for ast::StmtClassDef {
type Id = ScopeClassId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeClassId(ast_ids.statement_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self {
let statement = ast::Stmt::lookup_in_scope(db, file, scope, id.0);
statement.as_class_def_stmt().unwrap()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeAssignmentId(pub(super) ScopeStatementId);
impl ScopeAstIdNode for ast::StmtAssign {
type Id = ScopeAssignmentId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeAssignmentId(ast_ids.statement_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self {
let statement = ast::Stmt::lookup_in_scope(db, file, scope, id.0);
statement.as_assign_stmt().unwrap()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeAnnotatedAssignmentId(ScopeStatementId);
impl ScopeAstIdNode for ast::StmtAnnAssign {
type Id = ScopeAnnotatedAssignmentId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeAnnotatedAssignmentId(ast_ids.statement_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self {
let statement = ast::Stmt::lookup_in_scope(db, file, scope, id.0);
statement.as_ann_assign_stmt().unwrap()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeImportId(pub(super) ScopeStatementId);
impl ScopeAstIdNode for ast::StmtImport {
type Id = ScopeImportId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeImportId(ast_ids.statement_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self {
let statement = ast::Stmt::lookup_in_scope(db, file, scope, id.0);
statement.as_import_stmt().unwrap()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeImportFromId(pub(super) ScopeStatementId);
impl ScopeAstIdNode for ast::StmtImportFrom {
type Id = ScopeImportFromId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeImportFromId(ast_ids.statement_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self {
let statement = ast::Stmt::lookup_in_scope(db, file, scope, id.0);
statement.as_import_from_stmt().unwrap()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeNamedExprId(pub(super) ScopeExpressionId);
impl ScopeAstIdNode for ast::ExprNamed {
type Id = ScopeNamedExprId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeNamedExprId(ast_ids.expression_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self
where
Self: Sized,
{
let expression = ast::Expr::lookup_in_scope(db, file, scope, id.0);
expression.as_named_expr().unwrap()
ast_ids.expression_id(*self)
}
}
#[derive(Debug)]
pub(super) struct AstIdsBuilder {
expressions: IndexVec<ScopeExpressionId, AstNodeRef<ast::Expr>>,
expressions_map: FxHashMap<NodeKey, ScopeExpressionId>,
statements: IndexVec<ScopeStatementId, AstNodeRef<ast::Stmt>>,
statements_map: FxHashMap<NodeKey, ScopeStatementId>,
expressions_map: FxHashMap<ExpressionNodeKey, ScopedExpressionId>,
uses_map: FxHashMap<ExpressionNodeKey, ScopedUseId>,
}
impl AstIdsBuilder {
pub(super) fn new() -> Self {
Self {
expressions: IndexVec::default(),
expressions_map: FxHashMap::default(),
statements: IndexVec::default(),
statements_map: FxHashMap::default(),
uses_map: FxHashMap::default(),
}
}
/// Adds `stmt` to the AST ids map and returns its id.
///
/// ## Safety
/// The function is marked as unsafe because it calls [`AstNodeRef::new`] which requires
/// that `stmt` is a child of `parsed`.
#[allow(unsafe_code)]
pub(super) unsafe fn record_statement(
&mut self,
stmt: &ast::Stmt,
parsed: &ParsedModule,
) -> ScopeStatementId {
let statement_id = self.statements.push(AstNodeRef::new(parsed.clone(), stmt));
/// Adds `expr` to the expression ids map and returns its id.
pub(super) fn record_expression(&mut self, expr: &ast::Expr) -> ScopedExpressionId {
let expression_id = self.expressions_map.len().into();
self.statements_map
.insert(NodeKey::from_node(stmt), statement_id);
statement_id
}
/// Adds `expr` to the AST ids map and returns its id.
///
/// ## Safety
/// The function is marked as unsafe because it calls [`AstNodeRef::new`] which requires
/// that `expr` is a child of `parsed`.
#[allow(unsafe_code)]
pub(super) unsafe fn record_expression(
&mut self,
expr: &ast::Expr,
parsed: &ParsedModule,
) -> ScopeExpressionId {
let expression_id = self.expressions.push(AstNodeRef::new(parsed.clone(), expr));
self.expressions_map
.insert(NodeKey::from_node(expr), expression_id);
self.expressions_map.insert(expr.into(), expression_id);
expression_id
}
/// Adds `expr` to the use ids map and returns its id.
pub(super) fn record_use(&mut self, expr: &ast::Expr) -> ScopedUseId {
let use_id = self.uses_map.len().into();
self.uses_map.insert(expr.into(), use_id);
use_id
}
pub(super) fn finish(mut self) -> AstIds {
self.expressions.shrink_to_fit();
self.expressions_map.shrink_to_fit();
self.statements.shrink_to_fit();
self.statements_map.shrink_to_fit();
self.uses_map.shrink_to_fit();
AstIds {
expressions: self.expressions,
expressions_map: self.expressions_map,
statements: self.statements,
statements_map: self.statements_map,
uses_map: self.uses_map,
}
}
}
/// Node key that can only be constructed for expressions.
pub(crate) mod node_key {
use ruff_python_ast as ast;
use crate::node_key::NodeKey;
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub(crate) struct ExpressionNodeKey(NodeKey);
impl From<ast::ExpressionRef<'_>> for ExpressionNodeKey {
fn from(value: ast::ExpressionRef<'_>) -> Self {
Self(NodeKey::from_node(value))
}
}
impl From<&ast::Expr> for ExpressionNodeKey {
fn from(value: &ast::Expr) -> Self {
Self(NodeKey::from_node(value))
}
}
}

View File

@@ -2,59 +2,75 @@ use std::sync::Arc;
use rustc_hash::FxHashMap;
use ruff_db::files::File;
use ruff_db::parsed::ParsedModule;
use ruff_index::IndexVec;
use ruff_python_ast as ast;
use ruff_python_ast::name::Name;
use ruff_python_ast::visitor::{walk_expr, walk_stmt, Visitor};
use crate::name::Name;
use crate::node_key::NodeKey;
use crate::semantic_index::ast_ids::{
AstId, AstIdsBuilder, ScopeAssignmentId, ScopeClassId, ScopeFunctionId, ScopeImportFromId,
ScopeImportId, ScopeNamedExprId,
use crate::ast_node_ref::AstNodeRef;
use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey;
use crate::semantic_index::ast_ids::AstIdsBuilder;
use crate::semantic_index::definition::{
AssignmentDefinitionNodeRef, Definition, DefinitionNodeKey, DefinitionNodeRef,
ImportFromDefinitionNodeRef,
};
use crate::semantic_index::definition::{Definition, ImportDefinition, ImportFromDefinition};
use crate::semantic_index::expression::Expression;
use crate::semantic_index::symbol::{
FileScopeId, FileSymbolId, Scope, ScopedSymbolId, SymbolFlags, SymbolTableBuilder,
FileScopeId, NodeWithScopeKey, NodeWithScopeRef, Scope, ScopeId, ScopedSymbolId, SymbolFlags,
SymbolTableBuilder,
};
use crate::semantic_index::{NodeWithScopeId, SemanticIndex};
use crate::semantic_index::use_def::{FlowSnapshot, UseDefMapBuilder};
use crate::semantic_index::SemanticIndex;
use crate::Db;
pub(super) struct SemanticIndexBuilder<'a> {
pub(super) struct SemanticIndexBuilder<'db> {
// Builder state
module: &'a ParsedModule,
db: &'db dyn Db,
file: File,
module: &'db ParsedModule,
scope_stack: Vec<FileScopeId>,
/// the definition whose target(s) we are currently walking
current_definition: Option<Definition>,
/// The assignment we're currently visiting.
current_assignment: Option<CurrentAssignment<'db>>,
/// Flow states at each `break` in the current loop.
loop_break_states: Vec<FlowSnapshot>,
// Semantic Index fields
scopes: IndexVec<FileScopeId, Scope>,
scope_ids_by_scope: IndexVec<FileScopeId, ScopeId<'db>>,
symbol_tables: IndexVec<FileScopeId, SymbolTableBuilder>,
ast_ids: IndexVec<FileScopeId, AstIdsBuilder>,
expression_scopes: FxHashMap<NodeKey, FileScopeId>,
scope_nodes: IndexVec<FileScopeId, NodeWithScopeId>,
use_def_maps: IndexVec<FileScopeId, UseDefMapBuilder<'db>>,
scopes_by_node: FxHashMap<NodeWithScopeKey, FileScopeId>,
scopes_by_expression: FxHashMap<ExpressionNodeKey, FileScopeId>,
definitions_by_node: FxHashMap<DefinitionNodeKey, Definition<'db>>,
expressions_by_node: FxHashMap<ExpressionNodeKey, Expression<'db>>,
}
impl<'a> SemanticIndexBuilder<'a> {
pub(super) fn new(parsed: &'a ParsedModule) -> Self {
impl<'db> SemanticIndexBuilder<'db> {
pub(super) fn new(db: &'db dyn Db, file: File, parsed: &'db ParsedModule) -> Self {
let mut builder = Self {
db,
file,
module: parsed,
scope_stack: Vec::new(),
current_definition: None,
current_assignment: None,
loop_break_states: vec![],
scopes: IndexVec::new(),
symbol_tables: IndexVec::new(),
ast_ids: IndexVec::new(),
expression_scopes: FxHashMap::default(),
scope_nodes: IndexVec::new(),
scope_ids_by_scope: IndexVec::new(),
use_def_maps: IndexVec::new(),
scopes_by_expression: FxHashMap::default(),
scopes_by_node: FxHashMap::default(),
definitions_by_node: FxHashMap::default(),
expressions_by_node: FxHashMap::default(),
};
builder.push_scope_with_parent(
NodeWithScopeId::Module,
&Name::new_static("<module>"),
None,
None,
None,
);
builder.push_scope_with_parent(NodeWithScopeRef::Module, None);
builder
}
@@ -66,44 +82,41 @@ impl<'a> SemanticIndexBuilder<'a> {
.expect("Always to have a root scope")
}
fn push_scope(
&mut self,
node: NodeWithScopeId,
name: &Name,
defining_symbol: Option<FileSymbolId>,
definition: Option<Definition>,
) {
fn push_scope(&mut self, node: NodeWithScopeRef) {
let parent = self.current_scope();
self.push_scope_with_parent(node, name, defining_symbol, definition, Some(parent));
self.push_scope_with_parent(node, Some(parent));
}
fn push_scope_with_parent(
&mut self,
node: NodeWithScopeId,
name: &Name,
defining_symbol: Option<FileSymbolId>,
definition: Option<Definition>,
parent: Option<FileScopeId>,
) {
fn push_scope_with_parent(&mut self, node: NodeWithScopeRef, parent: Option<FileScopeId>) {
let children_start = self.scopes.next_index() + 1;
let scope = Scope {
name: name.clone(),
parent,
defining_symbol,
definition,
kind: node.scope_kind(),
descendents: children_start..children_start,
};
let scope_id = self.scopes.push(scope);
let file_scope_id = self.scopes.push(scope);
self.symbol_tables.push(SymbolTableBuilder::new());
self.use_def_maps.push(UseDefMapBuilder::new());
let ast_id_scope = self.ast_ids.push(AstIdsBuilder::new());
let scope_node_id = self.scope_nodes.push(node);
debug_assert_eq!(ast_id_scope, scope_id);
debug_assert_eq!(scope_id, scope_node_id);
self.scope_stack.push(scope_id);
#[allow(unsafe_code)]
// SAFETY: `node` is guaranteed to be a child of `self.module`
let scope_id = ScopeId::new(
self.db,
self.file,
file_scope_id,
unsafe { node.to_kind(self.module.clone()) },
countme::Count::default(),
);
self.scope_ids_by_scope.push(scope_id);
self.scopes_by_node.insert(node.node_key(), file_scope_id);
debug_assert_eq!(ast_id_scope, file_scope_id);
self.scope_stack.push(file_scope_id);
}
fn pop_scope(&mut self) -> FileScopeId {
@@ -119,56 +132,120 @@ impl<'a> SemanticIndexBuilder<'a> {
&mut self.symbol_tables[scope_id]
}
fn current_use_def_map_mut(&mut self) -> &mut UseDefMapBuilder<'db> {
let scope_id = self.current_scope();
&mut self.use_def_maps[scope_id]
}
fn current_use_def_map(&self) -> &UseDefMapBuilder<'db> {
let scope_id = self.current_scope();
&self.use_def_maps[scope_id]
}
fn current_ast_ids(&mut self) -> &mut AstIdsBuilder {
let scope_id = self.current_scope();
&mut self.ast_ids[scope_id]
}
fn add_or_update_symbol(&mut self, name: Name, flags: SymbolFlags) -> ScopedSymbolId {
let symbol_table = self.current_symbol_table();
symbol_table.add_or_update_symbol(name, flags, None)
fn flow_snapshot(&self) -> FlowSnapshot {
self.current_use_def_map().snapshot()
}
fn add_or_update_symbol_with_definition(
&mut self,
name: Name,
fn flow_restore(&mut self, state: FlowSnapshot) {
self.current_use_def_map_mut().restore(state);
}
definition: Definition,
) -> ScopedSymbolId {
fn flow_merge(&mut self, state: &FlowSnapshot) {
self.current_use_def_map_mut().merge(state);
}
fn add_or_update_symbol(&mut self, name: Name, flags: SymbolFlags) -> ScopedSymbolId {
let symbol_table = self.current_symbol_table();
let (symbol_id, added) = symbol_table.add_or_update_symbol(name, flags);
if added {
let use_def_map = self.current_use_def_map_mut();
use_def_map.add_symbol(symbol_id);
}
symbol_id
}
symbol_table.add_or_update_symbol(name, SymbolFlags::IS_DEFINED, Some(definition))
fn add_definition<'a>(
&mut self,
symbol: ScopedSymbolId,
definition_node: impl Into<DefinitionNodeRef<'a>>,
) -> Definition<'db> {
let definition_node = definition_node.into();
let definition = Definition::new(
self.db,
self.file,
self.current_scope(),
symbol,
#[allow(unsafe_code)]
unsafe {
definition_node.into_owned(self.module.clone())
},
countme::Count::default(),
);
self.definitions_by_node
.insert(definition_node.key(), definition);
self.current_use_def_map_mut()
.record_definition(symbol, definition);
definition
}
/// Record an expression that needs to be a Salsa ingredient, because we need to infer its type
/// standalone (type narrowing tests, RHS of an assignment.)
fn add_standalone_expression(&mut self, expression_node: &ast::Expr) {
let expression = Expression::new(
self.db,
self.file,
self.current_scope(),
#[allow(unsafe_code)]
unsafe {
AstNodeRef::new(self.module.clone(), expression_node)
},
countme::Count::default(),
);
self.expressions_by_node
.insert(expression_node.into(), expression);
}
fn with_type_params(
&mut self,
name: &Name,
with_params: &WithTypeParams,
defining_symbol: FileSymbolId,
with_scope: NodeWithScopeRef,
type_params: Option<&'db ast::TypeParams>,
nested: impl FnOnce(&mut Self) -> FileScopeId,
) -> FileScopeId {
let type_params = with_params.type_parameters();
if let Some(type_params) = type_params {
let type_node = match with_params {
WithTypeParams::ClassDef { id, .. } => NodeWithScopeId::ClassTypeParams(*id),
WithTypeParams::FunctionDef { id, .. } => NodeWithScopeId::FunctionTypeParams(*id),
};
self.push_scope(with_scope);
self.push_scope(
type_node,
name,
Some(defining_symbol),
Some(with_params.definition()),
);
for type_param in &type_params.type_params {
let name = match type_param {
ast::TypeParam::TypeVar(ast::TypeParamTypeVar { name, .. }) => name,
ast::TypeParam::ParamSpec(ast::TypeParamParamSpec { name, .. }) => name,
ast::TypeParam::TypeVarTuple(ast::TypeParamTypeVarTuple { name, .. }) => name,
let (name, bound, default) = match type_param {
ast::TypeParam::TypeVar(ast::TypeParamTypeVar {
range: _,
name,
bound,
default,
}) => (name, bound, default),
ast::TypeParam::ParamSpec(ast::TypeParamParamSpec {
name, default, ..
}) => (name, &None, default),
ast::TypeParam::TypeVarTuple(ast::TypeParamTypeVarTuple {
name,
default,
..
}) => (name, &None, default),
};
self.add_or_update_symbol(Name::new(name), SymbolFlags::IS_DEFINED);
// TODO create Definition for typevars
self.add_or_update_symbol(name.id.clone(), SymbolFlags::IS_DEFINED);
if let Some(bound) = bound {
self.visit_expr(bound);
}
if let Some(default) = default {
self.visit_expr(default);
}
}
}
@@ -181,7 +258,7 @@ impl<'a> SemanticIndexBuilder<'a> {
nested_scope
}
pub(super) fn build(mut self) -> SemanticIndex {
pub(super) fn build(mut self) -> SemanticIndex<'db> {
let module = self.module;
self.visit_body(module.suite());
@@ -189,7 +266,7 @@ impl<'a> SemanticIndexBuilder<'a> {
self.pop_scope();
assert!(self.scope_stack.is_empty());
assert!(self.current_definition.is_none());
assert!(self.current_assignment.is_none());
let mut symbol_tables: IndexVec<_, _> = self
.symbol_tables
@@ -197,6 +274,12 @@ impl<'a> SemanticIndexBuilder<'a> {
.map(|builder| Arc::new(builder.finish()))
.collect();
let mut use_def_maps: IndexVec<_, _> = self
.use_def_maps
.into_iter()
.map(|builder| Arc::new(builder.finish()))
.collect();
let mut ast_ids: IndexVec<_, _> = self
.ast_ids
.into_iter()
@@ -204,63 +287,54 @@ impl<'a> SemanticIndexBuilder<'a> {
.collect();
self.scopes.shrink_to_fit();
ast_ids.shrink_to_fit();
symbol_tables.shrink_to_fit();
self.expression_scopes.shrink_to_fit();
self.scope_nodes.shrink_to_fit();
use_def_maps.shrink_to_fit();
ast_ids.shrink_to_fit();
self.scopes_by_expression.shrink_to_fit();
self.definitions_by_node.shrink_to_fit();
self.scope_ids_by_scope.shrink_to_fit();
self.scopes_by_node.shrink_to_fit();
SemanticIndex {
symbol_tables,
scopes: self.scopes,
scope_nodes: self.scope_nodes,
definitions_by_node: self.definitions_by_node,
expressions_by_node: self.expressions_by_node,
scope_ids_by_scope: self.scope_ids_by_scope,
ast_ids,
expression_scopes: self.expression_scopes,
scopes_by_expression: self.scopes_by_expression,
scopes_by_node: self.scopes_by_node,
use_def_maps,
}
}
}
impl Visitor<'_> for SemanticIndexBuilder<'_> {
fn visit_stmt(&mut self, stmt: &ast::Stmt) {
let module = self.module;
#[allow(unsafe_code)]
let statement_id = unsafe {
// SAFETY: The builder only visits nodes that are part of `module`. This guarantees that
// the current statement must be a child of `module`.
self.current_ast_ids().record_statement(stmt, module)
};
impl<'db, 'ast> Visitor<'ast> for SemanticIndexBuilder<'db>
where
'ast: 'db,
{
fn visit_stmt(&mut self, stmt: &'ast ast::Stmt) {
match stmt {
ast::Stmt::FunctionDef(function_def) => {
for decorator in &function_def.decorator_list {
self.visit_decorator(decorator);
}
let name = Name::new(&function_def.name.id);
let function_id = ScopeFunctionId(statement_id);
let definition = Definition::FunctionDef(function_id);
let scope = self.current_scope();
let symbol = FileSymbolId::new(
scope,
self.add_or_update_symbol_with_definition(name.clone(), definition),
);
let symbol = self
.add_or_update_symbol(function_def.name.id.clone(), SymbolFlags::IS_DEFINED);
self.add_definition(symbol, function_def);
self.with_type_params(
&name,
&WithTypeParams::FunctionDef {
node: function_def,
id: AstId::new(scope, function_id),
},
symbol,
NodeWithScopeRef::FunctionTypeParameters(function_def),
function_def.type_params.as_deref(),
|builder| {
builder.visit_parameters(&function_def.parameters);
for expr in &function_def.returns {
if let Some(expr) = &function_def.returns {
builder.visit_annotation(expr);
}
builder.push_scope(
NodeWithScopeId::Function(AstId::new(scope, function_id)),
&name,
Some(symbol),
Some(definition),
);
builder.push_scope(NodeWithScopeRef::Function(function_def));
builder.visit_body(&function_def.body);
builder.pop_scope()
},
@@ -271,81 +345,124 @@ impl Visitor<'_> for SemanticIndexBuilder<'_> {
self.visit_decorator(decorator);
}
let name = Name::new(&class.name.id);
let class_id = ScopeClassId(statement_id);
let definition = Definition::from(class_id);
let scope = self.current_scope();
let id = FileSymbolId::new(
self.current_scope(),
self.add_or_update_symbol_with_definition(name.clone(), definition),
);
let symbol =
self.add_or_update_symbol(class.name.id.clone(), SymbolFlags::IS_DEFINED);
self.add_definition(symbol, class);
self.with_type_params(
&name,
&WithTypeParams::ClassDef {
node: class,
id: AstId::new(scope, class_id),
},
id,
NodeWithScopeRef::ClassTypeParameters(class),
class.type_params.as_deref(),
|builder| {
if let Some(arguments) = &class.arguments {
builder.visit_arguments(arguments);
}
builder.push_scope(
NodeWithScopeId::Class(AstId::new(scope, class_id)),
&name,
Some(id),
Some(definition),
);
builder.push_scope(NodeWithScopeRef::Class(class));
builder.visit_body(&class.body);
builder.pop_scope()
},
);
}
ast::Stmt::Import(ast::StmtImport { names, .. }) => {
for (i, alias) in names.iter().enumerate() {
ast::Stmt::Import(node) => {
for alias in &node.names {
let symbol_name = if let Some(asname) = &alias.asname {
asname.id.as_str()
asname.id.clone()
} else {
alias.name.id.split('.').next().unwrap()
Name::new(alias.name.id.split('.').next().unwrap())
};
let def = Definition::Import(ImportDefinition {
import_id: ScopeImportId(statement_id),
alias: u32::try_from(i).unwrap(),
});
self.add_or_update_symbol_with_definition(Name::new(symbol_name), def);
let symbol = self.add_or_update_symbol(symbol_name, SymbolFlags::IS_DEFINED);
self.add_definition(symbol, alias);
}
}
ast::Stmt::ImportFrom(ast::StmtImportFrom {
module: _,
names,
level: _,
..
}) => {
for (i, alias) in names.iter().enumerate() {
ast::Stmt::ImportFrom(node) => {
for (alias_index, alias) in node.names.iter().enumerate() {
let symbol_name = if let Some(asname) = &alias.asname {
asname.id.as_str()
&asname.id
} else {
alias.name.id.as_str()
&alias.name.id
};
let def = Definition::ImportFrom(ImportFromDefinition {
import_id: ScopeImportFromId(statement_id),
name: u32::try_from(i).unwrap(),
});
self.add_or_update_symbol_with_definition(Name::new(symbol_name), def);
let symbol =
self.add_or_update_symbol(symbol_name.clone(), SymbolFlags::IS_DEFINED);
self.add_definition(symbol, ImportFromDefinitionNodeRef { node, alias_index });
}
}
ast::Stmt::Assign(node) => {
debug_assert!(self.current_definition.is_none());
debug_assert!(self.current_assignment.is_none());
self.visit_expr(&node.value);
self.current_definition =
Some(Definition::Assignment(ScopeAssignmentId(statement_id)));
self.add_standalone_expression(&node.value);
self.current_assignment = Some(node.into());
for target in &node.targets {
self.visit_expr(target);
}
self.current_definition = None;
self.current_assignment = None;
}
ast::Stmt::AnnAssign(node) => {
debug_assert!(self.current_assignment.is_none());
// TODO deferred annotation visiting
self.visit_expr(&node.annotation);
if let Some(value) = &node.value {
self.visit_expr(value);
}
self.current_assignment = Some(node.into());
self.visit_expr(&node.target);
self.current_assignment = None;
}
ast::Stmt::If(node) => {
self.visit_expr(&node.test);
let pre_if = self.flow_snapshot();
self.visit_body(&node.body);
let mut post_clauses: Vec<FlowSnapshot> = vec![];
for clause in &node.elif_else_clauses {
// snapshot after every block except the last; the last one will just become
// the state that we merge the other snapshots into
post_clauses.push(self.flow_snapshot());
// we can only take an elif/else branch if none of the previous ones were
// taken, so the block entry state is always `pre_if`
self.flow_restore(pre_if.clone());
self.visit_elif_else_clause(clause);
}
for post_clause_state in post_clauses {
self.flow_merge(&post_clause_state);
}
let has_else = node
.elif_else_clauses
.last()
.is_some_and(|clause| clause.test.is_none());
if !has_else {
// if there's no else clause, then it's possible we took none of the branches,
// and the pre_if state can reach here
self.flow_merge(&pre_if);
}
}
ast::Stmt::While(node) => {
self.visit_expr(&node.test);
let pre_loop = self.flow_snapshot();
// Save aside any break states from an outer loop
let saved_break_states = std::mem::take(&mut self.loop_break_states);
self.visit_body(&node.body);
// Get the break states from the body of this loop, and restore the saved outer
// ones.
let break_states =
std::mem::replace(&mut self.loop_break_states, saved_break_states);
// We may execute the `else` clause without ever executing the body, so merge in
// the pre-loop state before visiting `else`.
self.flow_merge(&pre_loop);
self.visit_body(&node.orelse);
// Breaking out of a while loop bypasses the `else` clause, so merge in the break
// states after visiting `else`.
for break_state in break_states {
self.flow_merge(&break_state);
}
}
ast::Stmt::Break(_) => {
self.loop_break_states.push(self.flow_snapshot());
}
_ => {
walk_stmt(self, stmt);
@@ -353,71 +470,78 @@ impl Visitor<'_> for SemanticIndexBuilder<'_> {
}
}
fn visit_expr(&mut self, expr: &'_ ast::Expr) {
let module = self.module;
#[allow(unsafe_code)]
let expression_id = unsafe {
// SAFETY: The builder only visits nodes that are part of `module`. This guarantees that
// the current expression must be a child of `module`.
self.current_ast_ids().record_expression(expr, module)
};
self.expression_scopes
.insert(NodeKey::from_node(expr), self.current_scope());
fn visit_expr(&mut self, expr: &'ast ast::Expr) {
self.scopes_by_expression
.insert(expr.into(), self.current_scope());
self.current_ast_ids().record_expression(expr);
match expr {
ast::Expr::Name(ast::ExprName { id, ctx, .. }) => {
ast::Expr::Name(name_node) => {
let ast::ExprName { id, ctx, .. } = name_node;
let flags = match ctx {
ast::ExprContext::Load => SymbolFlags::IS_USED,
ast::ExprContext::Store => SymbolFlags::IS_DEFINED,
ast::ExprContext::Del => SymbolFlags::IS_DEFINED,
ast::ExprContext::Invalid => SymbolFlags::empty(),
};
match self.current_definition {
Some(definition) if flags.contains(SymbolFlags::IS_DEFINED) => {
self.add_or_update_symbol_with_definition(Name::new(id), definition);
}
_ => {
self.add_or_update_symbol(Name::new(id), flags);
let symbol = self.add_or_update_symbol(id.clone(), flags);
if flags.contains(SymbolFlags::IS_DEFINED) {
match self.current_assignment {
Some(CurrentAssignment::Assign(assignment)) => {
self.add_definition(
symbol,
AssignmentDefinitionNodeRef {
assignment,
target: name_node,
},
);
}
Some(CurrentAssignment::AnnAssign(ann_assign)) => {
self.add_definition(symbol, ann_assign);
}
Some(CurrentAssignment::Named(named)) => {
self.add_definition(symbol, named);
}
None => {}
}
}
if flags.contains(SymbolFlags::IS_USED) {
let use_id = self.current_ast_ids().record_use(expr);
self.current_use_def_map_mut().record_use(symbol, use_id);
}
walk_expr(self, expr);
}
ast::Expr::Named(node) => {
debug_assert!(self.current_definition.is_none());
self.current_definition =
Some(Definition::NamedExpr(ScopeNamedExprId(expression_id)));
debug_assert!(self.current_assignment.is_none());
self.current_assignment = Some(node.into());
// TODO walrus in comprehensions is implicitly nonlocal
self.visit_expr(&node.target);
self.current_definition = None;
self.current_assignment = None;
self.visit_expr(&node.value);
}
ast::Expr::Lambda(lambda) => {
if let Some(parameters) = &lambda.parameters {
self.visit_parameters(parameters);
}
self.push_scope(NodeWithScopeRef::Lambda(lambda));
self.visit_expr(lambda.body.as_ref());
self.pop_scope();
}
ast::Expr::If(ast::ExprIf {
body, test, orelse, ..
}) => {
// TODO detect statically known truthy or falsy test (via type inference, not naive
// AST inspection, so we can't simplify here, need to record test expression in CFG
// for later checking)
// AST inspection, so we can't simplify here, need to record test expression for
// later checking)
self.visit_expr(test);
// let if_branch = self.flow_graph_builder.add_branch(self.current_flow_node());
// self.set_current_flow_node(if_branch);
// self.insert_constraint(test);
let pre_if = self.flow_snapshot();
self.visit_expr(body);
// let post_body = self.current_flow_node();
// self.set_current_flow_node(if_branch);
let post_body = self.flow_snapshot();
self.flow_restore(pre_if);
self.visit_expr(orelse);
// let post_else = self
// .flow_graph_builder
// .add_phi(self.current_flow_node(), post_body);
// self.set_current_flow_node(post_else);
self.flow_merge(&post_body);
}
_ => {
walk_expr(self, expr);
@@ -426,29 +550,27 @@ impl Visitor<'_> for SemanticIndexBuilder<'_> {
}
}
enum WithTypeParams<'a> {
ClassDef {
node: &'a ast::StmtClassDef,
id: AstId<ScopeClassId>,
},
FunctionDef {
node: &'a ast::StmtFunctionDef,
id: AstId<ScopeFunctionId>,
},
#[derive(Copy, Clone, Debug)]
enum CurrentAssignment<'a> {
Assign(&'a ast::StmtAssign),
AnnAssign(&'a ast::StmtAnnAssign),
Named(&'a ast::ExprNamed),
}
impl<'a> WithTypeParams<'a> {
fn type_parameters(&self) -> Option<&'a ast::TypeParams> {
match self {
WithTypeParams::ClassDef { node, .. } => node.type_params.as_deref(),
WithTypeParams::FunctionDef { node, .. } => node.type_params.as_deref(),
}
}
fn definition(&self) -> Definition {
match self {
WithTypeParams::ClassDef { id, .. } => Definition::ClassDef(id.in_scope_id()),
WithTypeParams::FunctionDef { id, .. } => Definition::FunctionDef(id.in_scope_id()),
}
impl<'a> From<&'a ast::StmtAssign> for CurrentAssignment<'a> {
fn from(value: &'a ast::StmtAssign) -> Self {
Self::Assign(value)
}
}
impl<'a> From<&'a ast::StmtAnnAssign> for CurrentAssignment<'a> {
fn from(value: &'a ast::StmtAnnAssign) -> Self {
Self::AnnAssign(value)
}
}
impl<'a> From<&'a ast::ExprNamed> for CurrentAssignment<'a> {
fn from(value: &'a ast::ExprNamed) -> Self {
Self::Named(value)
}
}

View File

@@ -1,76 +1,232 @@
use crate::semantic_index::ast_ids::{
ScopeAnnotatedAssignmentId, ScopeAssignmentId, ScopeClassId, ScopeFunctionId,
ScopeImportFromId, ScopeImportId, ScopeNamedExprId,
};
use ruff_db::files::File;
use ruff_db::parsed::ParsedModule;
use ruff_python_ast as ast;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Definition {
Import(ImportDefinition),
ImportFrom(ImportFromDefinition),
ClassDef(ScopeClassId),
FunctionDef(ScopeFunctionId),
Assignment(ScopeAssignmentId),
AnnotatedAssignment(ScopeAnnotatedAssignmentId),
NamedExpr(ScopeNamedExprId),
/// represents the implicit initial definition of every name as "unbound"
Unbound,
// TODO with statements, except handlers, function args...
use crate::ast_node_ref::AstNodeRef;
use crate::node_key::NodeKey;
use crate::semantic_index::symbol::{FileScopeId, ScopeId, ScopedSymbolId};
use crate::Db;
#[salsa::tracked]
pub struct Definition<'db> {
/// The file in which the definition occurs.
#[id]
pub(crate) file: File,
/// The scope in which the definition occurs.
#[id]
pub(crate) file_scope: FileScopeId,
/// The symbol defined.
#[id]
pub(crate) symbol: ScopedSymbolId,
#[no_eq]
#[return_ref]
pub(crate) node: DefinitionKind,
#[no_eq]
count: countme::Count<Definition<'static>>,
}
impl From<ImportDefinition> for Definition {
fn from(value: ImportDefinition) -> Self {
Self::Import(value)
impl<'db> Definition<'db> {
pub(crate) fn scope(self, db: &'db dyn Db) -> ScopeId<'db> {
self.file_scope(db).to_scope_id(db, self.file(db))
}
}
impl From<ImportFromDefinition> for Definition {
fn from(value: ImportFromDefinition) -> Self {
Self::ImportFrom(value)
#[derive(Copy, Clone, Debug)]
pub(crate) enum DefinitionNodeRef<'a> {
Import(&'a ast::Alias),
ImportFrom(ImportFromDefinitionNodeRef<'a>),
Function(&'a ast::StmtFunctionDef),
Class(&'a ast::StmtClassDef),
NamedExpression(&'a ast::ExprNamed),
Assignment(AssignmentDefinitionNodeRef<'a>),
AnnotatedAssignment(&'a ast::StmtAnnAssign),
}
impl<'a> From<&'a ast::StmtFunctionDef> for DefinitionNodeRef<'a> {
fn from(node: &'a ast::StmtFunctionDef) -> Self {
Self::Function(node)
}
}
impl From<ScopeClassId> for Definition {
fn from(value: ScopeClassId) -> Self {
Self::ClassDef(value)
impl<'a> From<&'a ast::StmtClassDef> for DefinitionNodeRef<'a> {
fn from(node: &'a ast::StmtClassDef) -> Self {
Self::Class(node)
}
}
impl From<ScopeFunctionId> for Definition {
fn from(value: ScopeFunctionId) -> Self {
Self::FunctionDef(value)
impl<'a> From<&'a ast::ExprNamed> for DefinitionNodeRef<'a> {
fn from(node: &'a ast::ExprNamed) -> Self {
Self::NamedExpression(node)
}
}
impl From<ScopeAssignmentId> for Definition {
fn from(value: ScopeAssignmentId) -> Self {
Self::Assignment(value)
impl<'a> From<&'a ast::StmtAnnAssign> for DefinitionNodeRef<'a> {
fn from(node: &'a ast::StmtAnnAssign) -> Self {
Self::AnnotatedAssignment(node)
}
}
impl From<ScopeAnnotatedAssignmentId> for Definition {
fn from(value: ScopeAnnotatedAssignmentId) -> Self {
Self::AnnotatedAssignment(value)
impl<'a> From<&'a ast::Alias> for DefinitionNodeRef<'a> {
fn from(node_ref: &'a ast::Alias) -> Self {
Self::Import(node_ref)
}
}
impl From<ScopeNamedExprId> for Definition {
fn from(value: ScopeNamedExprId) -> Self {
Self::NamedExpr(value)
impl<'a> From<ImportFromDefinitionNodeRef<'a>> for DefinitionNodeRef<'a> {
fn from(node_ref: ImportFromDefinitionNodeRef<'a>) -> Self {
Self::ImportFrom(node_ref)
}
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct ImportDefinition {
pub(crate) import_id: ScopeImportId,
/// Index into [`ruff_python_ast::StmtImport::names`].
pub(crate) alias: u32,
impl<'a> From<AssignmentDefinitionNodeRef<'a>> for DefinitionNodeRef<'a> {
fn from(node_ref: AssignmentDefinitionNodeRef<'a>) -> Self {
Self::Assignment(node_ref)
}
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct ImportFromDefinition {
pub(crate) import_id: ScopeImportFromId,
/// Index into [`ruff_python_ast::StmtImportFrom::names`].
pub(crate) name: u32,
#[derive(Copy, Clone, Debug)]
pub(crate) struct ImportFromDefinitionNodeRef<'a> {
pub(crate) node: &'a ast::StmtImportFrom,
pub(crate) alias_index: usize,
}
#[derive(Copy, Clone, Debug)]
pub(crate) struct AssignmentDefinitionNodeRef<'a> {
pub(crate) assignment: &'a ast::StmtAssign,
pub(crate) target: &'a ast::ExprName,
}
impl DefinitionNodeRef<'_> {
#[allow(unsafe_code)]
pub(super) unsafe fn into_owned(self, parsed: ParsedModule) -> DefinitionKind {
match self {
DefinitionNodeRef::Import(alias) => {
DefinitionKind::Import(AstNodeRef::new(parsed, alias))
}
DefinitionNodeRef::ImportFrom(ImportFromDefinitionNodeRef { node, alias_index }) => {
DefinitionKind::ImportFrom(ImportFromDefinitionKind {
node: AstNodeRef::new(parsed, node),
alias_index,
})
}
DefinitionNodeRef::Function(function) => {
DefinitionKind::Function(AstNodeRef::new(parsed, function))
}
DefinitionNodeRef::Class(class) => {
DefinitionKind::Class(AstNodeRef::new(parsed, class))
}
DefinitionNodeRef::NamedExpression(named) => {
DefinitionKind::NamedExpression(AstNodeRef::new(parsed, named))
}
DefinitionNodeRef::Assignment(AssignmentDefinitionNodeRef { assignment, target }) => {
DefinitionKind::Assignment(AssignmentDefinitionKind {
assignment: AstNodeRef::new(parsed.clone(), assignment),
target: AstNodeRef::new(parsed, target),
})
}
DefinitionNodeRef::AnnotatedAssignment(assign) => {
DefinitionKind::AnnotatedAssignment(AstNodeRef::new(parsed, assign))
}
}
}
pub(super) fn key(self) -> DefinitionNodeKey {
match self {
Self::Import(node) => node.into(),
Self::ImportFrom(ImportFromDefinitionNodeRef { node, alias_index }) => {
(&node.names[alias_index]).into()
}
Self::Function(node) => node.into(),
Self::Class(node) => node.into(),
Self::NamedExpression(node) => node.into(),
Self::Assignment(AssignmentDefinitionNodeRef {
assignment: _,
target,
}) => target.into(),
Self::AnnotatedAssignment(node) => node.into(),
}
}
}
#[derive(Clone, Debug)]
pub enum DefinitionKind {
Import(AstNodeRef<ast::Alias>),
ImportFrom(ImportFromDefinitionKind),
Function(AstNodeRef<ast::StmtFunctionDef>),
Class(AstNodeRef<ast::StmtClassDef>),
NamedExpression(AstNodeRef<ast::ExprNamed>),
Assignment(AssignmentDefinitionKind),
AnnotatedAssignment(AstNodeRef<ast::StmtAnnAssign>),
}
#[derive(Clone, Debug)]
pub struct ImportFromDefinitionKind {
node: AstNodeRef<ast::StmtImportFrom>,
alias_index: usize,
}
impl ImportFromDefinitionKind {
pub(crate) fn import(&self) -> &ast::StmtImportFrom {
self.node.node()
}
pub(crate) fn alias(&self) -> &ast::Alias {
&self.node.node().names[self.alias_index]
}
}
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub struct AssignmentDefinitionKind {
assignment: AstNodeRef<ast::StmtAssign>,
target: AstNodeRef<ast::ExprName>,
}
impl AssignmentDefinitionKind {
pub(crate) fn assignment(&self) -> &ast::StmtAssign {
self.assignment.node()
}
}
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub(crate) struct DefinitionNodeKey(NodeKey);
impl From<&ast::Alias> for DefinitionNodeKey {
fn from(node: &ast::Alias) -> Self {
Self(NodeKey::from_node(node))
}
}
impl From<&ast::StmtFunctionDef> for DefinitionNodeKey {
fn from(node: &ast::StmtFunctionDef) -> Self {
Self(NodeKey::from_node(node))
}
}
impl From<&ast::StmtClassDef> for DefinitionNodeKey {
fn from(node: &ast::StmtClassDef) -> Self {
Self(NodeKey::from_node(node))
}
}
impl From<&ast::ExprName> for DefinitionNodeKey {
fn from(node: &ast::ExprName) -> Self {
Self(NodeKey::from_node(node))
}
}
impl From<&ast::ExprNamed> for DefinitionNodeKey {
fn from(node: &ast::ExprNamed) -> Self {
Self(NodeKey::from_node(node))
}
}
impl From<&ast::StmtAnnAssign> for DefinitionNodeKey {
fn from(node: &ast::StmtAnnAssign) -> Self {
Self(NodeKey::from_node(node))
}
}

View File

@@ -0,0 +1,34 @@
use crate::ast_node_ref::AstNodeRef;
use crate::db::Db;
use crate::semantic_index::symbol::{FileScopeId, ScopeId};
use ruff_db::files::File;
use ruff_python_ast as ast;
use salsa;
/// An independently type-inferable expression.
///
/// Includes constraint expressions (e.g. if tests) and the RHS of an unpacking assignment.
#[salsa::tracked]
pub(crate) struct Expression<'db> {
/// The file in which the expression occurs.
#[id]
pub(crate) file: File,
/// The scope in which the expression occurs.
#[id]
pub(crate) file_scope: FileScopeId,
/// The expression node.
#[no_eq]
#[return_ref]
pub(crate) node: AstNodeRef<ast::Expr>,
#[no_eq]
count: countme::Count<Expression<'static>>,
}
impl<'db> Expression<'db> {
pub(crate) fn scope(self, db: &'db dyn Db) -> ScopeId<'db> {
self.file_scope(db).to_scope_id(db, self.file(db))
}
}

View File

@@ -3,39 +3,32 @@ use std::ops::Range;
use bitflags::bitflags;
use hashbrown::hash_map::RawEntryMut;
use rustc_hash::FxHasher;
use salsa::DebugWithDb;
use smallvec::SmallVec;
use ruff_db::vfs::VfsFile;
use ruff_db::files::File;
use ruff_db::parsed::ParsedModule;
use ruff_index::{newtype_index, IndexVec};
use ruff_python_ast::name::Name;
use ruff_python_ast::{self as ast};
use rustc_hash::FxHasher;
use crate::name::Name;
use crate::semantic_index::definition::Definition;
use crate::semantic_index::{root_scope, semantic_index, symbol_table, SymbolMap};
use crate::ast_node_ref::AstNodeRef;
use crate::node_key::NodeKey;
use crate::semantic_index::{semantic_index, SymbolMap};
use crate::Db;
#[derive(Eq, PartialEq, Debug)]
pub struct Symbol {
name: Name,
flags: SymbolFlags,
/// The nodes that define this symbol, in source order.
definitions: SmallVec<[Definition; 4]>,
}
impl Symbol {
fn new(name: Name, definition: Option<Definition>) -> Self {
fn new(name: Name) -> Self {
Self {
name,
flags: SymbolFlags::empty(),
definitions: definition.into_iter().collect(),
}
}
fn push_definition(&mut self, definition: Definition) {
self.definitions.push(definition);
}
fn insert_flags(&mut self, flags: SymbolFlags) {
self.flags.insert(flags);
}
@@ -54,10 +47,6 @@ impl Symbol {
pub fn is_defined(&self) -> bool {
self.flags.contains(SymbolFlags::IS_DEFINED)
}
pub fn definitions(&self) -> &[Definition] {
&self.definitions
}
}
bitflags! {
@@ -72,15 +61,6 @@ bitflags! {
}
}
/// ID that uniquely identifies a public symbol defined in a module's root scope.
#[salsa::tracked]
pub struct PublicSymbolId<'db> {
#[id]
pub(crate) file: VfsFile,
#[id]
pub(crate) scoped_symbol_id: ScopedSymbolId,
}
/// ID that uniquely identifies a symbol in a file.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct FileSymbolId {
@@ -89,13 +69,6 @@ pub struct FileSymbolId {
}
impl FileSymbolId {
pub(super) fn new(scope: FileScopeId, symbol: ScopedSymbolId) -> Self {
Self {
scope,
scoped_symbol_id: symbol,
}
}
pub fn scope(self) -> FileScopeId {
self.scope
}
@@ -115,88 +88,47 @@ impl From<FileSymbolId> for ScopedSymbolId {
#[newtype_index]
pub struct ScopedSymbolId;
impl ScopedSymbolId {
/// Converts the symbol to a public symbol.
///
/// # Panics
/// May panic if the symbol does not belong to `file` or is not a symbol of `file`'s root scope.
pub(crate) fn to_public_symbol(self, db: &dyn Db, file: VfsFile) -> PublicSymbolId {
let symbols = public_symbols_map(db, file);
symbols.public(self)
}
}
/// Returns a mapping from [`FileScopeId`] to globally unique [`ScopeId`].
#[salsa::tracked(return_ref)]
pub(crate) fn scopes_map(db: &dyn Db, file: VfsFile) -> ScopesMap<'_> {
let _ = tracing::trace_span!("scopes_map", file = ?file.debug(db.upcast())).enter();
let index = semantic_index(db, file);
let scopes: IndexVec<_, _> = index
.scopes
.indices()
.map(|id| ScopeId::new(db, file, id))
.collect();
ScopesMap { scopes }
}
/// Maps from the file specific [`FileScopeId`] to the global [`ScopeId`] that can be used as a Salsa query parameter.
///
/// The [`SemanticIndex`] uses [`FileScopeId`] on a per-file level to identify scopes
/// because they allow for more efficient storage of associated data
/// (use of an [`IndexVec`] keyed by [`FileScopeId`] over an [`FxHashMap`] keyed by [`ScopeId`]).
#[derive(Eq, PartialEq, Debug)]
pub(crate) struct ScopesMap<'db> {
scopes: IndexVec<FileScopeId, ScopeId<'db>>,
}
impl<'db> ScopesMap<'db> {
/// Gets the program-wide unique scope id for the given file specific `scope_id`.
fn get(&self, scope: FileScopeId) -> ScopeId<'db> {
self.scopes[scope]
}
}
#[salsa::tracked(return_ref)]
pub(crate) fn public_symbols_map(db: &dyn Db, file: VfsFile) -> PublicSymbolsMap<'_> {
let _ = tracing::trace_span!("public_symbols_map", file = ?file.debug(db.upcast())).enter();
let module_scope = root_scope(db, file);
let symbols = symbol_table(db, module_scope);
let public_symbols: IndexVec<_, _> = symbols
.symbol_ids()
.map(|id| PublicSymbolId::new(db, file, id))
.collect();
PublicSymbolsMap {
symbols: public_symbols,
}
}
/// Maps [`LocalSymbolId`] of a file's root scope to the corresponding [`PublicSymbolId`] (Salsa ingredients).
#[derive(Eq, PartialEq, Debug)]
pub(crate) struct PublicSymbolsMap<'db> {
symbols: IndexVec<ScopedSymbolId, PublicSymbolId<'db>>,
}
impl<'db> PublicSymbolsMap<'db> {
/// Resolve the [`PublicSymbolId`] for the module-level `symbol_id`.
fn public(&self, symbol_id: ScopedSymbolId) -> PublicSymbolId<'db> {
self.symbols[symbol_id]
}
}
/// A cross-module identifier of a scope that can be used as a salsa query parameter.
#[salsa::tracked]
pub struct ScopeId<'db> {
#[allow(clippy::used_underscore_binding)]
#[id]
pub file: VfsFile,
pub file: File,
#[id]
pub file_scope_id: FileScopeId,
/// The node that introduces this scope.
#[no_eq]
#[return_ref]
pub node: NodeWithScopeKind,
#[no_eq]
count: countme::Count<ScopeId<'static>>,
}
impl<'db> ScopeId<'db> {
pub(crate) fn is_function_like(self, db: &'db dyn Db) -> bool {
// Type parameter scopes behave like function scopes in terms of name resolution; CPython
// symbol table also uses the term "function-like" for these scopes.
matches!(
self.node(db),
NodeWithScopeKind::ClassTypeParameters(_)
| NodeWithScopeKind::FunctionTypeParameters(_)
| NodeWithScopeKind::Function(_)
)
}
#[cfg(test)]
pub(crate) fn name(self, db: &'db dyn Db) -> &'db str {
match self.node(db) {
NodeWithScopeKind::Module => "<module>",
NodeWithScopeKind::Class(class) | NodeWithScopeKind::ClassTypeParameters(class) => {
class.name.as_str()
}
NodeWithScopeKind::Function(function)
| NodeWithScopeKind::FunctionTypeParameters(function) => function.name.as_str(),
NodeWithScopeKind::Lambda(_) => "<lambda>",
}
}
}
/// ID that uniquely identifies a scope inside of a module.
@@ -204,39 +136,25 @@ pub struct ScopeId<'db> {
pub struct FileScopeId;
impl FileScopeId {
/// Returns the scope id of the Root scope.
pub fn root() -> Self {
/// Returns the scope id of the module-global scope.
pub fn global() -> Self {
FileScopeId::from_u32(0)
}
pub fn to_scope_id(self, db: &dyn Db, file: VfsFile) -> ScopeId<'_> {
scopes_map(db, file).get(self)
pub fn to_scope_id(self, db: &dyn Db, file: File) -> ScopeId<'_> {
let index = semantic_index(db, file);
index.scope_ids_by_scope[self]
}
}
#[derive(Debug, Eq, PartialEq)]
pub struct Scope {
pub(super) name: Name,
pub(super) parent: Option<FileScopeId>,
pub(super) definition: Option<Definition>,
pub(super) defining_symbol: Option<FileSymbolId>,
pub(super) kind: ScopeKind,
pub(super) descendents: Range<FileScopeId>,
}
impl Scope {
pub fn name(&self) -> &Name {
&self.name
}
pub fn definition(&self) -> Option<Definition> {
self.definition
}
pub fn defining_symbol(&self) -> Option<FileSymbolId> {
self.defining_symbol
}
pub fn parent(self) -> Option<FileScopeId> {
self.parent
}
@@ -280,6 +198,7 @@ impl SymbolTable {
&self.symbols[symbol_id.into()]
}
#[allow(unused)]
pub(crate) fn symbol_ids(&self) -> impl Iterator<Item = ScopedSymbolId> {
self.symbols.indices()
}
@@ -289,7 +208,6 @@ impl SymbolTable {
}
/// Returns the symbol named `name`.
#[allow(unused)]
pub(crate) fn symbol_by_name(&self, name: &str) -> Option<&Symbol> {
let id = self.symbol_id_by_name(name)?;
Some(self.symbol(id))
@@ -339,8 +257,7 @@ impl SymbolTableBuilder {
&mut self,
name: Name,
flags: SymbolFlags,
definition: Option<Definition>,
) -> ScopedSymbolId {
) -> (ScopedSymbolId, bool) {
let hash = SymbolTable::hash_name(&name);
let entry = self
.table
@@ -353,21 +270,17 @@ impl SymbolTableBuilder {
let symbol = &mut self.table.symbols[*entry.key()];
symbol.insert_flags(flags);
if let Some(definition) = definition {
symbol.push_definition(definition);
}
*entry.key()
(*entry.key(), false)
}
RawEntryMut::Vacant(entry) => {
let mut symbol = Symbol::new(name, definition);
let mut symbol = Symbol::new(name);
symbol.insert_flags(flags);
let id = self.table.symbols.push(symbol);
entry.insert_with_hasher(hash, id, (), |id| {
SymbolTable::hash_name(self.table.symbols[*id].name().as_str())
});
id
(id, true)
}
}
}
@@ -377,3 +290,93 @@ impl SymbolTableBuilder {
self.table
}
}
/// Reference to a node that introduces a new scope.
#[derive(Copy, Clone, Debug)]
pub(crate) enum NodeWithScopeRef<'a> {
Module,
Class(&'a ast::StmtClassDef),
Function(&'a ast::StmtFunctionDef),
Lambda(&'a ast::ExprLambda),
FunctionTypeParameters(&'a ast::StmtFunctionDef),
ClassTypeParameters(&'a ast::StmtClassDef),
}
impl NodeWithScopeRef<'_> {
/// Converts the unowned reference to an owned [`NodeWithScopeKind`].
///
/// # Safety
/// The node wrapped by `self` must be a child of `module`.
#[allow(unsafe_code)]
pub(super) unsafe fn to_kind(self, module: ParsedModule) -> NodeWithScopeKind {
match self {
NodeWithScopeRef::Module => NodeWithScopeKind::Module,
NodeWithScopeRef::Class(class) => {
NodeWithScopeKind::Class(AstNodeRef::new(module, class))
}
NodeWithScopeRef::Function(function) => {
NodeWithScopeKind::Function(AstNodeRef::new(module, function))
}
NodeWithScopeRef::Lambda(lambda) => {
NodeWithScopeKind::Lambda(AstNodeRef::new(module, lambda))
}
NodeWithScopeRef::FunctionTypeParameters(function) => {
NodeWithScopeKind::FunctionTypeParameters(AstNodeRef::new(module, function))
}
NodeWithScopeRef::ClassTypeParameters(class) => {
NodeWithScopeKind::ClassTypeParameters(AstNodeRef::new(module, class))
}
}
}
pub(super) fn scope_kind(self) -> ScopeKind {
match self {
NodeWithScopeRef::Module => ScopeKind::Module,
NodeWithScopeRef::Class(_) => ScopeKind::Class,
NodeWithScopeRef::Function(_) => ScopeKind::Function,
NodeWithScopeRef::Lambda(_) => ScopeKind::Function,
NodeWithScopeRef::FunctionTypeParameters(_)
| NodeWithScopeRef::ClassTypeParameters(_) => ScopeKind::Annotation,
}
}
pub(crate) fn node_key(self) -> NodeWithScopeKey {
match self {
NodeWithScopeRef::Module => NodeWithScopeKey::Module,
NodeWithScopeRef::Class(class) => NodeWithScopeKey::Class(NodeKey::from_node(class)),
NodeWithScopeRef::Function(function) => {
NodeWithScopeKey::Function(NodeKey::from_node(function))
}
NodeWithScopeRef::Lambda(lambda) => {
NodeWithScopeKey::Lambda(NodeKey::from_node(lambda))
}
NodeWithScopeRef::FunctionTypeParameters(function) => {
NodeWithScopeKey::FunctionTypeParameters(NodeKey::from_node(function))
}
NodeWithScopeRef::ClassTypeParameters(class) => {
NodeWithScopeKey::ClassTypeParameters(NodeKey::from_node(class))
}
}
}
}
/// Node that introduces a new scope.
#[derive(Clone, Debug)]
pub enum NodeWithScopeKind {
Module,
Class(AstNodeRef<ast::StmtClassDef>),
ClassTypeParameters(AstNodeRef<ast::StmtClassDef>),
Function(AstNodeRef<ast::StmtFunctionDef>),
FunctionTypeParameters(AstNodeRef<ast::StmtFunctionDef>),
Lambda(AstNodeRef<ast::ExprLambda>),
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub(crate) enum NodeWithScopeKey {
Module,
Class(NodeKey),
ClassTypeParameters(NodeKey),
Function(NodeKey),
FunctionTypeParameters(NodeKey),
Lambda(NodeKey),
}

View File

@@ -0,0 +1,354 @@
//! Build a map from each use of a symbol to the definitions visible from that use.
//!
//! Let's take this code sample:
//!
//! ```python
//! x = 1
//! x = 2
//! y = x
//! if flag:
//! x = 3
//! else:
//! x = 4
//! z = x
//! ```
//!
//! In this snippet, we have four definitions of `x` (the statements assigning `1`, `2`, `3`,
//! and `4` to it), and two uses of `x` (the `y = x` and `z = x` assignments). The first
//! [`Definition`] of `x` is never visible to any use, because it's immediately replaced by the
//! second definition, before any use happens. (A linter could thus flag the statement `x = 1`
//! as likely superfluous.)
//!
//! The first use of `x` has one definition visible to it: the assignment `x = 2`.
//!
//! Things get a bit more complex when we have branches. We will definitely take either the `if` or
//! the `else` branch. Thus, the second use of `x` has two definitions visible to it: `x = 3` and
//! `x = 4`. The `x = 2` definition is no longer visible, because it must be replaced by either `x
//! = 3` or `x = 4`, no matter which branch was taken. We don't know which branch was taken, so we
//! must consider both definitions as visible, which means eventually we would (in type inference)
//! look at these two definitions and infer a type of `Literal[3, 4]` -- the union of `Literal[3]`
//! and `Literal[4]` -- for the second use of `x`.
//!
//! So that's one question our use-def map needs to answer: given a specific use of a symbol, which
//! definition(s) is/are visible from that use. In
//! [`AstIds`](crate::semantic_index::ast_ids::AstIds) we number all uses (that means a `Name` node
//! with `Load` context) so we have a `ScopedUseId` to efficiently represent each use.
//!
//! The other case we need to handle is when a symbol is referenced from a different scope (the
//! most obvious example of this is an import). We call this "public" use of a symbol. So the other
//! question we need to be able to answer is, what are the publicly-visible definitions of each
//! symbol?
//!
//! Technically, public use of a symbol could also occur from any point in control flow of the
//! scope where the symbol is defined (via inline imports and import cycles, in the case of an
//! import, or via a function call partway through the local scope that ends up using a symbol from
//! the scope via a global or nonlocal reference.) But modeling this fully accurately requires
//! whole-program analysis that isn't tractable for an efficient incremental compiler, since it
//! means a given symbol could have a different type every place it's referenced throughout the
//! program, depending on the shape of arbitrarily-sized call/import graphs. So we follow other
//! Python type-checkers in making the simplifying assumption that usually the scope will finish
//! execution before its symbols are made visible to other scopes; for instance, most imports will
//! import from a complete module, not a partially-executed module. (We may want to get a little
//! smarter than this in the future, in particular for closures, but for now this is where we
//! start.)
//!
//! So this means that the publicly-visible definitions of a symbol are the definitions still
//! visible at the end of the scope.
//!
//! The data structure we build to answer these two questions is the `UseDefMap`. It has a
//! `definitions_by_use` vector indexed by [`ScopedUseId`] and a `public_definitions` vector
//! indexed by [`ScopedSymbolId`]. The values in each of these vectors are (in principle) a list of
//! visible definitions at that use, or at the end of the scope for that symbol.
//!
//! In order to avoid vectors-of-vectors and all the allocations that would entail, we don't
//! actually store these "list of visible definitions" as a vector of [`Definition`] IDs. Instead,
//! the values in `definitions_by_use` and `public_definitions` are a [`Definitions`] struct that
//! keeps a [`Range`] into a third vector of [`Definition`] IDs, `all_definitions`. The trick with
//! this representation is that it requires that the definitions visible at any given use of a
//! symbol are stored sequentially in `all_definitions`.
//!
//! There is another special kind of possible "definition" for a symbol: it might be unbound in the
//! scope. (This isn't equivalent to "zero visible definitions", since we may go through an `if`
//! that has a definition for the symbol, leaving us with one visible definition, but still also
//! the "unbound" possibility, since we might not have taken the `if` branch.)
//!
//! The simplest way to model "unbound" would be as an actual [`Definition`] itself: the initial
//! visible [`Definition`] for each symbol in a scope. But actually modeling it this way would
//! dramatically increase the number of [`Definition`] that Salsa must track. Since "unbound" is a
//! special definition in that all symbols share it, and it doesn't have any additional per-symbol
//! state, we can represent it more efficiently: we use the `may_be_unbound` boolean on the
//! [`Definitions`] struct. If this flag is `true`, it means the symbol/use really has one
//! additional visible "definition", which is the unbound state. If this flag is `false`, it means
//! we've eliminated the possibility of unbound: every path we've followed includes a definition
//! for this symbol.
//!
//! To build a [`UseDefMap`], the [`UseDefMapBuilder`] is notified of each new use and definition
//! as they are encountered by the
//! [`SemanticIndexBuilder`](crate::semantic_index::builder::SemanticIndexBuilder) AST visit. For
//! each symbol, the builder tracks the currently-visible definitions for that symbol. When we hit
//! a use of a symbol, it records the currently-visible definitions for that symbol as the visible
//! definitions for that use. When we reach the end of the scope, it records the currently-visible
//! definitions for each symbol as the public definitions of that symbol.
//!
//! Let's walk through the above example. Initially we record for `x` that it has no visible
//! definitions, and may be unbound. When we see `x = 1`, we record that as the sole visible
//! definition of `x`, and flip `may_be_unbound` to `false`. Then we see `x = 2`, and it replaces
//! `x = 1` as the sole visible definition of `x`. When we get to `y = x`, we record that the
//! visible definitions for that use of `x` are just the `x = 2` definition.
//!
//! Then we hit the `if` branch. We visit the `test` node (`flag` in this case), since that will
//! happen regardless. Then we take a pre-branch snapshot of the currently visible definitions for
//! all symbols, which we'll need later. Then we go ahead and visit the `if` body. When we see `x =
//! 3`, it replaces `x = 2` as the sole visible definition of `x`. At the end of the `if` body, we
//! take another snapshot of the currently-visible definitions; we'll call this the post-if-body
//! snapshot.
//!
//! Now we need to visit the `else` clause. The conditions when entering the `else` clause should
//! be the pre-if conditions; if we are entering the `else` clause, we know that the `if` test
//! failed and we didn't execute the `if` body. So we first reset the builder to the pre-if state,
//! using the snapshot we took previously (meaning we now have `x = 2` as the sole visible
//! definition for `x` again), then visit the `else` clause, where `x = 4` replaces `x = 2` as the
//! sole visible definition of `x`.
//!
//! Now we reach the end of the if/else, and want to visit the following code. The state here needs
//! to reflect that we might have gone through the `if` branch, or we might have gone through the
//! `else` branch, and we don't know which. So we need to "merge" our current builder state
//! (reflecting the end-of-else state, with `x = 4` as the only visible definition) with our
//! post-if-body snapshot (which has `x = 3` as the only visible definition). The result of this
//! merge is that we now have two visible definitions of `x`: `x = 3` and `x = 4`.
//!
//! The [`UseDefMapBuilder`] itself just exposes methods for taking a snapshot, resetting to a
//! snapshot, and merging a snapshot into the current state. The logic using these methods lives in
//! [`SemanticIndexBuilder`](crate::semantic_index::builder::SemanticIndexBuilder), e.g. where it
//! visits a `StmtIf` node.
//!
//! (In the future we may have some other questions we want to answer as well, such as "is this
//! definition used?", which will require tracking a bit more info in our map, e.g. a "used" bit
//! for each [`Definition`] which is flipped to true when we record that definition for a use.)
use crate::semantic_index::ast_ids::ScopedUseId;
use crate::semantic_index::definition::Definition;
use crate::semantic_index::symbol::ScopedSymbolId;
use ruff_index::IndexVec;
use std::ops::Range;
/// All definitions that can reach a given use of a name.
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct UseDefMap<'db> {
// TODO store constraints with definitions for type narrowing
/// Definition IDs array for `definitions_by_use` and `public_definitions` to slice into.
all_definitions: Vec<Definition<'db>>,
/// Definitions that can reach a [`ScopedUseId`].
definitions_by_use: IndexVec<ScopedUseId, Definitions>,
/// Definitions of each symbol visible at end of scope.
public_definitions: IndexVec<ScopedSymbolId, Definitions>,
}
impl<'db> UseDefMap<'db> {
pub(crate) fn use_definitions(&self, use_id: ScopedUseId) -> &[Definition<'db>] {
&self.all_definitions[self.definitions_by_use[use_id].definitions_range.clone()]
}
pub(crate) fn use_may_be_unbound(&self, use_id: ScopedUseId) -> bool {
self.definitions_by_use[use_id].may_be_unbound
}
pub(crate) fn public_definitions(&self, symbol: ScopedSymbolId) -> &[Definition<'db>] {
&self.all_definitions[self.public_definitions[symbol].definitions_range.clone()]
}
pub(crate) fn public_may_be_unbound(&self, symbol: ScopedSymbolId) -> bool {
self.public_definitions[symbol].may_be_unbound
}
}
/// Definitions visible for a symbol at a particular use (or end-of-scope).
#[derive(Clone, Debug, PartialEq, Eq)]
struct Definitions {
/// [`Range`] in `all_definitions` of the visible definition IDs.
definitions_range: Range<usize>,
/// Is the symbol possibly unbound at this point?
may_be_unbound: bool,
}
impl Definitions {
/// The default state of a symbol is "no definitions, may be unbound", aka definitely-unbound.
fn unbound() -> Self {
Self {
definitions_range: Range::default(),
may_be_unbound: true,
}
}
}
impl Default for Definitions {
fn default() -> Self {
Definitions::unbound()
}
}
/// A snapshot of the visible definitions for each symbol at a particular point in control flow.
#[derive(Clone, Debug)]
pub(super) struct FlowSnapshot {
definitions_by_symbol: IndexVec<ScopedSymbolId, Definitions>,
}
#[derive(Debug)]
pub(super) struct UseDefMapBuilder<'db> {
/// Definition IDs array for `definitions_by_use` and `definitions_by_symbol` to slice into.
all_definitions: Vec<Definition<'db>>,
/// Visible definitions at each so-far-recorded use.
definitions_by_use: IndexVec<ScopedUseId, Definitions>,
/// Currently visible definitions for each symbol.
definitions_by_symbol: IndexVec<ScopedSymbolId, Definitions>,
}
impl<'db> UseDefMapBuilder<'db> {
pub(super) fn new() -> Self {
Self {
all_definitions: Vec::new(),
definitions_by_use: IndexVec::new(),
definitions_by_symbol: IndexVec::new(),
}
}
pub(super) fn add_symbol(&mut self, symbol: ScopedSymbolId) {
let new_symbol = self.definitions_by_symbol.push(Definitions::unbound());
debug_assert_eq!(symbol, new_symbol);
}
pub(super) fn record_definition(
&mut self,
symbol: ScopedSymbolId,
definition: Definition<'db>,
) {
// We have a new definition of a symbol; this replaces any previous definitions in this
// path.
let def_idx = self.all_definitions.len();
self.all_definitions.push(definition);
self.definitions_by_symbol[symbol] = Definitions {
#[allow(clippy::range_plus_one)]
definitions_range: def_idx..(def_idx + 1),
may_be_unbound: false,
};
}
pub(super) fn record_use(&mut self, symbol: ScopedSymbolId, use_id: ScopedUseId) {
// We have a use of a symbol; clone the currently visible definitions for that symbol, and
// record them as the visible definitions for this use.
let new_use = self
.definitions_by_use
.push(self.definitions_by_symbol[symbol].clone());
debug_assert_eq!(use_id, new_use);
}
/// Take a snapshot of the current visible-symbols state.
pub(super) fn snapshot(&self) -> FlowSnapshot {
FlowSnapshot {
definitions_by_symbol: self.definitions_by_symbol.clone(),
}
}
/// Restore the current builder visible-definitions state to the given snapshot.
pub(super) fn restore(&mut self, snapshot: FlowSnapshot) {
// We never remove symbols from `definitions_by_symbol` (it's an IndexVec, and the symbol
// IDs must line up), so the current number of known symbols must always be equal to or
// greater than the number of known symbols in a previously-taken snapshot.
let num_symbols = self.definitions_by_symbol.len();
debug_assert!(num_symbols >= snapshot.definitions_by_symbol.len());
// Restore the current visible-definitions state to the given snapshot.
self.definitions_by_symbol = snapshot.definitions_by_symbol;
// If the snapshot we are restoring is missing some symbols we've recorded since, we need
// to fill them in so the symbol IDs continue to line up. Since they don't exist in the
// snapshot, the correct state to fill them in with is "unbound", the default.
self.definitions_by_symbol
.resize(num_symbols, Definitions::unbound());
}
/// Merge the given snapshot into the current state, reflecting that we might have taken either
/// path to get here. The new visible-definitions state for each symbol should include
/// definitions from both the prior state and the snapshot.
pub(super) fn merge(&mut self, snapshot: &FlowSnapshot) {
// The tricky thing about merging two Ranges pointing into `all_definitions` is that if the
// two Ranges aren't already adjacent in `all_definitions`, we will have to copy at least
// one or the other of the ranges to the end of `all_definitions` so as to make them
// adjacent. We can't ever move things around in `all_definitions` because previously
// recorded uses may still have ranges pointing to any part of it; all we can do is append.
// It's possible we may end up with some old entries in `all_definitions` that nobody is
// pointing to, but that's OK.
// We never remove symbols from `definitions_by_symbol` (it's an IndexVec, and the symbol
// IDs must line up), so the current number of known symbols must always be equal to or
// greater than the number of known symbols in a previously-taken snapshot.
debug_assert!(self.definitions_by_symbol.len() >= snapshot.definitions_by_symbol.len());
for (symbol_id, current) in self.definitions_by_symbol.iter_mut_enumerated() {
let Some(snapshot) = snapshot.definitions_by_symbol.get(symbol_id) else {
// Symbol not present in snapshot, so it's unbound from that path.
current.may_be_unbound = true;
continue;
};
// If the symbol can be unbound in either predecessor, it can be unbound post-merge.
current.may_be_unbound |= snapshot.may_be_unbound;
// Merge the definition ranges.
let current = &mut current.definitions_range;
let snapshot = &snapshot.definitions_range;
// We never create reversed ranges.
debug_assert!(current.end >= current.start);
debug_assert!(snapshot.end >= snapshot.start);
if current == snapshot {
// Ranges already identical, nothing to do.
} else if snapshot.is_empty() {
// Merging from an empty range; nothing to do.
} else if (*current).is_empty() {
// Merging to an empty range; just use the incoming range.
*current = snapshot.clone();
} else if snapshot.end >= current.start && snapshot.start <= current.end {
// Ranges are adjacent or overlapping, merge them in-place.
*current = current.start.min(snapshot.start)..current.end.max(snapshot.end);
} else if current.end == self.all_definitions.len() {
// Ranges are not adjacent or overlapping, `current` is at the end of
// `all_definitions`, we need to copy `snapshot` to the end so they are adjacent
// and can be merged into one range.
self.all_definitions.extend_from_within(snapshot.clone());
current.end = self.all_definitions.len();
} else if snapshot.end == self.all_definitions.len() {
// Ranges are not adjacent or overlapping, `snapshot` is at the end of
// `all_definitions`, we need to copy `current` to the end so they are adjacent and
// can be merged into one range.
self.all_definitions.extend_from_within(current.clone());
current.start = snapshot.start;
current.end = self.all_definitions.len();
} else {
// Ranges are not adjacent and neither one is at the end of `all_definitions`, we
// have to copy both to the end so they are adjacent and we can merge them.
let start = self.all_definitions.len();
self.all_definitions.extend_from_within(current.clone());
self.all_definitions.extend_from_within(snapshot.clone());
current.start = start;
current.end = self.all_definitions.len();
}
}
}
pub(super) fn finish(mut self) -> UseDefMap<'db> {
self.all_definitions.shrink_to_fit();
self.definitions_by_symbol.shrink_to_fit();
self.definitions_by_use.shrink_to_fit();
UseDefMap {
all_definitions: self.all_definitions,
definitions_by_use: self.definitions_by_use,
public_definitions: self.definitions_by_symbol,
}
}
}

View File

@@ -0,0 +1,249 @@
use ruff_db::files::File;
use ruff_python_ast as ast;
use ruff_python_ast::{Expr, ExpressionRef, StmtClassDef};
use crate::module_name::ModuleName;
use crate::module_resolver::{resolve_module, Module};
use crate::semantic_index::ast_ids::HasScopedAstId;
use crate::semantic_index::semantic_index;
use crate::types::{definition_ty, global_symbol_ty_by_name, infer_scope_types, Type};
use crate::Db;
pub struct SemanticModel<'db> {
db: &'db dyn Db,
file: File,
}
impl<'db> SemanticModel<'db> {
pub fn new(db: &'db dyn Db, file: File) -> Self {
Self { db, file }
}
// TODO we don't actually want to expose the Db directly to lint rules, but we need to find a
// solution for exposing information from types
pub fn db(&self) -> &dyn Db {
self.db
}
pub fn resolve_module(&self, module_name: ModuleName) -> Option<Module> {
resolve_module(self.db, module_name)
}
pub fn global_symbol_ty(&self, module: &Module, symbol_name: &str) -> Type<'db> {
global_symbol_ty_by_name(self.db, module.file(), symbol_name)
}
}
pub trait HasTy {
/// Returns the inferred type of `self`.
///
/// ## Panics
/// May panic if `self` is from another file than `model`.
fn ty<'db>(&self, model: &SemanticModel<'db>) -> Type<'db>;
}
impl HasTy for ast::ExpressionRef<'_> {
fn ty<'db>(&self, model: &SemanticModel<'db>) -> Type<'db> {
let index = semantic_index(model.db, model.file);
let file_scope = index.expression_scope_id(*self);
let scope = file_scope.to_scope_id(model.db, model.file);
let expression_id = self.scoped_ast_id(model.db, scope);
infer_scope_types(model.db, scope).expression_ty(expression_id)
}
}
macro_rules! impl_expression_has_ty {
($ty: ty) => {
impl HasTy for $ty {
#[inline]
fn ty<'db>(&self, model: &SemanticModel<'db>) -> Type<'db> {
let expression_ref = ExpressionRef::from(self);
expression_ref.ty(model)
}
}
};
}
impl_expression_has_ty!(ast::ExprBoolOp);
impl_expression_has_ty!(ast::ExprNamed);
impl_expression_has_ty!(ast::ExprBinOp);
impl_expression_has_ty!(ast::ExprUnaryOp);
impl_expression_has_ty!(ast::ExprLambda);
impl_expression_has_ty!(ast::ExprIf);
impl_expression_has_ty!(ast::ExprDict);
impl_expression_has_ty!(ast::ExprSet);
impl_expression_has_ty!(ast::ExprListComp);
impl_expression_has_ty!(ast::ExprSetComp);
impl_expression_has_ty!(ast::ExprDictComp);
impl_expression_has_ty!(ast::ExprGenerator);
impl_expression_has_ty!(ast::ExprAwait);
impl_expression_has_ty!(ast::ExprYield);
impl_expression_has_ty!(ast::ExprYieldFrom);
impl_expression_has_ty!(ast::ExprCompare);
impl_expression_has_ty!(ast::ExprCall);
impl_expression_has_ty!(ast::ExprFString);
impl_expression_has_ty!(ast::ExprStringLiteral);
impl_expression_has_ty!(ast::ExprBytesLiteral);
impl_expression_has_ty!(ast::ExprNumberLiteral);
impl_expression_has_ty!(ast::ExprBooleanLiteral);
impl_expression_has_ty!(ast::ExprNoneLiteral);
impl_expression_has_ty!(ast::ExprEllipsisLiteral);
impl_expression_has_ty!(ast::ExprAttribute);
impl_expression_has_ty!(ast::ExprSubscript);
impl_expression_has_ty!(ast::ExprStarred);
impl_expression_has_ty!(ast::ExprName);
impl_expression_has_ty!(ast::ExprList);
impl_expression_has_ty!(ast::ExprTuple);
impl_expression_has_ty!(ast::ExprSlice);
impl_expression_has_ty!(ast::ExprIpyEscapeCommand);
impl HasTy for ast::Expr {
fn ty<'db>(&self, model: &SemanticModel<'db>) -> Type<'db> {
match self {
Expr::BoolOp(inner) => inner.ty(model),
Expr::Named(inner) => inner.ty(model),
Expr::BinOp(inner) => inner.ty(model),
Expr::UnaryOp(inner) => inner.ty(model),
Expr::Lambda(inner) => inner.ty(model),
Expr::If(inner) => inner.ty(model),
Expr::Dict(inner) => inner.ty(model),
Expr::Set(inner) => inner.ty(model),
Expr::ListComp(inner) => inner.ty(model),
Expr::SetComp(inner) => inner.ty(model),
Expr::DictComp(inner) => inner.ty(model),
Expr::Generator(inner) => inner.ty(model),
Expr::Await(inner) => inner.ty(model),
Expr::Yield(inner) => inner.ty(model),
Expr::YieldFrom(inner) => inner.ty(model),
Expr::Compare(inner) => inner.ty(model),
Expr::Call(inner) => inner.ty(model),
Expr::FString(inner) => inner.ty(model),
Expr::StringLiteral(inner) => inner.ty(model),
Expr::BytesLiteral(inner) => inner.ty(model),
Expr::NumberLiteral(inner) => inner.ty(model),
Expr::BooleanLiteral(inner) => inner.ty(model),
Expr::NoneLiteral(inner) => inner.ty(model),
Expr::EllipsisLiteral(inner) => inner.ty(model),
Expr::Attribute(inner) => inner.ty(model),
Expr::Subscript(inner) => inner.ty(model),
Expr::Starred(inner) => inner.ty(model),
Expr::Name(inner) => inner.ty(model),
Expr::List(inner) => inner.ty(model),
Expr::Tuple(inner) => inner.ty(model),
Expr::Slice(inner) => inner.ty(model),
Expr::IpyEscapeCommand(inner) => inner.ty(model),
}
}
}
impl HasTy for ast::StmtFunctionDef {
fn ty<'db>(&self, model: &SemanticModel<'db>) -> Type<'db> {
let index = semantic_index(model.db, model.file);
let definition = index.definition(self);
definition_ty(model.db, definition)
}
}
impl HasTy for StmtClassDef {
fn ty<'db>(&self, model: &SemanticModel<'db>) -> Type<'db> {
let index = semantic_index(model.db, model.file);
let definition = index.definition(self);
definition_ty(model.db, definition)
}
}
impl HasTy for ast::Alias {
fn ty<'db>(&self, model: &SemanticModel<'db>) -> Type<'db> {
let index = semantic_index(model.db, model.file);
let definition = index.definition(self);
definition_ty(model.db, definition)
}
}
#[cfg(test)]
mod tests {
use ruff_db::files::system_path_to_file;
use ruff_db::parsed::parsed_module;
use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
use crate::db::tests::TestDb;
use crate::program::{Program, SearchPathSettings};
use crate::python_version::PythonVersion;
use crate::types::Type;
use crate::{HasTy, SemanticModel};
fn setup_db() -> TestDb {
let db = TestDb::new();
Program::new(
&db,
PythonVersion::default(),
SearchPathSettings {
extra_paths: vec![],
src_root: SystemPathBuf::from("/src"),
site_packages: vec![],
custom_typeshed: None,
},
);
db
}
#[test]
fn function_ty() -> anyhow::Result<()> {
let mut db = setup_db();
db.write_file("/src/foo.py", "def test(): pass")?;
let foo = system_path_to_file(&db, "/src/foo.py").unwrap();
let ast = parsed_module(&db, foo);
let function = ast.suite()[0].as_function_def_stmt().unwrap();
let model = SemanticModel::new(&db, foo);
let ty = function.ty(&model);
assert!(matches!(ty, Type::Function(_)));
Ok(())
}
#[test]
fn class_ty() -> anyhow::Result<()> {
let mut db = setup_db();
db.write_file("/src/foo.py", "class Test: pass")?;
let foo = system_path_to_file(&db, "/src/foo.py").unwrap();
let ast = parsed_module(&db, foo);
let class = ast.suite()[0].as_class_def_stmt().unwrap();
let model = SemanticModel::new(&db, foo);
let ty = class.ty(&model);
assert!(matches!(ty, Type::Class(_)));
Ok(())
}
#[test]
fn alias_ty() -> anyhow::Result<()> {
let mut db = setup_db();
db.write_files([
("/src/foo.py", "class Test: pass"),
("/src/bar.py", "from foo import Test"),
])?;
let bar = system_path_to_file(&db, "/src/bar.py").unwrap();
let ast = parsed_module(&db, bar);
let import = ast.suite()[0].as_import_from_stmt().unwrap();
let alias = &import.names[0];
let model = SemanticModel::new(&db, bar);
let ty = alias.ty(&model);
assert!(matches!(ty, Type::Class(_)));
Ok(())
}
}

Some files were not shown because too many files have changed in this diff Show More