Compare commits

...

475 Commits

Author SHA1 Message Date
Charlie Marsh
c23bf395e2 Warn on invalid # noqa rule codes 2024-08-11 19:23:33 -04:00
Charlie Marsh
383676e332 Avoid parsing joint rule codes as distinct codes in # noqa 2024-08-11 19:23:00 -04:00
Yury Fedotov
feba5031dc [Minor typo] Fix article in "an fix" (#12797) 2024-08-10 21:22:00 -04:00
Dylan
0c2b88f224 [flake8-simplify] Further simplify to binary in preview for if-else-block-instead-of-if-exp (SIM108) (#12796)
In most cases we should suggest a ternary operator, but there are three
edge cases where a binary operator is more appropriate.

Given an if-else block of the form

```python
if test:
    target_var = body_value
else:
    target_var = else_value
```
This PR updates the check for SIM108 to the following:

- If `test == body_value` and preview enabled, suggest to replace with
`target_var = test or else_value`
- If `test == not body_value` and preview enabled, suggest to replace
with `target_var = body_value and else_value`
- If `not test == body_value` and preview enabled, suggest to replace
with `target_var = body_value and else_value`
- Otherwise, suggest to replace with `target_var = body_value if test
else else_value`

Closes #12189.
2024-08-10 16:49:25 +00:00
Alex Waygood
cf1a57df5a Remove red_knot_python_semantic::python_version::TargetVersion (#12790) 2024-08-10 14:28:31 +01:00
renovate[bot]
597c5f9124 Update dependency black to v24 (#12728) 2024-08-10 18:04:37 +05:30
Charlie Marsh
69e1c567d4 Treat type(Protocol) et al as metaclass base (#12770)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12736.
2024-08-09 20:10:12 +00:00
Alex Waygood
37b9bac403 [red-knot] Add support for --system-site-packages virtual environments (#12759) 2024-08-09 21:02:16 +01:00
Alex Waygood
83db48d316 RUF031: Ignore unparenthesized tuples in subscripts when the subscript is obviously a type annotation or type alias (#12762) 2024-08-09 20:31:27 +01:00
Alex Waygood
c4e651921b [red-knot] Move, rename and make public the PyVersion type (#12782) 2024-08-09 16:49:17 +01:00
Dylan
b595346213 [ruff] Do not remove parens for tuples with starred expressions in Python <=3.10 RUF031 (#12784) 2024-08-09 17:30:29 +02:00
Ryan Hoban
253474b312 Document that BLE001 supports both BaseException and Exception (#12788) 2024-08-09 17:28:50 +02:00
Micha Reiser
a176679b24 Log warnings when skipping editable installations (#12779) 2024-08-09 16:29:43 +02:00
Charlie Marsh
1f51048fa4 Don't enforce returns and yields in abstract methods (#12771)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12685.
2024-08-09 13:34:14 +00:00
Micha Reiser
2abfab0f9b Move Program and related structs to red_knot_python_semantic (#12777) 2024-08-09 11:50:45 +02:00
Dylan
64f1f3468d [ruff] Skip tuples with slice expressions in incorrectly-parenthesized-tuple-in-subscript (RUF031) (#12768)
## Summary

Adding parentheses to a tuple in a subscript with elements that include
slice expressions causes a syntax error. For example, `d[(1,2,:)]` is a
syntax error.

So, when `lint.ruff.parenthesize-tuple-in-subscript = true` and the
tuple includes a slice expression, we skip this check and fix.

Closes #12766.
2024-08-09 09:22:58 +00:00
Micha Reiser
ffaa35eafe Add test helper to setup tracing (#12741) 2024-08-09 07:04:04 +00:00
Charlie Marsh
c906b0183b Add known problems warning to type-comparison rule (#12769)
## Summary

See: https://github.com/astral-sh/ruff/issues/4560
2024-08-09 01:41:15 +00:00
Carl Meyer
bc5b9b81dd [red-knot] add dev dependency on ruff_db os feature from red_knot_pyt… (#12760) 2024-08-08 18:10:30 +01:00
Dhruv Manilawala
221ea662e0 Bump version to 0.5.7 (#12756) 2024-08-08 20:56:15 +05:30
Alex Waygood
d28c5afd14 [red-knot] Remove mentions of Ruff from the CLI help (#12752)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-08-08 15:35:10 +01:00
Alex Waygood
f1de08c2a0 [red-knot] Merge the semantic and module-resolver crates (#12751) 2024-08-08 15:34:11 +01:00
Christian Clauss
33e9a6a54e SIM110: any() is ~3x slower than the code it replaces (#12746)
> ~Builtins are also more efficient than `for` loops.~

Let's not promise performance because this code transformation does not
deliver.

Benchmark written by @dcbaker

> `any()` seems to be about 1/3 as fast (Python 3.11.9, NixOS):
```python
loop = 'abcdef'.split()
found = 'f'
nfound = 'g'


def test1():
    for x in loop:
        if x == found:
            return True
    return False


def test2():
    return any(x == found for x in loop)


def test3():
    for x in loop:
        if x == nfound:
            return True
    return False


def test4():
    return any(x == nfound for x in loop)


if __name__ == "__main__":
    import timeit

    print('for loop (found)    :', timeit.timeit(test1))
    print('for loop (not found):', timeit.timeit(test3))
    print('any() (found)       :', timeit.timeit(test2))
    print('any() (not found)   :', timeit.timeit(test4))
```
```
for loop (found)    : 0.051076093994197436
for loop (not found): 0.04388196699437685
any() (found)       : 0.15422860698890872
any() (not found)   : 0.15568504799739458
```
I have retested with longer lists and on multiple Python versions with
similar results.
2024-08-08 08:25:43 -04:00
Dylan
f577e03021 [ruff] Ignore empty tuples for incorrectly-parenthesized-tuple-in-subscript (RUF031) (#12749) 2024-08-08 13:18:03 +02:00
Micha Reiser
f53733525c Remove all useEffect usages (#12659) 2024-08-08 13:16:38 +02:00
Micha Reiser
2daa914334 Gracefully handle errors in CLI (#12747) 2024-08-08 11:02:47 +00:00
Steve C
6d9205e346 [ruff_linter] - Use LibCST in adjust_indentation for mixed whitespace (#12740) 2024-08-08 10:49:58 +02:00
Micha Reiser
df7345e118 Exit with an error if there are check failures (#12735) 2024-08-08 07:10:18 +00:00
Micha Reiser
dc6aafecc2 Setup tracing and document tracing usage (#12730) 2024-08-08 06:28:40 +00:00
Charlie Marsh
5107a50ae7 Parenthesize conditions based on precedence when merging if arms (#12737)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12732.
2024-08-07 23:03:24 -04:00
Micha Reiser
a631d600ac Fix cache invalidation for nested pyproject.toml files (#12727) 2024-08-07 21:53:45 +02:00
Alex Waygood
f34b9a77f0 [red-knot] Cleanups to logic resolving site-packages from a venv path (#12731) 2024-08-07 15:48:15 +01:00
Dylan
7997da47f5 [ruff] Implement incorrectly-parenthesized-tuple-in-subscript (RUF031) (#12480)
Implements the new fixable lint rule `RUF031` which checks for the use or omission of parentheses around tuples in subscripts, depending on the setting `lint.ruff.parenthesize-tuple-in-getitem`. By default, the use of parentheses is considered a violation.
2024-08-07 13:11:29 +00:00
Alex Waygood
d380b37a09 Add a new Binding::is_unused method (#12729) 2024-08-07 11:17:56 +01:00
Alex Waygood
b14fee9320 [ruff] Mark RUF023 fix as unsafe if __slots__ is not a set and the binding is used elsewhere (#12692) 2024-08-07 10:41:03 +01:00
Dhruv Manilawala
037e817450 Use struct instead of type alias for workspace settings index (#12726)
## Summary

Follow-up from https://github.com/astral-sh/ruff/pull/12725, this is
just a small refactor to use a wrapper struct instead of type alias for
workspace settings index. This avoids the need to have the
`register_workspace_settings` as a static method on `Index` and instead
is a method on the new struct itself.
2024-08-07 09:26:59 +00:00
Dhruv Manilawala
7fcfedd430 Ignore non-file workspace URL (#12725)
## Summary

This PR updates the server to ignore non-file workspace URL.

This is to avoid crashing the server if the URL scheme is not "file".
We'd still raise an error if the URL to file path conversion fails.

Also, as per the docs of
[`to_file_path`](https://docs.rs/url/2.5.2/url/struct.Url.html#method.to_file_path):

> Note: This does not actually check the URL’s scheme, and may give
nonsensical results for other schemes. It is the user’s responsibility
to check the URL’s scheme before calling this.

resolves: #12660

## Test Plan

I'm not sure how to test this locally but the change is small enough to
validate on its own.
2024-08-07 09:15:55 +00:00
Dhruv Manilawala
50ff5c7544 Include docs requirements for Renovate upgrades (#12724)
## Summary

This PR updates the Renovate config to account for the
`requirements*.txt` files in `docs/` directory.

The `mkdocs-material` upgrade is ignored because we use commit SHA for
the insider version and it should match the corresponding public version
as per the docs:
https://squidfunk.github.io/mkdocs-material/insiders/upgrade/
(`9.x.x-insiders-4.x.x`).

## Test Plan

```console
❯ renovate-config-validator
(node:83193) [DEP0040] DeprecationWarning: The `punycode` module is deprecated. Please use a userland alternative instead.
(Use `node --trace-deprecation ...` to show where the warning was created)
 INFO: Validating .github/renovate.json5
 INFO: Config validated successfully
```
2024-08-07 13:11:18 +05:30
Charlie Marsh
90e5bc2bd9 Avoid false-positives for list concatenations in SQL construction (#12720)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12710.
2024-08-06 16:26:03 -04:00
Alex Waygood
aae9619d3d [red-knot] Fix build on Windows (#12719)
## Summary

Tests are failing on `main` because automerge landed
https://github.com/astral-sh/ruff/pull/12716 despite the Windows tests
failing.
2024-08-06 20:21:25 +01:00
Alex Waygood
7fa76a2b2b [red-knot] Derive site-packages from a venv path (#12716) 2024-08-06 18:34:37 +00:00
Dhruv Manilawala
14dd6d980e [red-knot] Keep subcommands optional for the binary (#12715)
## Summary

This PR updates the `red_knot` CLI to make the subcommand optional.

## Test Plan

Run the following commands:
* `cargo run --bin red_knot --
--current-directory=~/playground/ruff/type_inference` (no subcommand
requirement)
* `cargo run --bin red_knot -- server` (should start the server)
2024-08-06 20:24:49 +05:30
Micha Reiser
846f57fd15 Update salsa (#12711) 2024-08-06 13:17:39 +00:00
Micha Reiser
8e6aa78796 Remove 'cli' module from red_knot (#12714) 2024-08-06 12:10:36 +00:00
Dhruv Manilawala
e91a0fe94a [red-knot] Implement basic LSP server (#12624)
## Summary

This PR adds basic LSP implementation for the Red Knot project.

This is basically a fork of the existing `ruff_server` crate into a
`red_knot_server` crate. The following are the main differences:
1. The `Session` stores a map from workspace root to the corresponding
Red Knot database (`RootDatabase`).
2. The database is initialized with the newly implemented `LSPSystem`
(implementation of `System` trait)
3. The `LSPSystem` contains the server index corresponding to each
workspace and an underlying OS system implementation. For certain
methods, the system first checks if there's an open document in LSP
system and returns the information from that. Otherwise, it falls back
to the OS system to get that information. These methods are
`path_metadata`, `read_to_string` and `read_to_notebook`
4. Add `as_any_mut` method for `System`

**Why fork?**

Forking allows us to experiment with the functionalities that are
specific to Red Knot. The architecture is completely different and so
the requirements for an LSP implementation are different as well. For
example, Red Knot only supports a single workspace, so the LSP system
needs to map the multi-workspace support to each Red Knot instance. In
the end, the server code isn't too big, it will be easier to implement
Red Knot specific functionality without worrying about existing server
limitations and it shouldn't be difficult to port the existing server.

## Review

Most of the server files hasn't been changed. I'm going to list down the
files that have been changed along with highlight the specific part of
the file that's changed from the existing server code.

Changed files:
* Red Knot CLI implementation:
https://github.com/astral-sh/ruff/pull/12624/files#diff-579596339a29d3212a641232e674778c339b446de33b890c7fdad905b5eb50e1
* In
https://github.com/astral-sh/ruff/pull/12624/files#diff-b9a9041a8a2bace014bf3687c3ef0512f25e0541f112fad6131b14242f408db6,
server capabilities have been updated, dynamic capability registration
is removed
* In
https://github.com/astral-sh/ruff/pull/12624/files#diff-b9a9041a8a2bace014bf3687c3ef0512f25e0541f112fad6131b14242f408db6,
the API for `clear_diagnostics` now take in a `Url` instead of
`DocumentQuery` as the document version doesn't matter when clearing
diagnostics after a document is closed
*
[`did_close`](https://github.com/astral-sh/ruff/pull/12624/files#diff-9271370102a6f3be8defaca40c82485b0048731942520b491a3bdd2ee0e25493),
[`did_close_notebook`](https://github.com/astral-sh/ruff/pull/12624/files#diff-96fb53ffb12c1694356e17313e4bb37b3f0931e887878b5d7c896c19ff60283b),
[`did_open`](https://github.com/astral-sh/ruff/pull/12624/files#diff-60e852cf1aa771e993131cabf98eb4c467963a8328f10eccdb43b3e8f0f1fb12),
[`did_open_notebook`](https://github.com/astral-sh/ruff/pull/12624/files#diff-ac356eb5e36c3b2c1c135eda9dfbcab5c12574d1cb77c71f7da8dbcfcfb2d2f1)
are updated to open / close file from the corresponding Red Knot
workspace
* The [diagnostic
handler](https://github.com/astral-sh/ruff/pull/12624/files#diff-4475f318fd0290d0292834569a7df5699debdcc0a453b411b8c3d329f1b879d9)
is updated to request diagnostics from Red Knot
* The [`Session::new`] method in
https://github.com/astral-sh/ruff/pull/12624/files#diff-55c96201296200c1cab37c8b0407b6c733381374b94be7ae50563bfe95264e4d
is updated to construct the Red Knot databases for each workspace. It
also contains the `index_mut` and `MutIndexGuard` implementation
* And, `LSPSystem` implementation is in
https://github.com/astral-sh/ruff/pull/12624/files#diff-4ed62bd359c43b0bf1a13f04349dcd954966934bb8d544de7813f974182b489e

## Test Plan

First, configure VS Code to use the `red_knot` binary

1. Build the `red_knot` binary by `cargo build`
2. Update the VS Code extension to specify the path to this binary
```json
{
	"ruff.path": ["/path/to/ruff/target/debug/red_knot"]
}
```
3. Restart VS Code

Now, open a file containing red-knot specific diagnostics, close the
file and validate that diagnostics disappear.
2024-08-06 11:27:30 +00:00
Micha Reiser
d2c627efb3 Use standard allocator for wasm (#12713) 2024-08-06 11:20:47 +00:00
Micha Reiser
10e977d5f5 [red-knot] Add basic WASM API (#12654) 2024-08-06 09:21:42 +02:00
Auguste Lalande
f0318ff889 [pydoclint] Consider DOC201 satisfied if docstring begins with "Returns" (#12675)
<!--
Thank you for contributing to Ruff! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

Resolves #12636

Consider docstrings which begin with the word "Returns" as having
satisfactorily documented they're returns. For example
```python
def f():
    """Returns 1."""
    return 1
```
is valid.

## Test Plan

Added example to test fixture.

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
2024-08-06 06:46:38 +00:00
Dhruv Manilawala
5cc3fed9a8 [red-knot] Infer float and complex literal expressions (#12689)
## Summary

This PR implements type inference for float and complex literal
expressions.

## Test Plan

Add test cases for both types.
2024-08-06 06:24:28 +00:00
Steve C
39dd732e27 [refurb] - fix unused autofix for implicit-cwd (FURB177) (#12708) 2024-08-06 08:09:35 +02:00
Dylan
52630a1d55 [flake8-comprehensions] Set comprehensions not a violation for sum in unnecessary-comprehension-in-call (C419) (#12691)
## Summary

Removes set comprehension as a violation for `sum` when checking `C419`,
because set comprehension may de-duplicate entries in a generator,
thereby modifying the value of the sum.

Closes #12690.
2024-08-06 02:30:58 +00:00
Steve C
7b5fd63ce8 [flake8-pyi] - add autofix for future-annotations-in-stub (PYI044) (#12676)
## Summary

add autofix for `PYI044`

## Test Plan

`cargo test`
2024-08-05 22:27:55 -04:00
Alex Waygood
5499821c67 [red-knot] Rename workspace_root variables in the module resolver to src_root (#12697)
Fixes #12337
2024-08-05 23:07:18 +01:00
Alex Waygood
7ee7c68f36 Add a new script to generate builtin module names (#12696) 2024-08-05 21:33:36 +01:00
Carl Meyer
2393d19f91 [red-knot] infer instance types for builtins (#12695)
Previously we wrongly inferred the type of the builtin type itself (e.g.
`Literal[int]`); we need to infer the instance type instead.
2024-08-05 13:32:42 -07:00
Dhruv Manilawala
a8e2ba508e [red-knot] Infer boolean literal expression (#12688)
## Summary

This PR implements type inference for boolean literal expressions.

## Test Plan

Add test cases for `True` and `False`.
2024-08-05 11:30:53 -07:00
Alex Waygood
0b4d3ce39b TRY002: fixup docs (#12683) 2024-08-05 08:56:12 +00:00
epenet
0a345dc627 [tryceratops] Add BaseException to raise-vanilla-class rule (TRY002) (#12620) 2024-08-05 09:45:49 +01:00
Micha Reiser
ff2aa3ea00 Revert "Remove criterion/codspeed compat layer (#12524)" (#12680) 2024-08-05 07:49:04 +00:00
Micha Reiser
0d3bad877d Fix module resolver symlink test on macOs (#12682) 2024-08-05 07:22:54 +00:00
Micha Reiser
756060d676 Upgrade Salsa to a version with a 32bit compatible concurrent vec (#12679) 2024-08-05 08:50:32 +02:00
Micha Reiser
b647f3fba8 Disable testing ruff_benchmark by default (#12678) 2024-08-05 06:15:52 +00:00
Dhruv Manilawala
82e69ebf23 Update broken links in the documentation (#12677)
## Summary

Running `mkdocs server -f mkdocs.insiders.yml` gave warnings about these
broken links.

## Test plan

I built the docs locally and verified that the updated links work
properly.
2024-08-05 05:35:23 +00:00
renovate[bot]
2c79045342 Update Rust crate pep440_rs to v0.6.6 (#12666) 2024-08-04 22:42:43 -04:00
Charlie Marsh
3497f5257b Add preview note to unnecessary-comprehension-in-call (#12673) 2024-08-05 02:27:00 +00:00
Dylan
25aabec814 [flake8-comprehensions] Account for list and set comprehensions in unnecessary-literal-within-tuple-call (C409) (#12657)
## Summary

Make it a violation of `C409` to call `tuple` with a list or set
comprehension, and
implement the (unsafe) fix of calling the `tuple` with the underlying
generator instead.

Closes #12648.

## Test Plan

Test fixture updated, cargo test, docs checked for updated description.
2024-08-04 22:14:52 -04:00
renovate[bot]
0e71485ea9 Update Rust crate regex to v1.10.6 (#12667) 2024-08-04 22:10:40 -04:00
renovate[bot]
43a9d282f7 Update Rust crate ordermap to v0.5.1 (#12665) 2024-08-04 22:10:32 -04:00
renovate[bot]
6f357b8b45 Update Rust crate tempfile to v3.11.0 (#12671) 2024-08-05 02:08:20 +00:00
renovate[bot]
73d9f11a9c Update pre-commit dependencies (#12670) 2024-08-05 02:08:07 +00:00
renovate[bot]
d6c6db5a44 Update NPM Development dependencies (#12672) 2024-08-04 22:07:55 -04:00
renovate[bot]
56d985a972 Update Rust crate toml to v0.8.19 (#12669) 2024-08-04 22:07:44 -04:00
renovate[bot]
b3e0655cc9 Update Rust crate serde_json to v1.0.122 (#12668) 2024-08-04 22:07:35 -04:00
renovate[bot]
06baffec9e Update Rust crate clap to v4.5.13 (#12664) 2024-08-04 22:07:26 -04:00
Steve C
67a2ae800a [ruff] - add autofix zip-instead-of-pairwise (RUF007) (#12663)
## Summary

Adds autofix for `RUF007`

## Test Plan

`cargo test`, however I get errors for `test resolver::tests::symlink
... FAILED` which seems to not be my fault
2024-08-04 21:57:50 -04:00
InSync
7a2c75e2fc Replace ruff-lsp links in README.md with links to new documentation page (#12618)
Since `ruff-lsp` has been (semi-)deprecated for sometime, it wouldn't
make sense to mention it in the most prominent sections of the `README`.
Instead, they should point to the new <i>[Editor
Integrations](https://docs.astral.sh/ruff/editors/)</i> documentation
page.
2024-08-04 15:31:36 +05:30
DavideRagazzon
9ee44637ca Fix typo in configuration docs (#12655) 2024-08-04 09:43:51 +02:00
Charlie Marsh
733341ab39 Ignore DOC errors for stub functions (#12651)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12650.
2024-08-03 08:13:21 -04:00
Micha Reiser
341a25eec1 Fix file watching on macOS if a module-search path is a symlink (#12634) 2024-08-03 07:24:07 +00:00
Charlie Marsh
38e178e914 Try both 'Raises' section styles when convention is unspecified (#12649)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12647.
2024-08-02 21:04:46 -04:00
Alex Waygood
daccb3f4f3 [pydoclint] Deduplicate collected exceptions after traversing function bodies (#12642) 2024-08-02 23:17:06 +01:00
Charlie Marsh
c858afe03a [flake8-bugbear] Treat return as equivalent to break (B909) (#12646)
Closes https://github.com/astral-sh/ruff/issues/12640.
2024-08-02 18:14:17 -04:00
Alex Waygood
3c1c3199d0 [pydoclint] Teach rules to understand reraised exceptions as being explicitly raised (#12639)
## Summary

Fixes #12630.

DOC501 and DOC502 now understand functions with constructs like this to
be explicitly raising `TypeError` (which should be documented in a
function's docstring):

```py
try:
    foo():
except TypeError:
    ...
    raise
```

I made an exception for `Exception` and `BaseException`, however.
Constructs like this are reasonably common, and I don't think anybody
would say that it's worth putting in the docstring that it raises "some
kind of generic exception":

```py
try:
    foo()
except BaseException:
    do_some_logging()
    raise
```

## Test Plan

`cargo test -p ruff_linter --lib`
2024-08-02 22:47:22 +01:00
Ran Benita
fbfe2cb2f5 [flake8-async] Fix false positives with multiple async with items (ASYNC100) (#12643)
## Summary

Please see
https://github.com/astral-sh/ruff/pull/12605#discussion_r1699957443 for
a description of the issue.

They way I fixed it is to get the *last* timeout item in the `with`, and
if it's an `async with` and there are items after it, then don't trigger
the lint.

## Test Plan

Updated the fixture with some more cases.
2024-08-02 21:25:13 +00:00
Carl Meyer
1c311e4fdb [red-knot] update benchmark to run on tomllib (#12635)
Changes the red-knot benchmark to run on the stdlib "tomllib" library
(which is self-contained, four files, uses type annotations) instead of
on very small bits of handwritten code.

Also remove the `without_parse` benchmark: now that we are running on
real code that uses typeshed, we'd either have to pre-parse all of
typeshed (slow) or find some way to determine which typeshed modules
will be used by the benchmark (not feasible with reasonable complexity.)

## Test Plan

`cargo bench -p ruff_benchmark --bench red_knot`
2024-08-02 11:23:52 -07:00
Micha Reiser
12177a42e3 Set durabilities for low-durability fields on high-durability inputs (#12627) 2024-08-02 19:42:34 +02:00
Micha Reiser
dfb08856eb Fix file watcher stop data race (#12626) 2024-08-02 19:02:49 +02:00
Auguste Lalande
94d817e1a5 [pydoclint] Add docstring-missing-yields amd docstring-extraneous-yields (DOC402, DOC403) (#12538) 2024-08-02 17:55:42 +01:00
ember91
9296bd4e3f Fix a typo (#12633)
Co-authored-by: Emil Berg <emil.berg@ericsson.com>
2024-08-02 16:39:27 +01:00
Micha Reiser
da824ba316 Release Ruff 0.5.6 (#12629)
Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
2024-08-02 17:35:14 +02:00
Micha Reiser
012198a1b0 Enable notebooks by default in preview mode (#12621) 2024-08-02 13:36:53 +00:00
Alex Waygood
fbab04fbe1 [red-knot] Allow multiple site-packages search paths (#12609) 2024-08-02 13:33:19 +00:00
Dhruv Manilawala
9aa43d5f91 Separate red_knot into CLI and red_knot_workspace crates (#12623)
## Summary

This PR separates the current `red_knot` crate into two crates:
1. `red_knot` - This will be similar to the `ruff` crate, it'll act as
the CLI crate
2. `red_knot_workspace` - This includes everything except for the CLI
functionality from the existing `red_knot` crate

Note that the code related to the file watcher is in
`red_knot_workspace` for now but might be required to extract it out in
the future.

The main motivation for this change is so that we can have a `red_knot
server` command. This makes it easier to test the server out without
making any changes in the VS Code extension. All we need is to specify
the `red_knot` executable path in `ruff.path` extension setting.

## Test Plan

- `cargo build`
- `cargo clippy --workspace --all-targets --all-features`
- `cargo shear --fix`
2024-08-02 11:24:36 +00:00
Micha Reiser
966563c79b Add tests for hard and soft links (#12590) 2024-08-02 10:14:28 +00:00
Micha Reiser
27edadec29 Make server panic hook more error resilient (#12610) 2024-08-02 12:10:06 +02:00
InSync
2e2b1b460f Fix a typo in docs/editors/settings.md (#12614)
Diff:

```diff
-- `false: Same as`off\`
+- `false`: Same as `off`
```
2024-08-01 11:23:55 -05:00
Charlie Marsh
a3e67abf4c Add newlines before comments in E305 (#12606)
## Summary

There's still a problem here. Given:

```python
class Class():
    pass

    # comment

    # another comment
a = 1
```

We only add one newline before `a = 1` on the first pass, because
`max_precedling_blank_lines` is 1... We then add the second newline on
the second pass, so it ends up in the right state, but the logic is
clearly wonky.

Closes https://github.com/astral-sh/ruff/issues/11508.
2024-07-31 23:11:00 -04:00
Carl Meyer
ee0518e8f7 [red-knot] implement attribute of union (#12601)
I hit this `todo!` trying to run type inference over some real modules.
Since it's a one-liner to implement it, I just did that rather than
changing to `Type::Unknown`.
2024-07-31 19:45:24 -07:00
Charlie Marsh
d774a3bd48 Avoid unused async when context manager includes TaskGroup (#12605)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12354.
2024-08-01 02:12:43 +00:00
Charlie Marsh
7e6b19048e Don't attach comments with mismatched indents (#12604)
## Summary

Given:

```python
def test_update():
    pass
    # comment
def test_clientmodel():
    pass
```

We don't want `# comment` to be attached to `def test_clientmodel()`.

Closes https://github.com/astral-sh/ruff/issues/12589.
2024-07-31 22:09:05 -04:00
Charlie Marsh
8e383b9587 Respect start index in unnecessary-list-index-lookup (#12603)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12594.
2024-08-01 01:21:15 +00:00
github-actions[bot]
3f49ab126f Sync vendored typeshed stubs (#12602) 2024-08-01 01:44:56 +01:00
Chris Krycho
c1bc7f4dee Remove ecosystem_ci flag from Ruff CLI (#12596)
## Summary

@zanieb noticed while we were discussing #12595 that this flag is now
unnecessary, so remove it and the flags which reference it.

## Test Plan

Question for maintainers: is there a test to add *or* remove here? (I’ve
opened this as a draft PR with that in view!)
2024-07-31 11:40:03 -05:00
Bowen Liang
a44d579f21 Add Dify to Ruff users (#12593)
## Summary

<!-- What's the purpose of the change? What does it do, and why? -->
- Add the popular LLM Ops project Dify to the user list in Readme, as
Dify introduced Ruff for lining since Feb 2024 in
https://github.com/langgenius/dify/pull/2366
2024-07-31 08:56:52 -04:00
Alex Waygood
a3900d2b0b [pyflakes] Fix preview-mode bugs in F401 when attempting to autofix unused first-party submodule imports in an __init__.py file (#12569) 2024-07-31 13:34:30 +01:00
Alex Waygood
83b1c48a93 Make setting and retrieving pydocstyle settings less tedious (#12582) 2024-07-31 10:39:33 +01:00
Micha Reiser
138e70bd5c Upgrade to Rust 1.80 (#12586) 2024-07-30 19:18:08 +00:00
Eero Vaher
ee103ffb25 Fix an argument name in B905 description (#12588)
The description of `zip-without-explicit-strict` erroneously mentions a
non-existing `check` argument for `zip()`.
2024-07-30 14:40:56 -04:00
Micha Reiser
18f87b9497 Flaky file watching tests, add debug assertions (#12587) 2024-07-30 18:09:55 +00:00
Micha Reiser
adc8d4e1e7 File watch events: Add dynamic wait period before writing new changes (#12585) 2024-07-30 19:18:43 +02:00
Alex Waygood
90db361199 Consider more stdlib decorators to be property-like (#12583) 2024-07-30 17:18:23 +00:00
Alex Waygood
4738135801 Improve consistency between linter rules in determining whether a function is property (#12581) 2024-07-30 17:42:04 +01:00
Micha Reiser
264cd750e9 Add delay between updating a file (#12576) 2024-07-30 18:31:29 +02:00
Alex Waygood
7a4419a2a5 Improve handling of metaclasses in various linter rules (#12579) 2024-07-30 14:48:36 +01:00
Alex Waygood
ac1666d6e2 Remove several incorrect uses of map_callable() (#12580) 2024-07-30 14:30:25 +01:00
epenet
459c85ba27 [flake8-return] Exempt cached properties and other property-like decorators from explicit return rule (RET501) (#12563)
Co-authored-by: Alex Waygood <alex.waygood@gmail.com>
2024-07-30 11:06:28 +00:00
Alex Waygood
aaa56eb0bd Fix NFKC normalization bug when removing unused imports (#12571) 2024-07-30 09:54:35 +00:00
Dhruv Manilawala
f3c14a4276 Keep track of deleted cell for reorder change request (#12575)
## Summary

This PR fixes a bug where the server wouldn't retain the cell content in
case of a reorder change request.

As mentioned in
https://github.com/astral-sh/ruff/issues/12573#issuecomment-2257819298,
this change request is modeled as (a) remove these cell URIs and (b) add
these cell URIs. The cell content isn't provided. But, the way we've
modeled the `NotebookCell` (it contains the underlying `TextDocument`),
we need to keep track of the deleted cells to get the content.

This is not an ideal solution and a better long term solution would be
to model it as per the spec but that is a big structural change and will
affect multiple parts of the server. Modeling as per the spec would also
avoid bugs like https://github.com/astral-sh/ruff/pull/11864. For
context, that model would add complexity per
https://github.com/astral-sh/ruff/pull/11206#discussion_r1600165481.

fixes: #12573

## Test Plan

This video shows the before and after the bug is fixed:


https://github.com/user-attachments/assets/2fcad4b5-f9af-4776-8640-4cd1fa16e325
2024-07-30 09:51:26 +00:00
Alex Waygood
3169d408fa [red-knot] Fix typos in the module resolver (#12574) 2024-07-30 09:38:38 +00:00
Micha Reiser
a2286c8e47 Set Durability to 'HIGH' for most inputs and third-party libraries (#12566) 2024-07-30 09:03:59 +00:00
Piotr Osiewicz
fb9f566f56 Use $/logTrace for server trace logs in Zed and VS Code (#12564)
## Summary

This pull request adds support for logging via `$/logTrace` RPC
messages. It also enables that code path for when a client is Zed editor
or VS Code (as there's no way for us to generically tell whether a client prefers
`$/logTrace` over stderr.

Related to: #12523

## Test Plan

I've built Ruff from this branch and tested it manually with Zed.

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
2024-07-30 08:32:20 +05:30
Micha Reiser
381bd1ff4a Delete left over debug statement (#12567) 2024-07-29 16:16:12 +02:00
Micha Reiser
2f54d05d97 Remove salsa::report_untracked_read when finding the dynamic module resolution paths (#12509) 2024-07-29 09:31:29 +00:00
Micha Reiser
e18b4e42d3 [red-knot] Upgrade to the *new* *new* salsa (#12406) 2024-07-29 07:21:24 +00:00
Dhruv Manilawala
9495331a5f Recommend client config for trace setting in Neovim (#12562) 2024-07-29 06:14:34 +00:00
renovate[bot]
e1076db7d0 Update CodSpeedHQ/action action to v3 (#12559) 2024-07-29 07:37:02 +02:00
renovate[bot]
1986c9e8e2 Update NPM Development dependencies (#12556) 2024-07-28 22:17:44 -04:00
renovate[bot]
d7e80dc955 Update pre-commit dependencies (#12555) 2024-07-28 22:17:34 -04:00
renovate[bot]
87d09f77cd Update Rust crate imperative to v1.0.6 (#12552) 2024-07-28 22:17:28 -04:00
renovate[bot]
bd37ef13b8 Update Rust crate bstr to v1.10.0 (#12557) 2024-07-28 22:17:11 -04:00
renovate[bot]
ec23c974db Update Rust crate toml to v0.8.16 (#12554) 2024-07-28 22:17:01 -04:00
renovate[bot]
122e5ab428 Update Rust crate serde_json to v1.0.121 (#12553) 2024-07-28 22:16:55 -04:00
renovate[bot]
2f2149aca8 Update Rust crate env_logger to v0.11.5 (#12550) 2024-07-28 22:16:49 -04:00
renovate[bot]
9d5c31e7da Update Rust crate imara-diff to v0.1.7 (#12551) 2024-07-28 22:16:42 -04:00
renovate[bot]
25f3ad6238 Update Rust crate clap to v4.5.11 (#12549) 2024-07-28 22:16:36 -04:00
renovate[bot]
79926329a4 Update Rust crate argfile to v0.2.1 (#12548) 2024-07-28 22:16:31 -04:00
Aleksei Latyshev
9cdc578dd9 [flake8-builtins] Implement import, lambda, and module shadowing (#12546)
## Summary

<!-- What's the purpose of the change? What does it do, and why? -->
Extend `flake8-builtins` to imports, lambda-arguments, and modules to be
consistent with original checker
[flake8_builtins](https://github.com/gforcada/flake8-builtins/blob/main/flake8_builtins.py).

closes #12540 

## Details

- Implement builtin-import-shadowing (A004)
- Stop tracking imports shadowing in builtin-variable-shadowing (A001)
in preview mode.
- Implement builtin-lambda-argument-shadowing (A005)
- Implement builtin-module-shadowing (A006)
  - Add new option `linter.flake8_builtins.builtins_allowed_modules`

## Test Plan

cargo test
2024-07-29 01:42:42 +00:00
Charlie Marsh
665c75f7ab Add document for executable determination (#12547)
Closes https://github.com/astral-sh/ruff/issues/12505.
2024-07-28 16:23:00 -04:00
Micha Reiser
f37b39d6cc Allow downloading ecosystem results from forks (#12544) 2024-07-27 19:57:19 +02:00
Charlie Marsh
e18c45c310 Avoid marking required imports as unused (#12537)
## Summary

If an import is marked as "required", we should never flag it as unused.
In practice, this is rare, since required imports are typically used for
`__future__` annotations, which are always considered "used".

Closes https://github.com/astral-sh/ruff/issues/12458.
2024-07-26 14:23:43 -04:00
Charlie Marsh
d930052de8 Move required import parsing out of lint rule (#12536)
## Summary

Instead, make it part of the serialization and deserialization itself.
This makes it _much_ easier to reuse when solving
https://github.com/astral-sh/ruff/issues/12458.
2024-07-26 13:35:45 -04:00
Sigurd Spieckermann
7ad4df9e9f Complete FBT002 example with Enum argument (#12525)
## Summary

I've completed `FBT002` rule example with an `Enum` argument to show the
full usage in this case.
2024-07-26 11:50:19 -04:00
Charlie Marsh
425761e960 Use colon rather than dot formatting for integer-only types (#12534)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12421.
2024-07-26 15:48:19 +00:00
Carl Meyer
4b69271809 [red-knot] resolve int/list/dict/set/tuple to builtin type (#12521)
Now that we have builtins available, resolve some simple cases to the
right builtin type.

We should also adjust the display for types to include their module
name; that's not done yet here.
2024-07-26 08:21:31 -07:00
Micha Reiser
bf23d38a21 Remove unnecessary clone in workspace API (#12529) 2024-07-26 17:19:05 +02:00
Charlie Marsh
49f51583fa Always allow explicit multi-line concatenations when implicit are banned (#12532)
## Summary

Closes https://github.com/astral-sh/ruff/issues/11582.
2024-07-26 10:36:35 -04:00
Charlie Marsh
1fe4a5faed Avoid recommending __slots__ for classes that inherit from more than namedtuple (#12531)
## Summary

Closes https://github.com/astral-sh/ruff/issues/11887.
2024-07-26 14:24:40 +00:00
Charlie Marsh
998bfe0847 Avoid recommending no-argument super in slots=True dataclasses (#12530)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12506.
2024-07-26 10:09:51 -04:00
Dhruv Manilawala
6f4db8675b [red-knot] Add support for untitled files (#12492)
## Summary

This PR adds support for untitled files in the Red Knot project.

Refer to the [design
discussion](https://github.com/astral-sh/ruff/discussions/12336) for
more details.

### Changes
* The `parsed_module` always assumes that the `SystemVirtual` path is of
`PySourceType::Python`.
* For the module resolver, as suggested, I went ahead by adding a new
`SystemOrVendoredPath` enum and renamed `FilePathRef` to
`SystemOrVendoredPathRef` (happy to consider better names here).
* The `file_to_module` query would return if it's a
`FilePath::SystemVirtual` variant because a virtual file doesn't belong
to any module.
* The sync implementation for the system virtual path is basically the
same as that of system path except that it uses the
`virtual_path_metadata`. The reason for this is that the system
(language server) would provide the metadata on whether it still exists
or not and if it exists, the corresponding metadata.

For point (1), VS Code would use `Untitled-1` for Python files and
`Untitled-1.ipynb` for Jupyter Notebooks. We could use this distinction
to determine whether the source type is `Python` or `Ipynb`.

## Test Plan

Added test cases in #12526
2024-07-26 18:13:31 +05:30
Micha Reiser
71f7aa4971 Remove criterion/codspeed compat layer (#12524) 2024-07-26 12:22:16 +02:00
Auguste Lalande
9f72f474e6 [pydoclint] Add docstring-missing-returns amd docstring-extraneous-returns (DOC201, DOC202) (#12485)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-26 06:36:00 +00:00
Carl Meyer
10c993e21a [red-knot] remove wrong __init__.py from file-watching tests (#12519) 2024-07-26 07:14:01 +01:00
Carl Meyer
2d3914296d [red-knot] handle all syntax without panic (#12499)
Extend red-knot type inference to cover all syntax, so that inferring
types for a scope gives all expressions a type. This means we can run
the red-knot semantic lint on all Python code without panics. It also
means we can infer types for `builtins.pyi` without panics.

To keep things simple, this PR intentionally doesn't add any new type
inference capabilities: the expanded coverage is all achieved with
`Type::Unknown`. But this puts the skeleton in place for adding better
inference of all these language features.

I also had to add basic Salsa cycle recovery (with just `Type::Unknown`
for now), because some `builtins.pyi` definitions are cyclic.

To test this, I added a comprehensive corpus of test snippets sourced
from Cinder under [MIT
license](https://github.com/facebookincubator/cinder/blob/cinder/3.10/cinderx/LICENSE),
which matches Ruff's license. I also added to this corpus some
additional snippets for newer language features: all the
`27_func_generic_*` and `73_class_generic_*` files, as well as
`20_lambda_default_arg.py`, and added a test which runs semantic-lint
over all these files. (The test doesn't assert the test-corpus files are
lint-free; just that they are able to lint without a panic.)
2024-07-25 17:38:08 -07:00
Charlie Marsh
7571da8778 Preserve trailing inline comments on import-from statements (#12498)
## Summary

Right now, in the isort comment model, there's nowhere for trailing
comments on the _statement_ to go, as in:

```python
from mylib import (
    MyClient,
    MyMgmtClient,
)  # some comment
```

If the comment is on the _alias_, we do preserve it, because we attach
it to the alias, as in:

```python
from mylib import (
    MyClient,
    MyMgmtClient,  # some comment
)
```

Similarly, if the comment is trailing on an import statement
(non-`from`), we again attach it to the alias, because it can't be
parenthesized, as in:

```python
import foo  # some comment
```

This PR adds logic to track and preserve those trailing comments.

We also no longer drop several other comments, like:

```python
from mylib import (
    # some comment
    MyClient
)
```

Closes https://github.com/astral-sh/ruff/issues/12487.
2024-07-25 17:46:58 -04:00
Alex Waygood
2ceac5f868 [red-knot] Rename some methods in the module resolver (#12517) 2024-07-25 19:28:48 +00:00
Alex Waygood
5ce80827d2 [red-knot] Refactor path.rs in the module resolver (#12494) 2024-07-25 19:29:28 +01:00
Dhruv Manilawala
e047b9685a Use docs bot email for docs publish (#12511)
Ref: https://github.com/astral-sh/uv/pull/5369
2024-07-25 21:50:00 +05:30
Dhruv Manilawala
fc16d8d04d Bump version to 0.5.5 (#12510) 2024-07-25 20:17:01 +05:30
Uriya Harpeness
175e5d7b88 Add missing traceback line in f-string-in-exception docstring. (#12508)
## Summary

Add missing traceback line in `f-string-in-exception` docstring.

Solves https://github.com/astral-sh/ruff/issues/12504.
2024-07-25 10:22:05 -04:00
Dhruv Manilawala
c03f257ed7 Add note about the breaking change in nvim-lspconfig (#12507)
Refer https://github.com/astral-sh/ruff/issues/12408
2024-07-25 14:01:16 +00:00
Dhruv Manilawala
6bbb4a28c2 Add setup docs for Zed editor (#12501)
## Summary

This PR adds the setup documentation for using Ruff with the Zed editor.

Closes: #12388
2024-07-25 13:09:17 +00:00
Micha Reiser
2ce3e3ae60 Fix the search path tests on MacOS (#12503) 2024-07-25 08:21:38 +02:00
Charlie Marsh
2a64cccb61 Avoid applying ignore-names to self and cls function names (#12497)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12465.
2024-07-24 18:08:23 -04:00
Alex Waygood
928ffd6650 Ignore NPY201 inside except blocks for compatibility with older numpy versions (#12490) 2024-07-24 20:03:23 +00:00
Alex Waygood
e52be0951a [red-knot] Improve validation for search paths (#12376) 2024-07-24 15:02:25 +01:00
Dylan
889073578e [flake8-bugbear] Allow singleton tuples with starred expressions in B013 (#12484) 2024-07-24 15:19:30 +02:00
Micha Reiser
eac965ecaf [red-knot] Watch search paths (#12407) 2024-07-24 07:38:50 +00:00
Auguste Lalande
8659f2f4ea [pydoclint] Fix documentation for DOC501 (#12483)
## Summary

The doc was written backwards. mb.
2024-07-24 00:08:53 -04:00
Alex Waygood
c1b292a0dc Refactor NPY201 (#12479) 2024-07-23 18:24:20 +01:00
Micha Reiser
3af6ccb720 Fix Ord of cmp_fix (#12471) 2024-07-23 15:14:22 +02:00
Micha Reiser
f0fc6a95fe [red-knot] Lazy package file discovery (#12452)
Co-authored-by: Carl Meyer <carl@astral.sh>
2024-07-23 08:47:15 +00:00
Mateusz Sokół
f96a3c71ff Fix NumPy 2.0 rule for np.alltrue and np.sometrue (#12473)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-23 08:34:43 +00:00
Micha Reiser
b9b7deff17 Implement upcast_mut for new TestDb (#12470) 2024-07-23 07:11:00 +00:00
Micha Reiser
40d9324f5a [red-knot] Improved file watching (#12382) 2024-07-23 08:18:59 +02:00
Pathompong Kwangtong
a9f8bd59b2 Add Eglot setup guide for Emacs editor (#12426)
## Summary

The purpose of this change is to explain how to use ruff as a language
server in Eglot with automatic formatting because I've struggle to use
it with Eglot. I've search it online and found that there are some
people also struggle too. (See [this reddit
post](https://www.reddit.com/r/emacs/comments/118mo6w/eglot_automatic_formatting/)
and
https://github.com/astral-sh/ruff-lsp/issues/19#issuecomment-1435138828)


## Test Plan

I've use this setting myself. And I will continue maintain this part as
long as I use it.

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
2024-07-23 10:50:51 +05:30
Piotr Osiewicz
143e172431 Do not bail code action resolution when a quick fix is requested (#12462)
## Summary

When working on improving Ruff integration with Zed I noticed that it
errors out when we try to resolve a code action of a `QUICKFIX` kind;
apparently, per @dhruvmanila we shouldn't need to resolve it, as the
edit is provided in the initial response for the code action. However,
it's possible for the `resolve` call to fill out other fields (such as
`command`).
AFAICT Helix also tries to resolve the code actions unconditionally (as
in, when either `edit` or `command` is absent); so does VSC. They can
still apply the quickfixes though, as they do not error out on a failed
call to resolve code actions - Zed does. Following suit on Zed's side
does not cut it though, as we still get a log request from Ruff for that
failure (which is surfaced in the UI).
There are also other language servers (such as
[rust-analyzer](c1c9e10f72/crates/rust-analyzer/src/handlers/request.rs (L1257)))
that fill out both `command` and `edit` fields as a part of code action
resolution.

This PR makes the resolve calls for quickfix actions return the input
value.

## Test Plan

N/A
2024-07-23 10:30:03 +05:30
Auguste Lalande
b2d3a05ee4 [flake8-async] Fix references in documentation not displaying (#12467)
## Summary

Fix references in documentation of several `ASYNC` rules not displaying

## Test Plan

Validated documentation now displays correctly
2024-07-22 19:38:13 -04:00
Josh Cannon
ef1ca0dd38 Fix bad markdown in CONTRIBUTING.md (#12466)
See
https://github.com/astral-sh/ruff/blob/main/CONTRIBUTING.md#import-categorization
2024-07-23 00:03:30 +01:00
Carl Meyer
c7b13bb8fc [red-knot] add cycle-free while-loop control flow (#12413)
Add support for while-loop control flow.

This doesn't yet include general support for terminals and reachability;
that is wider than just while loops and belongs in its own PR.

This also doesn't yet add support for cyclic definitions in loops; that
comes with enough of its own complexity in Salsa that I want to handle
it separately.
2024-07-22 14:27:33 -07:00
Carl Meyer
dbbe3526ef [red-knot] add while-loop to benchmark (#12464)
So we can get some signal from the benchmark result on
https://github.com/astral-sh/ruff/pull/12413
2024-07-22 14:16:56 -07:00
Carl Meyer
f22c8ab811 [red-knot] add maybe-undefined lint rule (#12414)
Add a lint rule to detect if a name is definitely or possibly undefined
at a given usage.

If I create the file `undef/main.py` with contents:

```python
x = int
def foo():
    z
    return x
if flag:
    y = x
y
```

And then run `cargo run --bin red_knot -- --current-directory
../ruff-examples/undef`, I get the output:

```
Name 'z' used when not defined.
Name 'flag' used when not defined.
Name 'y' used when possibly not defined.
```

If I modify the file to add `y = 0` at the top, red-knot re-checks it
and I get the new output:

```
Name 'z' used when not defined.
Name 'flag' used when not defined.
```

Note that `int` is not flagged, since it's a builtin, and `return x` in
the function scope is not flagged, since it refers to the global `x`.
2024-07-22 13:53:59 -07:00
Alex Waygood
2a8f95c437 [red-knot] Use a distinct type for module search paths in the module resolver (#12379) 2024-07-22 19:44:27 +00:00
Dhruv Manilawala
ea2d51c2bb Add note to include notebook files for native server (#12449)
## Summary

Similar to https://github.com/astral-sh/ruff-vscode/pull/547 but for the
online docs.

Refer to https://github.com/astral-sh/ruff-vscode/issues/546

## Preview

<img width="1728" alt="Screenshot 2024-07-22 at 14 51 40"
src="https://github.com/user-attachments/assets/39014278-c868-45b0-9058-42858a060fd8">
2024-07-22 21:40:30 +05:30
Micha Reiser
ed238e0c76 Fix incorrect placement of leading function comment with type params (#12447) 2024-07-22 14:17:00 +02:00
Micha Reiser
3ace12943e Ignore more open ai notebooks for now (#12448) 2024-07-22 14:16:48 +02:00
Dhruv Manilawala
978909fcf4 Raise syntax error for unparenthesized generator expr in multi-argument call (#12445)
## Summary

This PR fixes a bug to raise a syntax error when an unparenthesized
generator expression is used as an argument to a call when there are
more than one argument.

For reference, the grammar is:
```
primary:
    | ...
    | primary genexp 
    | primary '(' [arguments] ')' 
    | ...

genexp:
    | '(' ( assignment_expression | expression !':=') for_if_clauses ')' 
```

The `genexp` requires the parenthesis as mentioned in the grammar. So,
the grammar for a call expression is either a name followed by a
generator expression or a name followed by a list of argument. In the
former case, the parenthesis are excluded because the generator
expression provides them while in the later case, the parenthesis are
explicitly provided for a list of arguments which means that the
generator expression requires it's own parenthesis.

This was discovered in https://github.com/astral-sh/ruff/issues/12420.

## Test Plan

Add test cases for valid and invalid syntax.

Make sure that the parser from CPython also raises this at the parsing
step:
```console
$ python3.13 -m ast parser/_.py
  File "parser/_.py", line 1
    total(1, 2, x for x in range(5), 6)
                ^^^^^^^^^^^^^^^^^^^
SyntaxError: Generator expression must be parenthesized

$ python3.13 -m ast parser/_.py
  File "parser/_.py", line 1
    sum(x for x in range(10), 10)
        ^^^^^^^^^^^^^^^^^^^^
SyntaxError: Generator expression must be parenthesized
```
2024-07-22 14:44:20 +05:30
Dhruv Manilawala
f8735e1ee8 Remove unused dependencies, sync existing versions (#12446)
## Summary

This PR removes unused dependencies from `fuzz` crate and syncs the
`similar` crate to the workspace version. This will help in resolve
https://github.com/astral-sh/ruff/pull/12442.

## Test Plan

Build the fuzz crate:

For Mac (it requires the nightly build):
```
cargo +nightly fuzz build
```
2024-07-22 10:49:05 +05:30
renovate[bot]
d70ceb6a56 Update Rust crate uuid to v1.10.0 (#12444) 2024-07-21 21:50:53 -04:00
renovate[bot]
fc7d9e95b8 Update Rust crate tracing-tree to 0.4.0 (#12443) 2024-07-21 21:50:46 -04:00
renovate[bot]
b578fca9cb Update NPM Development dependencies (#12441) 2024-07-21 21:50:32 -04:00
renovate[bot]
8d3146c2b2 Update pre-commit hook astral-sh/ruff-pre-commit to v0.5.4 (#12440) 2024-07-21 21:50:27 -04:00
renovate[bot]
fa5c841154 Update Rust crate thiserror to v1.0.63 (#12437) 2024-07-21 21:49:42 -04:00
renovate[bot]
f8fcbc19d9 Update dependency react-resizable-panels to v2.0.22 (#12439) 2024-07-21 21:49:33 -04:00
renovate[bot]
97fdd48208 Update Rust crate toml to v0.8.15 (#12438) 2024-07-21 21:49:23 -04:00
renovate[bot]
731ed2e40b Update Rust crate syn to v2.0.72 (#12436) 2024-07-21 21:49:16 -04:00
Auguste Lalande
3a742c17f8 [pydoclint] Fix DOC501 panic #12428 (#12435)
## Summary

Fix panic reported in #12428. Where a string would sometimes get split
within a character boundary. This bypasses the need to split the string.

This does not guarantee the correct formatting of the docstring, but
neither did the previous implementation.

Resolves #12428 

## Test Plan

Test case added to fixture
2024-07-21 19:30:06 +00:00
TomerBin
053243635c [fastapi] Implement FAST001 (fastapi-redundant-response-model) and FAST002 (fastapi-non-annotated-dependency) (#11579)
## Summary

Implements ruff specific role for fastapi routes, and its autofix.

## Test Plan

`cargo test` / `cargo insta review`
2024-07-21 18:28:10 +00:00
Ivan Carvalho
82355712c3 Add IBM to Who is Using ruff (#12433)
<!--
Thank you for contributing to Ruff! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

Just updating the README to reflect that IBM has been using ruff for a
year already: https://github.com/Qiskit/qiskit/pull/10116.
2024-07-21 11:17:24 -05:00
Auguste Lalande
4bc73dd87e [pydoclint] Implement docstring-missing-exception and docstring-extraneous-exception (DOC501, DOC502) (#11471)
## Summary

These are the first rules implemented as part of #458, but I plan to
implement more.

Specifically, this implements `docstring-missing-exception` which checks
for raised exceptions not documented in the docstring, and
`docstring-extraneous-exception` which checks for exceptions in the
docstring not present in the body.

## Test Plan

Test fixtures added for both google and numpy style.
2024-07-20 19:41:51 +00:00
T-256
53b84ab054 Cleanup redundant spaces from changelog (#12424) 2024-07-20 17:46:15 +00:00
Charlie Marsh
3664f85f45 Bump version to v0.5.4 (#12423) 2024-07-20 17:28:13 +00:00
Charlie Marsh
2c1926beeb Insert parentheses for multi-argument generators (#12422)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12420.
2024-07-20 16:41:55 +00:00
Charlie Marsh
4bcc96ae51 Avoid shadowing diagnostics for @override methods (#12415)
Closes https://github.com/astral-sh/ruff/issues/12412.
2024-07-19 21:32:33 -04:00
FishAlchemist
c0a2b49bac Fix the Github link error for Neovim in the setup for editors in the docs. (#12410)
## Summary

Fix Github link error for Neovim setup editors .

## Test Plan
Click Neovim Github link with mkdocs on local.
2024-07-19 16:24:12 -04:00
Sashko
ca22248628 Update docs Settings output-format default (#12409)
## Update docs Settings output-format default

Fixes https://github.com/astral-sh/ruff/issues/12350

## Test Plan

Run all automation mentioned here
fe04f2b09d/CONTRIBUTING.md (development)

Manually verified changes in the generated MkDocs site.

Co-authored-by: Oleksandr Zavertniev <oleksandr.zavertniev@yellowbrick.com>
2024-07-19 17:51:46 +00:00
Alex Waygood
d8cf8ac2ef [red-knot] Resolve symbols from builtins.pyi in the stdlib if they cannot be found in other scopes (#12390)
Co-authored-by: Carl Meyer <carl@astral.sh>
2024-07-19 17:44:56 +01:00
Carl Meyer
1c7b84059e [red-knot] fix incremental benchmark (#12400)
We should write `BAR_CODE` to `bar.py`, not to `foo.py`.
2024-07-19 08:32:37 -07:00
Carl Meyer
f82bb67555 [red-knot] trace file when inferring types (#12401)
When poring over traces, the ones that just include a definition or
symbol or expression ID aren't very useful, because you don't know which
file it comes from. This adds that information to the trace.

I guess the downside here is that if calling `.file(db)` on a
scope/definition/expression would execute other traced code, it would be
marked as outside the span? I don't think that's a concern, because I
don't think a simple field access on a tracked struct should ever
execute our code. If I'm wrong and this is a problem, it seems like the
tracing crate has this feature where you can record a field as
`tracing::field::Empty` and then fill in its value later with
`span.record(...)`, but when I tried this it wasn't working for me, not
sure why.

I think there's a lot more we can do to make our tracing output more
useful for debugging (e.g. record an event whenever a
definition/symbol/expression/use id is created with the details of that
definition/symbol/expression/use), this is just dipping my toes in the
water.
2024-07-19 07:13:51 -07:00
Alex Waygood
5f96f69151 [red-knot] Fix bug where module resolution would not be invalidated if an entire package was deleted (#12378) 2024-07-19 13:53:09 +01:00
Micha Reiser
ad19b3fd0e [red-knot] Add verbosity argument to CLI (#12404) 2024-07-19 11:38:24 +00:00
Carl Meyer
a62e2d2000 [red-knot] preparse builtins in without_parse benchmark (#12395) 2024-07-19 05:58:27 +00:00
Dylan
d61747093c [ruff] Rename RUF007 to zip-instead-of-pairwise (#12399)
## Summary

<!-- What's the purpose of the change? What does it do, and why? -->

Renames the rule
[RUF007](https://docs.astral.sh/ruff/rules/pairwise-over-zipped/) from
`pairwise-over-zipped` to `zip-instead-of-pairwise`. This closes #12397.

Specifically, in this PR:

- The file containing the rule was renamed
- The struct was renamed
- The function implementing the rule was renamed

## Testing

<!-- How was it tested? -->

- `cargo test`
- Docs re-built locally and verified that new rule name is displayed.
(Screenshots below).

<img width="939" alt="New rule name in rule summary"
src="https://github.com/user-attachments/assets/bf638bc9-1b7a-4675-99bf-e4de88fec167">

<img width="805" alt="New rule name in rule details"
src="https://github.com/user-attachments/assets/6fffd745-2568-424a-84e5-f94a41351022">
2024-07-18 19:26:27 -04:00
ukyen
0ba7fc63d0 [pydocstyle] Escaped docstring in docstring (D301 ) (#12192)
<!--
Thank you for contributing to Ruff! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

<!-- What's the purpose of the change? What does it do, and why? -->
This PR updates D301 rule to allow inclduing escaped docstring, e.g.
`\"""Foo.\"""` or `\"\"\"Bar.\"\"\"`, within a docstring.

Related issue: #12152 

## Test Plan

Add more test cases to D301.py and update the snapshot file.

<!-- How was it tested? -->
2024-07-18 18:36:05 -04:00
Carl Meyer
fa5b19d4b6 [red-knot] use a simpler builtin in the benchmark (#12393)
In preparation for supporting resolving builtins, simplify the benchmark
so it doesn't look up `str`, which is actually a complex builtin to deal
with because it inherits `Sequence[str]`.

Co-authored-by: Alex Waygood <alex.waygood@gmail.com>
2024-07-18 14:04:33 -07:00
Carl Meyer
181e7b3c0d [red-knot] rename module_global to global (#12385)
Per comments in https://github.com/astral-sh/ruff/pull/12269, "module
global" is kind of long, and arguably redundant.

I tried just using "module" but there were too many cases where I felt
this was ambiguous. I like the way "global" works out better, though it
does require an understanding that in Python "global" generally means
"module global" not "globally global" (though in a sense module globals
are also globally global since modules are singletons).
2024-07-18 13:05:30 -07:00
Carl Meyer
519eca9fe7 [red-knot] support implicit global name lookups (#12374)
Support falling back to a global name lookup if a name isn't defined in
the local scope, in the cases where that is correct according to Python
semantics.

In class scopes, a name lookup checks the local namespace first, and if
the name isn't found there, looks it up in globals.

In function scopes (and type parameter scopes, which are function-like),
if a name has any definitions in the local scope, it is a local, and
accessing it when none of those definitions have executed yet just
results in an `UnboundLocalError`, it does not fall back to a global. If
the name does not have any definitions in the local scope, then it is an
implicit global.

Public symbol type lookups never include such a fall back. For example,
if a name is not defined in a class scope, it is not available as a
member on that class, even if a name lookup within the class scope would
have fallen back to a global lookup.

This PR makes the `@override` lint rule work again.

Not yet included/supported in this PR:

* Support for free variables / closures: a free symbol in a nested
function-like scope referring to a symbol in an outer function-like
scope.
* Support for `global` and `nonlocal` statements, which force a symbol
to be treated as global or nonlocal even if it has definitions in the
local scope.
* Module-global lookups should fall back to builtins if the name isn't
found in the module scope.

I would like to expose nicer APIs for the various kinds of symbols
(explicit global, implicit global, free, etc), but this will also wait
for a later PR, when more kinds of symbols are supported.
2024-07-18 10:50:43 -07:00
Dhruv Manilawala
f0d589d7a3 Provide custom job permissions to cargo-dist (#12386)
We can't just directly update the `release.yml` file because that's
auto-generated using `cargo-dist`. So, update the permissions in
`Cargo.toml` and then use `cargo dist generate` to make sure there's no
diff.
2024-07-18 16:49:38 +00:00
Dhruv Manilawala
512c8b2cc5 Provide contents read permission to wasm publish job (#12384)
The job has asked for the permission:
811f78d94d/.github/workflows/publish-wasm.yml (L25)
2024-07-18 22:02:49 +05:30
Carl Meyer
811f78d94d [red-knot] small efficiency improvements and bugfixes to use-def map building (#12373)
Adds inference tests sufficient to give full test coverage of the
`UseDefMapBuilder::merge` method.

In the process I realized that we could implement visiting of if
statements in `SemanticBuilder` with fewer `snapshot`, `restore`, and
`merge` operations, so I restructured that visit a bit.

I also found one correctness bug in the `merge` method (it failed to
extend the given snapshot with "unbound" for any missing symbols,
meaning we would just lose the fact that the symbol could be unbound in
the merged-in path), and two efficiency bugs (if one of the ranges to
merge is empty, we can just use the other one, no need for copies, and
if the ranges are overlapping -- which can occur with nested branches --
we can still just merge them with no copies), and fixed all three.
2024-07-18 09:24:58 -07:00
Dhruv Manilawala
8f1be31289 Update 0.5.3 changelog caption (#12383)
As suggested in
https://github.com/astral-sh/ruff/pull/12381#discussion_r1683123202
2024-07-18 16:17:07 +00:00
Dhruv Manilawala
8cfbac71a4 Bump version to 0.5.3 (#12381) 2024-07-18 16:07:34 +00:00
Charlie Marsh
9460857932 Migrate to standalone docs repo (#12341)
## Summary

See: https://github.com/astral-sh/uv/pull/5081
2024-07-18 15:35:49 +00:00
Dhruv Manilawala
a028ca22f0 Add VS Code specific extension settings (#12380)
## Summary

This PR adds VS Code specific extension settings in the online
documentation.

The content is basically taken from the `package.json` file in the
`ruff-vscode` repository.
2024-07-18 20:58:14 +05:30
Dhruv Manilawala
7953f6aa79 Update versioning policy for editor integration (#12375)
## Summary

Following the stabilization of the Ruff language server, we need to
update our versioning policy to account for any changes in it. This
could be server settings, capability, etc.

This PR also adds a new section for the VS Code extension which is
adopted from [Biome's versioning
policy](https://biomejs.dev/internals/versioning/#visual-studio-code-extension)
for the same.

---------

Co-authored-by: Zanie Blue <contact@zanie.dev>
2024-07-18 15:17:36 +00:00
Charlie Marsh
764d9ab4ee Allow repeated-equality-comparison for mixed operations (#12369)
## Summary

This PR allows us to fix both expressions in `foo == "a" or foo == "b"
or ("c" != bar and "d" != bar)`, but limits the rule to consecutive
comparisons, following https://github.com/astral-sh/ruff/issues/7797.

I think this logic was _probably_ added because of
https://github.com/astral-sh/ruff/pull/12368 -- the intent being that
we'd replace the _entire_ expression.
2024-07-18 11:16:40 -04:00
Charlie Marsh
9b9d701500 Allow additional arguments for sum and max comprehensions (#12364)
## Summary

These can have other arguments, so it seems wrong to gate on single
argument here.

Closes https://github.com/astral-sh/ruff/issues/12358.
2024-07-18 08:37:28 -04:00
Dhruv Manilawala
648cca199b Add docs for Ruff language server (#12344)
## Summary

This PR adds documentation for the Ruff language server.

It mainly does the following:
1. Combines various READMEs containing instructions for different editor
setup in their respective section on the online docs
2. Provide an enumerated list of server settings. Additionally, it also
provides a section for VS Code specific options.
3. Adds a "Features" section which enumerates all the current
capabilities of the native server

For (2), the settings documentation is done manually but a future
improvement (easier after `ruff-lsp` is deprecated) is to move the docs
in to Rust struct and generate the documentation from the code itself.
And, the VS Code extension specific options can be generated by diffing
against the `package.json` in `ruff-vscode` repository.

### Structure

1. Setup: This section contains the configuration for setting up the
language server for different editors
2. Features: This section contains a list of capabilities provided by
the server along with short GIF to showcase it
3. Settings: This section contains an enumerated list of settings in a
similar format to the one for the linter / formatter
4. Migrating from `ruff-lsp`

> [!NOTE]
>
> The settings page is manually written but could possibly be
auto-generated via a macro similar to `OptionsMetadata` on the
`ClientSettings` struct

resolves: #11217 

## Test Plan

Generate and open the documentation locally using:
1. `python scripts/generate_mkdocs.py`
2. `mkdocs serve -f mkdocs.insiders.yml`
2024-07-18 17:41:43 +05:30
Dhruv Manilawala
2e77b775b0 Consider --preview flag for server subcommand (#12208)
## Summary

This PR removes the requirement of `--preview` flag to run the `ruff
server` and instead considers it to be an indicator to turn on preview
mode for the linter and the formatter.

resolves: #12161 

## Test Plan

Add test cases to assert the `preview` value is updated accordingly.

In an editor context, I used the local `ruff` executable in Neovim with
the `--preview` flag and verified that the preview-only violations are
being highlighted.

Running with:
```lua
require('lspconfig').ruff.setup({
  cmd = {
    '/Users/dhruv/work/astral/ruff/target/debug/ruff',
    'server',
    '--preview',
  },
})
```
The screenshot shows that `E502` is highlighted with the below config in
`pyproject.toml`:

<img width="877" alt="Screenshot 2024-07-17 at 16 43 09"
src="https://github.com/user-attachments/assets/c7016ef3-55b1-4a14-bbd3-a07b1bcdd323">
2024-07-18 11:05:01 +05:30
Dhruv Manilawala
ebe5b06c95 Use fallback settings when indexing the project (#12362)
## Summary

This PR updates the settings index building logic in the language server
to consider the fallback settings for applying ignore filters in
`WalkBuilder` and the exclusion via `exclude` / `extend-exclude`.

This flow matches the one in the `ruff` CLI where the root settings is
built by (1) finding the workspace setting in the ancestor directory (2)
finding the user configuration if that's missing and (3) fallback to
using the default configuration.

Previously, the index building logic was being executed before (2) and
(3). This PR reverses the logic so that the exclusion /
`respect_gitignore` is being considered from the default settings if
there's no workspace / user settings. This has the benefit that the
server no longer enters the `.git` directory or any other excluded
directory when a user opens a file in the home directory.

Related to #11366

## Test plan

Opened a test file from the home directory and confirmed with the debug
trace (removed in #12360) that the server excludes the `.git` directory
when indexing.
2024-07-18 09:16:45 +05:30
Carl Meyer
b2a49d8140 [red-knot] better docs for use-def maps (#12357)
Add better doc comments and comments, as well as one debug assertion, to
use-def map building.
2024-07-17 17:50:58 -07:00
Carl Meyer
985a999234 [red-knot] better docs for type inference (#12356)
Add some docs for how type inference works.

Also a couple minor code changes to rearrange or rename for better
clarity.
2024-07-17 13:36:58 -07:00
cake-monotone
1df51b1fbf [pyupgrade] Implement unnecessary-default-type-args (UP043) (#12371)
## Summary

Add new rule and implement for `unnecessary default type arguments`
under the `UP` category (`UP043`).

```py
// < py313
Generator[int, None, None] 

// >= py313
Generator[int]
```

I think that as Python 3.13 develops, there might be more default type
arguments added besides `Generator` and `AsyncGenerator`. So, I made
this more flexible to accommodate future changes.

related issue: #12286

## Test Plan

snapshot included..!
2024-07-17 19:45:43 +00:00
Charlie Marsh
1435b0f022 Remove discard, remove, and pop allowance for loop-iterator-mutation (#12365)
## Summary

Pretty sure this should still be an error, but also, I think I added
this because of ecosystem CI? So want to see what pops up.

Closes https://github.com/astral-sh/ruff/issues/12164.
2024-07-17 17:42:14 +00:00
Charlie Marsh
e39298dcbc Use UTF-8 as default encoding in unspecified-encoding fix (#12370)
## Summary

This is the _intended_ default that PEP 597 _wants_, but it's not
backwards compatible. The fix is already unsafe, so it's better for us
to recommend the desired and expected behavior.

Closes https://github.com/astral-sh/ruff/issues/12069.
2024-07-17 12:57:27 -04:00
Charlie Marsh
1de8ff3308 Detect enumerate iterations in loop-iterator-mutation (#12366)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12164.
2024-07-17 12:03:36 -04:00
Charlie Marsh
72e02206d6 Avoid dropping extra boolean operations in repeated-equality-comparison (#12368)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12062.
2024-07-17 11:49:27 -04:00
Charlie Marsh
80f0116641 Ignore self and cls when counting arguments (#12367)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12320.
2024-07-17 10:49:38 -04:00
Micha Reiser
79b535587b [red-knot] Reload notebook on file change (#12361) 2024-07-17 12:23:48 +00:00
Dhruv Manilawala
6e0cbe0f35 Remove leftover debug log (#12360)
This was a leftover from #12299
2024-07-17 17:52:44 +05:30
Micha Reiser
91338ae902 [red-knot] Add basic workspace support (#12318) 2024-07-17 11:34:21 +02:00
Micha Reiser
0c72577b5d [red-knot] Add notebook support (#12338) 2024-07-17 08:26:33 +00:00
Matthew Runyon
fe04f2b09d Publish wasm API to npm (#12317) 2024-07-17 08:50:38 +02:00
Carl Meyer
073588b48e [red-knot] improve semantic index tests (#12355)
Improve semantic index tests with better assertions than just `.len()`,
and re-add use-definition test that was commented out in the switch to
Salsa initially.
2024-07-16 23:46:49 -07:00
Alex Waygood
9a2dafb43d [red-knot] Add support for editable installs to the module resolver (#12307)
Co-authored-by: Micha Reiser <micha@reiser.io>
Co-authored-by: Carl Meyer <carl@astral.sh>
2024-07-16 18:17:47 +00:00
Carl Meyer
595b1aa4a1 [red-knot] per-definition inference, use-def maps (#12269)
Implements definition-level type inference, with basic control flow
(only if statements and if expressions so far) in Salsa.

There are a couple key ideas here:

1) We can do type inference queries at any of three region
granularities: an entire scope, a single definition, or a single
expression. These are represented by the `InferenceRegion` enum, and the
entry points are the salsa queries `infer_scope_types`,
`infer_definition_types`, and `infer_expression_types`. Generally
per-scope will be used for scopes that we are directly checking and
per-definition will be used anytime we are looking up symbol types from
another module/scope. Per-expression should be uncommon: used only for
the RHS of an unpacking or multi-target assignment (to avoid
re-inferring the RHS once per symbol defined in the assignment) and for
test nodes in type narrowing (e.g. the `test` of an `If` node). All
three queries return a `TypeInference` with a map of types for all
definitions and expressions within their region. If you do e.g.
scope-level inference, when it hits a definition, or an
independently-inferable expression, it should use the relevant query
(which may already be cached) to get all types within the smaller
region. This avoids double-inferring smaller regions, even though larger
regions encompass smaller ones.

2) Instead of building a control-flow graph and lazily traversing it to
find definitions which reach a use of a name (which is O(n^2) in the
worst case), instead semantic indexing builds a use-def map, where every
use of a name knows which definitions can reach that use. We also no
longer track all definitions of a symbol in the symbol itself; instead
the use-def map also records which defs remain visible at the end of the
scope, and considers these the publicly-visible definitions of the
symbol (see below).

Major items left as TODOs in this PR, to be done in follow-up PRs:

1) Free/global references aren't supported yet (only lookup based on
definitions in current scope), which means the override-check example
doesn't currently work. This is the first thing I'll fix as follow-up to
this PR.

2) Control flow outside of if statements and expressions.

3) Type narrowing.

There are also some smaller relevant changes here:

1) Eliminate `Option` in the return type of member lookups; instead
always return `Type::Unbound` for a name we can't find. Also use
`Type::Unbound` for modules we can't resolve (not 100% sure about this
one yet.)

2) Eliminate the use of the terms "public" and "root" to refer to
module-global scope or symbols. Instead consistently use the term
"module-global". It's longer, but it's the clearest, and the most
consistent with typical Python terminology. In particular I don't like
"public" for this use because it has other implications around author
intent (is an underscore-prefixed module-global symbol "public"?). And
"root" is just not commonly used for this in Python.

3) Eliminate the `PublicSymbol` Salsa ingredient. Many non-module-global
symbols can also be seen from other scopes (e.g. by a free var in a
nested scope, or by class attribute access), and thus need to have a
"public type" (that is, the type not as seen from a particular use in
the control flow of the same scope, but the type as seen from some other
scope.) So all symbols need to have a "public type" (here I want to keep
the use of the term "public", unless someone has a better term to
suggest -- since it's "public type of a symbol" and not "public symbol"
the confusion with e.g. initial underscores is less of an issue.) At
least initially, I would like to try not having special handling for
module-global symbols vs other symbols.

4) Switch to using "definitions that reach end of scope" rather than
"all definitions" in determining the public type of a symbol. I'm
convinced that in general this is the right way to go. We may want to
refine this further in future for some free-variable cases, but it can
be changed purely by making changes to the building of the use-def map
(the `public_definitions` index in it), without affecting any other
code. One consequence of combining this with no control-flow support
(just last-definition-wins) is that some inference tests now give more
wrong-looking results; I left TODO comments on these tests to fix them
when control flow is added.

And some potential areas for consideration in the future:

1) Should `symbol_ty` be a Salsa query? This would require making all
symbols a Salsa ingredient, and tracking even more dependencies. But it
would save some repeated reconstruction of unions, for symbols with
multiple public definitions. For now I'm not making it a query, but open
to changing this in future with actual perf evidence that it's better.
2024-07-16 11:02:30 -07:00
Charlie Marsh
30cef67b45 Remove BindingKind::ComprehensionVar (#12347)
## Summary

This doesn't seem to be used anywhere. Maybe it mattered when we didn't
handle generator scopes properly?
2024-07-16 11:18:04 -04:00
Charlie Marsh
d0c5925672 Consider expression before statement when determining binding kind (#12346)
## Summary

I believe these should always bind more tightly -- e.g., in:

```python
for _ in bar(baz for foo in [1]):
    pass
```

The inner `baz` and `foo` should be considered comprehension variables,
not for loop bindings.

We need to revisit this more holistically. In some of these cases,
`BindingKind` should probably be a flag, not an enum, since the values
aren't mutually exclusive. Separately, we should probably be more
precise in how we set it (e.g., by passing down from the parent rather
than sniffing in `handle_node_store`).

Closes https://github.com/astral-sh/ruff/issues/12339
2024-07-16 14:49:26 +00:00
Micha Reiser
b1487b6b4f Ignore more OpenAI notebooks with syntax errors in the ecosystem check (#12342) 2024-07-16 10:32:00 +02:00
Micha Reiser
85ae02d62e [red-knot] Add walk_directories to System (#12297) 2024-07-16 06:40:10 +00:00
konsti
9a817a2922 Insert empty line between suite and alternative branch after def/class (#12294)
When there is a function or class definition at the end of a suite
followed by the beginning of an alternative block, we have to insert a
single empty line between them.

In the if-else-statement example below, we insert an empty line after
the `foo` in the if-block, but none after the else-block `foo`, since in
the latter case the enclosing suite already adds empty lines.

```python
if sys.version_info >= (3, 10):
    def foo():
        return "new"
else:
    def foo():
        return "old"
class Bar:
    pass
```

To do so, we track whether the current suite is the last one in the
current statement with a new option on the suite kind.

Fixes #12199

---------

Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-15 12:59:33 +02:00
Dhruv Manilawala
ecd4b4d943 Build settings index in parallel for the native server (#12299)
## Summary

This PR updates the server to build the settings index in parallel using
similar logic as `python_files_in_path`.

This should help with https://github.com/astral-sh/ruff/issues/11366 but
ideally we would want to build it lazily.

## Test Plan

`cargo insta test`
2024-07-15 09:57:54 +00:00
github-actions[bot]
b9a8cd390f Sync vendored typeshed stubs (#12325)
Close and reopen this PR to trigger CI

Co-authored-by: typeshedbot <>
2024-07-15 07:46:55 +01:00
renovate[bot]
2348714081 Update pre-commit dependencies (#12330) 2024-07-15 07:27:10 +01:00
renovate[bot]
3817b207cf Update NPM Development dependencies (#12331)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 08:08:10 +02:00
renovate[bot]
b1cf9ea663 Update Rust crate clap_complete_command to 0.6.0 (#12332)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 08:05:07 +02:00
renovate[bot]
8ad10b9307 Update Rust crate compact_str to 0.8.0 (#12333)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-15 06:03:23 +00:00
renovate[bot]
9c5524a9a2 Update Rust crate tikv-jemallocator to 0.6.0 (#12335)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 08:01:43 +02:00
renovate[bot]
1530223311 Update Rust crate serde_with to v3.9.0 (#12334)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 07:58:18 +02:00
renovate[bot]
b9671522c4 Update Rust crate thiserror to v1.0.62 (#12329)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 05:54:00 +00:00
renovate[bot]
9918202422 Update Rust crate syn to v2.0.71 (#12328)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 07:52:09 +02:00
renovate[bot]
42e7147860 Update Rust crate clap to v4.5.9 (#12326)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 07:51:48 +02:00
renovate[bot]
25feab93f8 Update Rust crate matchit to v0.8.4 (#12327)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-15 07:50:58 +02:00
Charlie Marsh
dc8db1afb0 Make some amendments to the v0.5.2 changelog (#12319) 2024-07-14 14:47:51 +00:00
Charlie Marsh
18c364d5df [flake8-bandit] Support explicit string concatenations in S310 HTTP detection (#12315)
Closes https://github.com/astral-sh/ruff/issues/12314.
2024-07-14 10:44:08 -04:00
Charlie Marsh
7a7c601d5e Bump version to v0.5.2 (#12316) 2024-07-14 10:43:58 -04:00
Charlie Marsh
3bfbbbc78c Avoid allocation when validating HTTP and HTTPS prefixes (#12313) 2024-07-13 17:25:02 -04:00
Tim Chan
1a3ee45b23 [flake8-bandit] Avoid S310 violations for HTTP-safe f-strings (#12305)
this resolves https://github.com/astral-sh/ruff/issues/12245
2024-07-13 20:57:05 +00:00
Charlie Marsh
65848869d5 [refurb] Make list-reverse-copy an unsafe fix (#12303)
## Summary

I don't know that there's more to do here. We could consider not raising
the violation at all for arguments, but that would have some false
negatives and could also be surprising to users.

Closes https://github.com/astral-sh/ruff/issues/12267.
2024-07-13 15:45:35 -04:00
Charlie Marsh
456d6a2fb2 Consider with blocks as single-item branches (#12311)
## Summary

Ensures that, e.g., the following is not considered a
redefinition-without-use:

```python
import contextlib

foo = None
with contextlib.suppress(ImportError):
    from some_module import foo
```

Closes https://github.com/astral-sh/ruff/issues/12309.
2024-07-13 15:22:17 -04:00
Charlie Marsh
940df67823 Omit code frames for fixes with empty ranges (#12304)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12291.

## Test Plan

```shell
❯ cargo run check ../uv/foo --select INP
/Users/crmarsh/workspace/uv/foo/bar/baz.py:1:1: INP001 File `/Users/crmarsh/workspace/uv/foo/bar/baz.py` is part of an implicit namespace package. Add an `__init__.py`.
Found 1 error.
```
2024-07-12 15:21:28 +00:00
Charlie Marsh
e58713e2ac Make cache-write failures non-fatal (#12302)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12284.
2024-07-12 10:33:54 -04:00
Charlie Marsh
aa5c53b38b Remove 'non-obvious' allowance for E721 (#12300)
## Summary

I don't fully understand the purpose of this. In #7905, it was just
copied over from the previous non-preview implementation. But it means
that (e.g.) we don't treat `type(self.foo)` as a type -- which is wrong.

Closes https://github.com/astral-sh/ruff/issues/12290.
2024-07-12 09:21:43 -04:00
Charlie Marsh
4e6ecb2348 Treat not operations as boolean tests (#12301)
## Summary

Closes https://github.com/astral-sh/ruff/issues/12285.
2024-07-12 08:53:37 -04:00
Alex Waygood
6febd96dfe [red-knot] Add a read_directory() method to the ruff_db::system::System trait (#12289) 2024-07-12 12:31:05 +00:00
Matthias
17e84d5f40 [numpy] Update NPY201: add np.NAN to exception (#12292)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-12 12:09:55 +00:00
Victorien
b6545ce5d6 Use indentation consistently (#12293) 2024-07-12 14:08:56 +02:00
Dhruv Manilawala
90e9aae3f4 Consider nested configs for settings reloading (#12253)
## Summary

This PR fixes a bug in the settings reloading logic to consider nested
configuration in a workspace.

fixes: #11766

## Test Plan


https://github.com/astral-sh/ruff/assets/67177269/69704b7b-44b9-4cc7-b5a7-376bf87c6ef4
2024-07-12 05:00:08 +00:00
Micha Reiser
bd01004a42 Use space separator before parenthesiszed expressions in comprehensions with leading comments. (#12282) 2024-07-11 22:38:12 +02:00
Gaétan Lepage
d0298dc26d Explicitly add schemars to ruff_python_ast Cargo.toml (#12275)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-11 06:46:34 +00:00
Jack Desert
bbb9fe1692 [Docs] Clear instruction for single quotes (linter and formatter) (#12015)
## Summary

In order to use single quotes with both the ruff linter and the ruff
formatter,
two different rules must be applied. This was not clear to me when 
internet searching "configure ruff single quotes" and it eventually
I filed this issue:

https://github.com/astral-sh/ruff/issues/12003
2024-07-10 16:29:29 +00:00
Alex Waygood
5b21922420 [red-knot] Add more stress tests for module resolver invalidation (#12272) 2024-07-10 14:34:06 +00:00
Micha Reiser
abcf07c8c5 Change File::touch_path to only take a SystemPath (#12273) 2024-07-10 12:15:14 +00:00
Alex Waygood
e8b5341c97 [red-knot] Rework module resolver tests (#12260) 2024-07-10 10:40:21 +00:00
Auguste Lalande
880c31d164 [flake8-async] Update ASYNC116 to match upstream (#12266)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-10 09:58:33 +02:00
Auguste Lalande
d365f1a648 [flake8-async] Update ASYNC115 to match upstream (#12262)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-10 09:43:11 +02:00
Micha Reiser
4cc7bc9d32 Use more threads when discovering python files (#12258) 2024-07-10 09:29:17 +02:00
Dhruv Manilawala
0bb2fc6eec Conside include, extend-include for the native server (#12252)
## Summary

This PR updates the native server to consider the `include` and
`extend-include` file resolver settings.

fixes: #12242 

## Test Plan

Note: Settings reloading doesn't work for nested configs which is fixed
in #12253 so the preview here only showcases root level config.

https://github.com/astral-sh/ruff/assets/67177269/e8969128-c175-4f98-8114-0d692b906cc8
2024-07-10 04:12:57 +00:00
Auguste Lalande
855d62cdde [flake8-async] Update ASYNC110 to match upstream (#12261)
## Summary

Update the name of `ASYNC110` to match
[upstream](https://flake8-async.readthedocs.io/en/latest/rules.html).

Also update to the functionality to match upstream by adding support for
`asyncio` and `anyio` (gated behind preview).

Part of https://github.com/astral-sh/ruff/issues/12039.

## Test Plan

Added tests for `asyncio` and `anyio`
2024-07-09 17:17:28 -07:00
Auguste Lalande
88abc6aed8 [flake8-async] Update ASYNC100 to match upstream (#12221)
## Summary

Update the name of `ASYNC100` to match
[upstream](https://flake8-async.readthedocs.io/en/latest/rules.html).

Also update to the functionality to match upstream by supporting
additional context managers from asyncio and anyio. Matching this
[list](https://flake8-async.readthedocs.io/en/latest/glossary.html#timeout-context).

Part of #12039.

## Test Plan

Added the new context managers to the fixture.
2024-07-09 17:55:18 +00:00
Alex Waygood
6fa4e32ad3 [red-knot] Use vendored typeshed stubs for stdlib module resolution (#12224) 2024-07-09 09:21:52 +00:00
Alex Waygood
000dabcd88 [red-knot] Allow module-resolution options to be specified via the CLI (#12246) 2024-07-09 09:16:28 +00:00
Micha Reiser
f8ff42a13d [red-knot] Prevent salsa cancellation from aborting the program (#12183) 2024-07-09 08:26:15 +00:00
Micha Reiser
b5834d57af [red-knot] Only store absolute paths in Files (#12215) 2024-07-09 09:52:13 +02:00
renovate[bot]
3d3ff10bb9 Update dependency react-resizable-panels to v2.0.20 (#12231)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2024-07-09 07:26:08 +00:00
Micha Reiser
ac04380f36 [red-knot] Rename FileSystem to System (#12214) 2024-07-09 07:20:51 +00:00
Auguste Lalande
16a63c88cf [flake8-async] Update ASYNC109 to match upstream (#12236)
## Summary

Update the name of `ASYNC109` to match
[upstream](https://flake8-async.readthedocs.io/en/latest/rules.html).

Also update to the functionality to match upstream by supporting
additional context managers from `asyncio` and `anyio`. This doesn't
change any of the detection functionality, but recommends additional
context managers from `asyncio` and `anyio` depending on context.

Part of https://github.com/astral-sh/ruff/issues/12039.

## Test Plan

Added fixture for asyncio recommendation
2024-07-09 04:14:27 +00:00
Dani Bodor
10f07d88a2 Update help and documentation for --output-format to reflect "full" default (#12248)
fix #12247 

changed help to list "full" as the default for --output-format and
removed "text" as an option (as this is no longer supported).
2024-07-09 02:45:24 +00:00
Evan Rittenhouse
1e04bd0b73 Restrict fowarding newline argument in open() calls to Python versions >= 3.10 (#12244)
Fixes https://github.com/astral-sh/ruff/issues/12222
2024-07-09 02:43:31 +00:00
epenet
2041b0e5fb [flake8-return] Exempt properties from explicit return rule (RET501) (#12243)
First contribution - apologies if something is missing

Fixes #12197
2024-07-08 19:39:30 -07:00
Micha Reiser
bf3d903939 Warn about D203 formatter incompatibility (#12238) 2024-07-08 15:12:14 +02:00
Micha Reiser
64855c5f06 Remove default-run from 'red_knot' crate (#12241) 2024-07-08 09:31:45 +00:00
renovate[bot]
b5ab4ce293 Update pre-commit dependencies (#12232) 2024-07-08 01:51:24 +00:00
renovate[bot]
c396b9f08b Update cloudflare/wrangler-action action to v3.7.0 (#12235) 2024-07-07 21:41:24 -04:00
renovate[bot]
9ed3893e6d Update Rust crate ureq to v2.10.0 (#12234) 2024-07-07 21:41:18 -04:00
renovate[bot]
30c9604c1d Update NPM Development dependencies (#12233) 2024-07-07 21:41:09 -04:00
renovate[bot]
7e4a1c2b33 Update Rust crate syn to v2.0.69 (#12230) 2024-07-07 21:40:42 -04:00
renovate[bot]
e379160941 Update Rust crate serde_with to v3.8.3 (#12229) 2024-07-07 21:40:35 -04:00
renovate[bot]
38b503ebcc Update Rust crate serde_json to v1.0.120 (#12228) 2024-07-07 21:40:29 -04:00
renovate[bot]
dac476f2c0 Update Rust crate serde to v1.0.204 (#12227) 2024-07-07 21:40:20 -04:00
renovate[bot]
754e5d6a7d Update Rust crate imara-diff to v0.1.6 (#12226) 2024-07-07 21:40:03 -04:00
Alex Waygood
d9c15e7a12 [Ecosystem checks] trio has changed its default branch name to main (#12225)
Fixes CI errors seen in
https://github.com/astral-sh/ruff/pull/12224#issuecomment-2212594024

x-ref
17b3644f64
2024-07-07 23:37:27 +01:00
Trim21
757c75752e [flake8-bandit] fix S113 false positive for httpx without timeout argument (#12213)
## Summary

S113 exists because `requests` doesn't have a default timeout, so
request without timeout may hang indefinitely

> B113: Test for missing requests timeout
This plugin test checks for requests or httpx calls without a timeout
specified.
>
> Nearly all production code should use this parameter in nearly all
requests, **Failure to do so can cause your program to hang
indefinitely.**


But httpx has default timeout 5s, so S113 for httpx request without
`timeout` argument is a false positive, only valid case would be
`timeout=None`.

https://www.python-httpx.org/advanced/timeouts/

> HTTPX is careful to enforce timeouts everywhere by default.
>
> The default behavior is to raise a TimeoutException after 5 seconds of
network inactivity.


## Test Plan

snap updated
2024-07-06 14:08:40 -05:00
Micha Reiser
9d61727289 [red-knot] Exclude drop time in benchmark (#12218) 2024-07-06 17:35:00 +02:00
Alex Waygood
a62a432a48 [red-knot] Respect typeshed's VERSIONS file when resolving stdlib modules (#12141) 2024-07-05 22:43:31 +00:00
Charlie Marsh
8198723201 Move SELinux docs to example (#12211) 2024-07-05 20:42:43 +00:00
Maximilian Kolb
7df10ea3e9 Docs: Respect SELinux with podman for docker mount (#12102)
Tested on Fedora 40 with Podman 5.1.1 and ruff "0.5.0" and "latest".
source: https://unix.stackexchange.com/q/651198


## Error without fix

````
$ podman run --rm -it -v .:/io ghcr.io/astral-sh/ruff:latest check
error: Failed to initialize cache at /io/.ruff_cache: Permission denied (os error 13)
warning: Encountered error: Permission denied (os error 13)
All checks passed!

$ podman run --rm -it -v .:/io ghcr.io/astral-sh/ruff:latest format
error: Failed to initialize cache at /io/.ruff_cache: Permission denied (os error 13)
error: Encountered error: Permission denied (os error 13)
````

## Summary

Running ruff by using a docker container requires `:Z` when mounting the
current directory on Fedora with SELinux and Podman.

## Test Plan

````
$ podman run --rm -it -v .:/io:Z ghcr.io/astral-sh/ruff:latest check
$ podman run --rm -it -v .:/io:Z ghcr.io/astral-sh/ruff:0.5.0 check
````
2024-07-05 15:39:00 -05:00
Carl Meyer
0e44235981 [red-knot] intern types using Salsa (#12061)
Intern types using Salsa interning instead of in the `TypeInference`
result.

This eliminates the need for `TypingContext`, and also paves the way for
finer-grained type inference queries.
2024-07-05 12:16:37 -07:00
Dhruv Manilawala
7b50061b43 Fix eslint errors for playground source code (#12207)
Refer
https://github.com/astral-sh/ruff/actions/runs/9808907924/job/27086333001
2024-07-05 13:43:16 +00:00
Dhruv Manilawala
3a72400202 Rename publish workflow file extension (yaml -> yml) (#12206) 2024-07-05 13:12:49 +00:00
Dhruv Manilawala
1b3bff0330 Bump version to 0.5.1 (#12205) 2024-07-05 18:33:14 +05:30
Alex Waygood
0f6f73ecf3 [red-knot] Require that FileSystem objects implement Debug (#12204) 2024-07-05 12:53:30 +01:00
Dhruv Manilawala
7910beecc4 Consider the content of the new cells during notebook sync (#12203)
## Summary

This PR fixes the bug where the server was not considering the
`cells.structure.didOpen` field to sync up the new content of the newly
added cells.

The parameters corresponding to this request provides two fields to get
the newly added cells:
1. `cells.structure.array.cells`: This is a list of `NotebookCell` which
doesn't contain any cell content. The only useful information from this
array is the cell kind and the cell document URI which we use to
initialize the new cell in the index.
2. `cells.structure.didOpen`: This is a list of `TextDocumentItem` which
corresponds to the newly added cells. This actually contains the text
content and the version.

This wasn't a problem before because we initialize the cell with an
empty string and this isn't a problem when someone just creates an empty
cell. But, when someone copy-pastes a cell, the cell needs to be
initialized with the content.

fixes: #12201 

## Test Plan

First, let's see the panic in action:

1. Press <kbd>Esc</kbd> to allow using the keyboard to perform cell
actions (move around, copy, paste, etc.)
2. Copy the second cell with <kbd>c</kbd> key
3. Delete the second cell with <kbd>dd</kbd> key
4. Paste the copied cell with <kbd>p</kbd> key

You can see that the content isn't synced up because the `unused-import`
for `sys` is still being highlighted but it's being used in the second
cell. And, the hover isn't working either. Then, as I start editing the
second cell, it panics.


https://github.com/astral-sh/ruff/assets/67177269/fc58364c-c8fc-4c11-a917-71b6dd90c1ef

Now, here's the preview of the fixed version:


https://github.com/astral-sh/ruff/assets/67177269/207872dd-dca6-49ee-8b6e-80435c7ef22e
2024-07-05 17:10:00 +05:30
Dhruv Manilawala
f3ccd152e9 Revert "Remove --preview as a required argument for ruff server (#12053)" (#12196)
This reverts commit b28dc9ac14.

We're not ready to stabilize the server yet. There's some pending work
for the VS Code extension and documentation improvements.

This change is to unblock Ruff release.
2024-07-05 11:58:35 +05:30
Javier Kauer
1e07bfa373 [pycodestyle] Whitespace after decorator (E204) (#12140)
## Summary

<!-- What's the purpose of the change? What does it do, and why? -->
This is the implementation for the new rule of `pycodestyle (E204)`. It
follows the guidlines described in the contributing site, and as such it
has a new file named `whitespace_after_decorator.rs`, a new test file
called `E204.py`, and as such invokes the `function` in the `AST
statement checker` for functions and functions in classes. Linking #2402
because it has all the pycodestyle rules.

## Test Plan

<!-- How was it tested? -->
The file E204.py, has a `decorator` defined called wrapper, and this
decorator is used for 2 cases. The first one is when a `function` which
has a `decorator` is called in the file, and the second one is when
there is a `class` and 2 `methods` are defined for the `class` with a
`decorator` attached it.

Test file:

``` python
def foo(fun):
    def wrapper():
        print('before')
        fun()
        print('after')
    return wrapper

# No error
@foo
def bar():
    print('bar')

# E204
@ foo
def baz():
    print('baz')

class Test:
    # No error
    @foo
    def bar(self):
        print('bar')

    # E204
    @ foo
    def baz(self):
        print('baz')
```

I am still new to rust and any suggestion is appreciated. Specially with
the way im using native ruff utilities.

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
2024-07-04 23:31:03 +00:00
Mathieu Kniewallner
5e7ba05612 docs(*): fix a few typos, consistency issues and links (#12193)
## Summary

Fixes a few typos, consistency issues and dead links found across the
documentation.
2024-07-04 19:05:51 -04:00
Mathieu Kniewallner
d12570ea00 docs(options): fix some typos and improve consistency (#12191)
## Summary

Fixes a few typos and consistency issues in the "Settings"
documentation:
- use "Ruff" consistently in the few places where "ruff" is used
- use double quotes in the few places where single quotes are used
- add backticks around rule codes where they are currently missing
- update a few example values where they are the same as the defaults,
for consistency

2nd commit might be controversial, as there are many options mentioned
where we don't currently link to the documentation sections, so maybe
it's done on purpose, as this will also appear in the JSON schema where
it's not desirable? If that's the case, I can easily drop it.

## Test Plan

Local testing.
2024-07-04 19:05:03 -04:00
Mathieu Kniewallner
2f3264e148 fix(rules): skip dummy variables for PLR1704 (#12190)
## Summary

Resolves #12157.

## Test Plan

Snapshot tests.
2024-07-04 20:09:31 +00:00
Micha Reiser
e2e0889a30 [red-knot] Add very basic benchmark (#12182) 2024-07-04 15:29:00 +00:00
Micha Reiser
497fd4c505 Update code owners for red knot (#12187) 2024-07-04 12:14:24 +00:00
Dhruv Manilawala
6cdf3e7af8 Reorder installation section in README (#12177)
See https://github.com/astral-sh/ruff/pull/12163#issuecomment-2207016631
2024-07-04 14:11:54 +05:30
Micha Reiser
4d385b60c8 [red-knot] Migrate CLI to Salsa (#11972) 2024-07-04 07:23:45 +00:00
Micha Reiser
262053f85c [red-knot]: Implement HasTy for Alias (#11971) 2024-07-04 07:17:10 +00:00
Micha Reiser
3ce8b9fcae Make Definition a salsa-ingredient (#12151) 2024-07-04 06:46:08 +00:00
Dhruv Manilawala
e6e09ea93a Avoid syntax error notification for source code actions (#12148)
## Summary

This PR avoids the error notification if a user selects the source code
actions and there's a syntax error in the source.

Before https://github.com/astral-sh/ruff/pull/12134, the change would've
been different. But that PR disables generating fixes if there's a
syntax error. This means that we can return an empty map instead as
there won't be any fixes in the diagnostics returned by the `lint_fix`
function.

For reference, following are the screenshot as on `main` with the error:

**VS Code:**

<img width="1715" alt="Screenshot 2024-07-02 at 16 39 59"
src="https://github.com/astral-sh/ruff/assets/67177269/62f3e99b-0b0c-4608-84a2-26aeabcc6933">

**Neovim:**

<img width="1717" alt="Screenshot 2024-07-02 at 16 38 50"
src="https://github.com/astral-sh/ruff/assets/67177269/5d637c36-d7f8-4a3b-8011-9a89708919a8">

fixes: #11931

## Test Plan

Considering the following code snippet where there are two diagnostics
(syntax error and useless semicolon `E703`):
```py
x;

y =
```

### VS Code


https://github.com/astral-sh/ruff/assets/67177269/943537fc-ed8d-448d-8a36-1e34536c4f3e

### Neovim


https://github.com/astral-sh/ruff/assets/67177269/4e6f3372-6e5b-4380-8919-6221066efd5b
2024-07-04 09:37:16 +05:30
Dhruv Manilawala
d870720841 Fix replacement edit range computation (#12171)
## Summary

This PR fixes various bugs for computing the replacement range between
the original and modified source for the language server.

1. When finding the end offset of the source and modified range, we
should apply `zip` on the reversed iterator. The bug was that it was
reversing the already zipped iterator. The problem here is that the
length of both slices aren't going to be the same unless the source
wasn't modified at all. Refer to the [Rust
playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=44f860d31bd26456f3586b6ab530c22f)
where you can see this in action.
2. Skip the first line when computing the start offset because the first
line start value will always be 0 and the default value of the source /
modified range start is also 0. So, comparing 0 and 0 is not useful
which means we can skip the first value.
3. While iterating in the reverse direction, we should only stop if the
line start is strictly less than the source start i.e., we should use
`<` instead of `<=`.

fixes: #12128 

## Test Plan

Add test cases where the text is being inserted, deleted, and replaced
between the original and new source code, validate the replacement
ranges.
2024-07-04 09:24:07 +05:30
Mathieu Kniewallner
8210c1ed5b [flake8-bandit] Detect httpx for S113 (#12174)
## Summary

Bandit now also reports `B113` on `httpx`
(https://github.com/PyCQA/bandit/pull/1060). This PR implements the same
logic, to detect missing or `None` timeouts for `httpx` alongside
`requests`.

## Test Plan

Snapshot tests.
2024-07-03 19:26:55 -04:00
Charlie Marsh
a184f84f69 Upgrade cargo-dist to v0.18.0 (#12175)
## Summary

This enables us to get rid of `allow-dirty`!
2024-07-03 22:38:29 +00:00
Zanie Blue
c487b99e93 Add standalone installers to Ruff installation in README (#12163)
Note this is already included in our installation page at
`docs/installation.md`

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
2024-07-03 13:39:58 -05:00
Thomas Faivre
24524771f2 Fix typo in CHANGELOG for misplaced-bare-raise URL (#12173)
Hi all!

## Summary

Fix a typo.

## Test Plan

URL was tested with curl.



Not much left to say, except that I originally saw this issue on the
blog post: https://astral.sh/blog/ruff-v0.5.0
Not sure how it is related to the CHANGELOG.md file, so the post might
need fixing as well.

Thanks for this incredible tool!

Signed-off-by: Thomas Faivre <thomas.faivre@6wind.com>
2024-07-03 13:39:33 -05:00
Micha Reiser
b950a6c389 Replace Mutex<RefCell> with Mutex in vendored file system" (#12170) 2024-07-03 15:12:13 +02:00
Zanie Blue
47eb6ee42b Fix cache key collisions for paths with separators (#12159)
Closes https://github.com/astral-sh/ruff/issues/12158

Hashing `Path` does not take into account path separators so `foo/bar`
is the same as `foobar` which is no good for our case. I'm guessing this
is an upstream bug, perhaps introduced by
45082b077b?
I'm investigating that further.
2024-07-03 07:36:46 -05:00
Zanie Blue
b4f7d5b2fb Fix latest version detection during Rooster invocations (#12162)
We no longer use the "v" prefix so Rooster detects the wrong version.
2024-07-03 07:35:34 -05:00
Zanie Blue
c13c60bc47 Update release script to match uv (#11496)
See https://github.com/astral-sh/uv/pull/3764

---------

Co-authored-by: T-256 <132141463+T-256@users.noreply.github.com>
2024-07-03 07:35:28 -05:00
Dhruv Manilawala
ee90017d3f Remove demisto/content from ecosystem checks (#12160)
## Summary

Follow-up to https://github.com/astral-sh/ruff/pull/12129 to remove the
`demisto/content` from ecosystem checks. The previous PR removed it from
the deprecated script which I didn't notice until recently.

## Test Plan

Ecosystem comment
2024-07-03 08:25:29 +00:00
Micha Reiser
adfd78e05a Correct parenthesized long nested-expressions example to match Ruff's output (#12153) 2024-07-03 10:03:08 +02:00
Alex Waygood
7c8112614a Remove use of deprecated E999 from the fuzz-parser script (#12150) 2024-07-02 14:18:25 +01:00
Dhruv Manilawala
8f40928534 Enable token-based rules on source with syntax errors (#11950)
## Summary

This PR updates the linter, specifically the token-based rules, to work
on the tokens that come after a syntax error.

For context, the token-based rules only diagnose the tokens up to the
first lexical error. This PR builds up an error resilience by
introducing a `TokenIterWithContext` which updates the `nesting` level
and tries to reflect it with what the lexer is seeing. This isn't 100%
accurate because if the parser recovered from an unclosed parenthesis in
the middle of the line, the context won't reduce the nesting level until
it sees the newline token at the end of the line.

resolves: #11915

## Test Plan

* Add test cases for a bunch of rules that are affected by this change.
* Run the fuzzer for a long time, making sure to fix any other bugs.
2024-07-02 08:57:46 +00:00
Dhruv Manilawala
88a4cc41f7 Disable auto-fix when source has syntax errors (#12134)
## Summary

This PR updates Ruff to **not** generate auto-fixes if the source code
contains syntax errors as determined by the parser.

The main motivation behind this is to avoid infinite autofix loop when
the token-based rules are run over any source with syntax errors in
#11950.

Although even after this, it's not certain that there won't be an
infinite autofix loop because the logic might be incorrect. For example,
https://github.com/astral-sh/ruff/issues/12094 and
https://github.com/astral-sh/ruff/pull/12136.

This requires updating the test infrastructure to not validate for fix
availability status when the source contained syntax errors. This is
required because otherwise the fuzzer might fail as it uses the test
function to run the linter and validate the source code.

resolves: #11455 

## Test Plan

`cargo insta test`
2024-07-02 14:22:51 +05:30
Micha Reiser
dcb9523b1e Address review feedback from 11963 (#12145) 2024-07-02 09:05:55 +02:00
Micha Reiser
25080acb7a [red-knot] Introduce ExpressionNodeKey to improve typing of expression_map (#12142) 2024-07-01 16:15:53 +02:00
Micha Reiser
228b1c4235 [red-knot] Remove Scope::name (#12137) 2024-07-01 15:55:50 +02:00
Micha Reiser
955138b74a Refactor ast_ids traits to take ScopeId instead of VfsFile plus FileScopeId. (#12139) 2024-07-01 15:50:07 +02:00
Dhruv Manilawala
5677614079 Use char-wise width instead of str-width (#12135)
## Summary

This PR updates various references in the linter to compute the
line-width for summing the width of each `char` in a `str` instead of
computing the width of the `str` itself.

Refer to #12133 for more details.

fixes: #12130 

## Test Plan

Add a file with null (`\0`) character which is zero-width. Run this test
case on `main` to make sure it panics and switch over to this branch to
make sure it doesn't panic now.
2024-07-01 18:56:27 +05:30
Micha Reiser
37f260b5af Introduce HasTy trait and SemanticModel facade (#11963) 2024-07-01 14:48:27 +02:00
Dhruv Manilawala
3f25561511 Avoid E275 if keyword followed by comma (#12136)
## Summary

Use the following to reproduce this:
```console
$ cargo run -- check --select=E275,E203 --preview --no-cache ~/playground/ruff/src/play.py --fix
debug error: Failed to converge after 100 iterations in `/Users/dhruv/playground/ruff/src/play.py` with rule codes E275:---
yield,x

---
/Users/dhruv/playground/ruff/src/play.py:1:1: E275 Missing whitespace after keyword
  |
1 | yield,x
  | ^^^^^ E275
  |
  = help: Added missing whitespace after keyword

Found 101 errors (100 fixed, 1 remaining).
[*] 1 fixable with the `--fix` option.
```

## Test Plan

Add a test case and run `cargo insta test`.
2024-07-01 18:04:23 +05:30
Charlie Marsh
eaf33d85ed Remove demisto/content from ecosystem checks (#12129)
## Summary

Unfortunately `demisto/content` uses an explicit `select` for `E999`, so
it will _always_ fail in preview. And they're on a fairly old version.
I'd like to keep checking it, but seems easiest for now to just disable
it.

In response, I've added a few new repos.

---------

Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
2024-07-01 12:20:13 +00:00
renovate[bot]
aaa6cabf3a Update Rust crate dashmap to v6 (#12126)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-07-01 08:48:26 +00:00
Micha Reiser
9a4d9072c1 Update salsa (#12132) 2024-07-01 08:33:04 +00:00
Micha Reiser
4cb6a09fc0 Use CompactString for ModuleName (#12131) 2024-07-01 10:22:34 +02:00
Micha Reiser
5109b50bb3 Use CompactString for Identifier (#12101) 2024-07-01 10:06:02 +02:00
github-actions[bot]
db6ee74cbe Sync vendored typeshed stubs (#12116) 2024-07-01 07:07:45 +01:00
Tom Kuson
d1aeadc009 [pytest] Reverse PT001 and PT0023 defaults (#12106)
## Summary

This patch inverts the defaults for
[pytest-fixture-incorrect-parentheses-style
(PT001)](https://docs.astral.sh/ruff/rules/pytest-fixture-incorrect-parentheses-style/)
and [pytest-incorrect-mark-parentheses-style
(PT003)](https://docs.astral.sh/ruff/rules/pytest-incorrect-mark-parentheses-style/)
to prefer dropping superfluous parentheses.

Presently, Ruff defaults to adding superfluous parentheses on pytest
mark and fixture decorators for documented purpose of consistency; for
example,

```diff
 import pytest


-@pytest.mark.foo
+@pytest.mark.foo()
 def test_bar(): ...
```

This behaviour is counter to the official pytest recommendation and
diverges from the flake8-pytest-style plugin as of version 2.0.0 (see
https://github.com/m-burst/flake8-pytest-style/issues/272). Seeing as
either default satisfies the documented benefit of consistency across a
codebase, it makes sense to change the behaviour to be consistent with
pytest and the flake8 plugin as well.

This change is breaking, so is gated behind preview (at least under my
understanding of Ruff versioning). The implementation of this gating
feature is a bit hacky, but seemed to be the least disruptive solution
without performing invasive surgery on the `#[option()]` macro.

Related to #8796.

### Caveat

Whilst updating the documentation, I sought to reference the pytest
recommendation to drop superfluous parentheses, but couldn't find any
official instruction beyond it being a revealed preference within the
pytest documentation code examples (as well as the linked issues from a
core pytest developer). Thus, the wording of the preference is
deliberately timid; it's to cohere with pytest rather than follow an
explicit guidance.

## Test Plan

`cargo nextest run`

I also ran

```sh
cargo run -p ruff -- check crates/ruff_linter/resources/test/fixtures/flake8_pytest_style/PT001.py --no-cache --diff --select PT001
```

and compared against it with `--preview` to verify that the default does
change under preview (I also repeated this with `echo
'[tool.ruff]\npreview = true' > pyproject.toml` to verify that it works
with a configuration file).

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
2024-07-01 02:06:11 +00:00
Tom Kuson
d80a9d9ce9 [flake8-bugbear] Implement mutable-contextvar-default (B039) (#12113)
## Summary

Implement mutable-contextvar-default (B039) which was added to
flake8-bugbear in https://github.com/PyCQA/flake8-bugbear/pull/476.

This rule is similar to [mutable-argument-default
(B006)](https://docs.astral.sh/ruff/rules/mutable-argument-default) and
[function-call-in-default-argument
(B008)](https://docs.astral.sh/ruff/rules/function-call-in-default-argument),
except that it checks the `default` keyword argument to
`contextvars.ContextVar`.

```
B039.py:19:26: B039 Do not use mutable data structures for ContextVar defaults
   |
18 | # Bad
19 | ContextVar("cv", default=[])
   |                          ^^ B039
20 | ContextVar("cv", default={})
21 | ContextVar("cv", default=list())
   |
   = help: Replace with `None`; initialize with `.set()` after checking for `None`
```

In the upstream flake8-plugin, this rule is written expressly as a
corollary to B008 and shares much of its logic. Likewise, this
implementation reuses the logic of the Ruff implementation of B008,
namely


f765d19402/crates/ruff_linter/src/rules/flake8_bugbear/rules/function_call_in_argument_default.rs (L104-L106)

and 


f765d19402/crates/ruff_linter/src/rules/flake8_bugbear/rules/mutable_argument_default.rs (L106)

Thus, this rule deliberately replicates B006's and B008's heuristics.
For example, this rule assumes that all functions are mutable unless
otherwise qualified. If improvements are to be made to B039 heuristics,
they should probably be made to B006 and B008 as well (whilst trying to
match the upstream implementation).

This rule does not have an autofix as it is unknown where the ContextVar
next used (and it might not be within the same file).

Closes #12054

## Test Plan

`cargo nextest run`
2024-07-01 01:55:49 +00:00
renovate[bot]
85ede4a88c Update docker/build-push-action action to v6 (#12127) 2024-06-30 21:25:24 -04:00
renovate[bot]
d2fefc8bf3 Update NPM Development dependencies (#12122) 2024-06-30 21:21:20 -04:00
renovate[bot]
5fd3f43de1 Update pre-commit hook astral-sh/ruff-pre-commit to v0.5.0 (#12125) 2024-06-30 21:21:03 -04:00
renovate[bot]
ab372f5f48 Update Rust crate uuid to v1.9.1 (#12124) 2024-06-30 21:20:48 -04:00
renovate[bot]
6a8a7b65e9 Update Rust crate bitflags to v2.6.0 (#12123) 2024-06-30 21:20:17 -04:00
renovate[bot]
211cafc571 Update Rust crate log to v0.4.22 (#12118) 2024-06-30 21:19:50 -04:00
renovate[bot]
0b1b94567a Update Rust crate serde_with to v3.8.2 (#12121) 2024-06-30 21:19:33 -04:00
renovate[bot]
168112d343 Update Rust crate serde_json to v1.0.119 (#12120) 2024-06-30 21:19:10 -04:00
renovate[bot]
a5355084b5 Update Rust crate matchit to v0.8.3 (#12119) 2024-06-30 21:18:51 -04:00
renovate[bot]
deedb29e75 Update Rust crate clap to v4.5.8 (#12117) 2024-06-30 21:18:21 -04:00
Micha Reiser
f765d19402 Mention that Cursor is based on rustc's implementation. (#12109) 2024-06-30 16:53:25 +01:00
Gilles Peiffer
d1079680bb [pylint] Add fix for duplicate-bases (PLE0241) (#12105)
## Summary

This adds a fix for the `duplicate-bases` rule that removes the
duplicate base from the class definition.

## Test Plan

`cargo nextest run duplicate_bases`, `cargo insta review`.
2024-06-29 17:48:24 +00:00
Micha Reiser
da78de0439 Remove allcation in parse_identifier (#12103) 2024-06-29 15:00:24 +02:00
Dhruv Manilawala
47b227394e Avoid E275 if keyword is followed by a semicolon (#12095)
fixes: #12094
2024-06-28 20:51:35 +05:30
Charlie Marsh
c326778652 Make requires-python inference robust to == (#12091)
## Summary

Instead of using a high patch version, attempt to detect the
minimum-supported minor.

Closes #12088.
2024-06-28 09:38:17 -04:00
Dhruv Manilawala
434ce307a7 Revert "Use correct range to highlight line continuation error" (#12089)
This PR reverts https://github.com/astral-sh/ruff/pull/12016 with a
small change where the error location points to the continuation
character only. Earlier, it would also highlight the whitespace that
came before it.

The motivation for this change is to avoid panic in
https://github.com/astral-sh/ruff/pull/11950. For example:

```py
\)
```

Playground: https://play.ruff.rs/87711071-1b54-45a3-b45a-81a336a1ea61

The range of `Unknown` token and `Rpar` is the same. Once #11950 is
enabled, the indexer would panic. It won't panic in the stable version
because we stop at the first `Unknown` token.
2024-06-28 18:10:00 +05:30
Charlie Marsh
6a37d7a1e6 Add bandit rule changes to breaking section (#12090)
Closes https://github.com/astral-sh/ruff/issues/12086.
2024-06-28 11:41:45 +00:00
Dhruv Manilawala
0179ff97da Add standalone installer instruction to docs (#12081)
Adopted from `uv` README
(https://github.com/astral-sh/uv#getting-started), this PR adds a
section of using standalone installers in the installation section of
Ruff docs.
2024-06-28 11:34:46 +00:00
Charlie Marsh
2b54fab02c Publish docs and playground on cargo-dist release (#12079)
## Summary

These are now `post-announce-jobs`. So if they fail, the release itself
will still succeed, which seems ok. (If we make them `publish-jobs`,
then we might end up publishing to PyPI but failing the release itself
if one of these fails.)

The intent is that these are still runnable via `workflow_dispatch` too.

Closes https://github.com/astral-sh/ruff/issues/12074.
2024-06-28 07:29:04 -04:00
Micha Reiser
117ab789c9 Add more NPY201 tests (#12087) 2024-06-28 09:58:39 +02:00
Étienne BERSAC
2336c078e2 Improve Emacs configuration (#12070)
Replace black and combine `ruff check --select=I --fix` and `ruff
format`.
2024-06-28 13:09:29 +05:30
Dhruv Manilawala
9fec384d11 Show syntax errors on the playground (#12083)
## Summary

This PR updates the playground to show syntax errors.

(I forgot to update this and noticed it this morning.)

## Test Plan

Build the playground locally and preview it:

<img width="764" alt="Screenshot 2024-06-28 at 11 03 35"
src="https://github.com/astral-sh/ruff/assets/67177269/1fd48d6c-ae41-4672-bf3c-32a61d9946ef">
2024-06-28 13:06:15 +05:30
Dhruv Manilawala
526efd398a Remove E999 to find diagnostic severity (#12080)
## Summary

This PR removes the need to check for `E999` code to find the diagnostic
severity in the server.

**Note:** This is just removing a redundant check because all
`ParseErrors` are converted to `Diagnostic` with default `Error`
severity by
63c92586a1/crates/ruff_server/src/lint.rs (L309-L346)

## Test Plan

Verify that syntax errors are still shown with error severity as it did
before:

<img width="1313" alt="Screenshot 2024-06-28 at 09 30 20"
src="https://github.com/astral-sh/ruff/assets/67177269/75e389a7-01ea-461c-86a2-0dfc244e515d">
2024-06-28 09:31:35 +05:30
Jane Lewis
b28dc9ac14 Remove --preview as a required argument for ruff server (#12053)
## Summary

`ruff server` has reached a point of stabilization, and `--preview` is
no longer required as a flag.

`--preview` is still supported as a flag, since future features may be
need to gated behind it initially.

## Test Plan

A simple way to test this is to run `ruff server` from the command line.
No error about a missing `--preview` argument should be reported.
2024-06-27 19:27:15 +00:00
Mateusz Sokół
59ea94ce88 [numpy] Update NPY201 to include exception deprecations (#12065)
Hi!

This PR updates `NPY201` rule to address
https://github.com/astral-sh/ruff/issues/12034 and partially
https://github.com/numpy/numpy/issues/26800.
2024-06-27 18:56:56 +00:00
Alex Waygood
5bef2b0361 fix link to the release workflow in CONTRIBUTING.md (#12073) 2024-06-27 16:15:31 +00:00
Charlie Marsh
244b923f61 Add necessary permissions for cargo-dist Docker build (#12072) 2024-06-27 17:16:05 +02:00
Micha Reiser
a8b48fce7e Release v0.5.0 (#12068)
Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
2024-06-27 14:46:44 +00:00
Charlie Marsh
04c8597b8a [flake8-simplify] Stabilize detection of Yoda conditions for "constant" collections (SIM300) (#12050)
Co-authored-by: Alex Waygood <alex.waygood@gmail.com>
2024-06-27 13:44:11 +02:00
Alex Waygood
4029a25ebd [Ruff v0.5] Stabilise 15 pylint rules (#12051) 2024-06-27 13:44:11 +02:00
Micha Reiser
0917ce16f4 Update documentation to mention etcetera crate instead of dirs for user configuration discovery (#12064) 2024-06-27 13:44:11 +02:00
Dhruv Manilawala
22cebdf29b Add server config to filter out syntax error diagnostics (#12059)
## Summary

Follow-up from #11901 

This PR adds a new server setting to show / hide syntax errors.

## Test Plan

### VS Code

Using https://github.com/astral-sh/ruff-vscode/pull/504 with the
following config:

```json
{
  "ruff.nativeServer": true,
  "ruff.path": ["/Users/dhruv/work/astral/ruff/target/debug/ruff"],
  "ruff.showSyntaxErrors": true
}
```

First, set `ruff.showSyntaxErrors` to `true`:
<img width="1177" alt="Screenshot 2024-06-27 at 08 34 58"
src="https://github.com/astral-sh/ruff/assets/67177269/5d77547a-a908-4a00-8714-7c00784e8679">

And then set it to `false`:
<img width="1185" alt="Screenshot 2024-06-27 at 08 35 19"
src="https://github.com/astral-sh/ruff/assets/67177269/9720f089-f10c-420b-a2c1-2bbb2245be35">

### Neovim

Using the following Ruff server config:

```lua
require('lspconfig').ruff.setup {
  init_options = {
    settings = {
      showSyntaxErrors = false,
    },
  },
}
```

First, set `showSyntaxErrors` to `true`:
<img width="1279" alt="Screenshot 2024-06-27 at 08 28 03"
src="https://github.com/astral-sh/ruff/assets/67177269/e694e231-91ba-47f8-8e8a-ad2e82b85a45">

And then set it to `false`:
<img width="1284" alt="Screenshot 2024-06-27 at 08 28 20"
src="https://github.com/astral-sh/ruff/assets/67177269/25b86a57-02b1-44f7-9f65-cf5fdde93b0c">
2024-06-27 13:44:11 +02:00
Dhruv Manilawala
72b6c26101 Simplify LinterResult, avoid cloning ParseError (#11903)
## Summary

Follow-up to #11902

This PR simplifies the `LinterResult` struct by avoiding the generic and
not store the `ParseError`.

This is possible because the callers already have access to the
`ParseError` via the `Parsed` output. This also means that we can
simplify the return type of `check_path` and avoid the generic `T` on
`LinterResult`.

## Test Plan

`cargo insta test`
2024-06-27 13:44:11 +02:00
Dhruv Manilawala
73851e73ab Avoid displaying syntax error as log message (#11902)
## Summary

Follow-up to #11901 

This PR avoids displaying the syntax errors as log message now that the
`E999` diagnostic cannot be disabled.

For context on why this was added, refer to
https://github.com/astral-sh/ruff/pull/2505. Basically, we would allow
ignoring the syntax error diagnostic because certain syntax feature
weren't supported back then like `match` statement. And, if a user
ignored `E999`, Ruff would give no feedback if the source code contained
any syntax error. So, this log message was a way to indicate to the user
even if `E999` was disabled.

The current state of the parser is such that (a) it matches with the
latest grammar and (b) it's easy to add support for any new syntax.

**Note:** This PR doesn't remove the `DisplayParseError` struct because
it's still being used by the formatter.

## Test Plan

Update existing snapshots from the integration tests.
2024-06-27 13:44:11 +02:00
Dhruv Manilawala
e7b49694a7 Remove E999 as a rule, disallow any disablement methods for syntax error (#11901)
## Summary

This PR updates the way syntax errors are handled throughout the linter.

The main change is that it's now not considered as a rule which involves
the following changes:
* Update `Message` to be an enum with two variants - one for diagnostic
message and the other for syntax error message
* Provide methods on the new message enum to query information required
by downstream usages

This means that the syntax errors cannot be hidden / disabled via any
disablement methods. These are:
1. Configuration via `select`, `ignore`, `per-file-ignores`, and their
`extend-*` variants
	```console
$ cargo run -- check ~/playground/ruff/src/lsp.py --extend-select=E999
--no-preview --no-cache
	    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.10s
Running `target/debug/ruff check /Users/dhruv/playground/ruff/src/lsp.py
--extend-select=E999 --no-preview --no-cache`
warning: Rule `E999` is deprecated and will be removed in a future
release. Syntax errors will always be shown regardless of whether this
rule is selected or not.
/Users/dhruv/playground/ruff/src/lsp.py:1:8: F401 [*] `abc` imported but
unused
	  |
	1 | import abc
	  |        ^^^ F401
	2 | from pathlib import Path
	3 | import os
	  |
	  = help: Remove unused import: `abc`
	```
3. Command-line flags via `--select`, `--ignore`, `--per-file-ignores`,
and their `--extend-*` variants
	```console
$ cargo run -- check ~/playground/ruff/src/lsp.py --no-cache
--config=~/playground/ruff/pyproject.toml
	    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.11s
Running `target/debug/ruff check /Users/dhruv/playground/ruff/src/lsp.py
--no-cache --config=/Users/dhruv/playground/ruff/pyproject.toml`
warning: Rule `E999` is deprecated and will be removed in a future
release. Syntax errors will always be shown regardless of whether this
rule is selected or not.
/Users/dhruv/playground/ruff/src/lsp.py:1:8: F401 [*] `abc` imported but
unused
	  |
	1 | import abc
	  |        ^^^ F401
	2 | from pathlib import Path
	3 | import os
	  |
	  = help: Remove unused import: `abc`
	```

This also means that the **output format** needs to be updated:
1. The `code`, `noqa_row`, `url` fields in the JSON output is optional
(`null` for syntax errors)
2. Other formats are changed accordingly
For each format, a new test case specific to syntax errors have been
added. Please refer to the snapshot output for the exact format for
syntax error message.

The output of the `--statistics` flag will have a blank entry for syntax
errors:
```
315     F821    [ ] undefined-name
119             [ ] syntax-error
103     F811    [ ] redefined-while-unused
```

The **language server** is updated to consider the syntax errors by
convert them into LSP diagnostic format separately.

### Preview

There are no quick fixes provided to disable syntax errors. This will
automatically work for `ruff-lsp` because the `noqa_row` field will be
`null` in that case.
<img width="772" alt="Screenshot 2024-06-26 at 14 57 08"
src="https://github.com/astral-sh/ruff/assets/67177269/aaac827e-4777-4ac8-8c68-eaf9f2c36774">

Even with `noqa` comment, the syntax error is displayed:
<img width="763" alt="Screenshot 2024-06-26 at 14 59 51"
src="https://github.com/astral-sh/ruff/assets/67177269/ba1afb68-7eaf-4b44-91af-6d93246475e2">

Rule documentation page:
<img width="1371" alt="Screenshot 2024-06-26 at 16 48 07"
src="https://github.com/astral-sh/ruff/assets/67177269/524f01df-d91f-4ac0-86cc-40e76b318b24">


## Test Plan

- [x] Disablement methods via config shows a warning
	- [x] `select`, `extend-select`
	- [ ] ~`ignore`~ _doesn't show any message_
- [ ] ~`per-file-ignores`, `extend-per-file-ignores`~ _doesn't show any
message_
- [x] Disablement methods via command-line flag shows a warning
	- [x] `--select`, `--extend-select`
	- [ ] ~`--ignore`~ _doesn't show any message_
- [ ] ~`--per-file-ignores`, `--extend-per-file-ignores`~ _doesn't show
any message_
- [x] File with syntax errors should exit with code 1
- [x] Language server
	- [x] Should show diagnostics for syntax errors
	- [x] Should not recommend a quick fix edit for adding `noqa` comment
	- [x] Same for `ruff-lsp`

resolves: #8447
2024-06-27 13:44:11 +02:00
Charlie Marsh
c98d8a040f [pyflakes] Stabilize detection of is comparisons to lists, etc. (F632) (#12049)
## Summary

See: https://github.com/astral-sh/ruff/pull/8607. Rare but
uncontroversial.
2024-06-27 13:44:11 +02:00
Charlie Marsh
6f2e024cc6 [flake8-simplify] Stabilize implicit-else simplifications in needless-bool (SIM103) (#12048)
## Summary

See: https://github.com/astral-sh/ruff/pull/10414.

This is a good and intuitive change; we just put it in preview because
it expanded scope a bit.
2024-06-27 13:44:11 +02:00
Charlie Marsh
fb1d7610ac Stabilize allowance of os.environ modifications between imports (#12047)
## Summary

See: https://github.com/astral-sh/ruff/pull/10066.
2024-06-27 13:44:11 +02:00
Alex Waygood
bd845812c7 [Ruff 0.5] Stabilise 11 FURB rules (#12043) 2024-06-27 13:44:11 +02:00
Alex Waygood
c7b2f2b788 [Ruff 0.5] Stabilise manual-dict-comprehension (PERF403) (#12045) 2024-06-27 13:44:11 +02:00
Auguste Lalande
8cc96d7868 Re-code flake8-trio and flake8-async rules to match upstream (#10416)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-06-27 13:44:11 +02:00
Micha Reiser
4b3278fe0b refactor: Compile time enforcement that all top level lint options are checked for deprecation (#12037) 2024-06-27 13:44:11 +02:00
Micha Reiser
41203ea208 Remove output format text and use format full by default (#12010)
Resolves #7349
2024-06-27 13:44:11 +02:00
Alex Waygood
c0d2f439b7 Stabilise django-extra (S610) for release 0.5 (#12029)
The motivation for this rule is solid; it's been in preview for a long
time; the implementation and tests seem sound; there are no open issues
regarding it, and as far as I can tell there never have been any.

The only issue I see is that the docs don't really describe the rule
accurately right now; I fix that in this PR.
2024-06-27 13:44:11 +02:00
Charlie Marsh
b0b68a5601 Migrate release workflow to cargo-dist (#9559)
## Summary

This PR migrates our release workflow to
[`cargo-dist`](https://github.com/axodotdev/cargo-dist). The primary
motivation here is that we want to ship dedicated installers for Ruff
that work across platforms, and `cargo-dist` gives us those installers
out-of-the-box. The secondary motivation is that `cargo-dist` formalizes
some of the patterns that we've built up over time in our own release
process.

At a high level:

- The `release.yml` file is generated by `cargo-dist` with `cargo dist
generate`. It doesn't contain any modifications vis-a-vis the generated
file. (If it's edited out of band from generation, the release fails.)
- Our customizations are inserted as custom steps within the
`cargo-dist` workflow. Specifically, `build-binaries` builds the wheels
and packages them into binaries (as on `main`), while `build-docker.yml`
builds the Docker image. `publish-pypi.yml` publishes the wheels to
PyPI. This is effectively our `release.yaml` (on `main`), broken down
into individual workflows rather than steps within a single workflow.

### Changes from `main`

The workflow is _nearly_ unchanged. We kick off a release manually via
the GitHub Action by providing a tag. If the tag doesn't match the
`Cargo.toml`, the release fails. If the tag matches an already-existing
release, the release fails.

The release proceeds by (in order):

0. Doing some upfront validation via `cargo-dist`.
1. Creating the wheels and archives.
2. Building and pushing the Docker image.
3. Publishing to PyPI (if it's not a "dry run").
4. Creating the GitHub Release (if it's not a "dry run").
5. Notifying `ruff-pre-commit` (if it's not a "dry run").

There are a few changes in the workflow as compared to `main`:

- **We no longer validate the SHA** (just the tag). It's not an input to
the job. The Axo team is considering whether / how to support this.
- **Releases are now published directly** (rather than as draft). Again,
the Axo team is considering whether / how to support this. The downside
of drafts is that the URLs aren't stable, so the installers don't work
_as long as the release is in draft_. This is fine for our workflow. It
seems like the Axo team will add it.
- Releases already contain the latest entry from the changelog (we don't
need to copy it over). This "Just Works", which is nice, though we'll
still want to edit them to add contributors.

There are also a few **breaking changes** for consumers of the binaries:

- **We no longer include the version tag in the file name**. This
enables users to install via `/latest` URLs on GitHub, and is part of
the cargo-dist paradigm.
- **Archives now include an extra level of nesting,** which you can
remove with `--strip-components=1` when untarring.

Here's an example release that I created -- I omitted all the artifacts
since I was just testing a workflow, so none of the installers or links
work, but it gives you a sense for what the release looks like:
https://github.com/charliermarsh/cargodisttest/releases/tag/0.1.13.

### Test Plan

I ran a successful release to completion last night, and installed Ruff
via the installer:

![Screenshot 2024-01-17 at 12 12
53 AM](https://github.com/astral-sh/ruff/assets/1309177/a5334466-2ca3-4279-a453-e912a0805df2)

![Screenshot 2024-01-17 at 12 12
48 AM](https://github.com/astral-sh/ruff/assets/1309177/63ac969e-69a1-488c-8367-4cb783526ca7)

The piece I'm least confident about is the Docker push. We build the
image, but the push fails in my test repo since I haven't wired up the
credentials.
2024-06-27 13:44:11 +02:00
Auguste Lalande
c9a283a5ad [pycodestyle] Remove deprecated functionality from type-comparison (E721) (#11220)
## Summary

Stabilizes `E721` behavior implemented in #7905.

The functionality change in `E721` was implemented in #7905, released in
[v0.1.2](https://github.com/astral-sh/ruff/releases/tag/v0.1.2). And
seems functionally stable since #9676, without an explicit release but
would correspond to
[v0.2.0](https://github.com/astral-sh/ruff/releases/tag/v0.2.0). So the
deprecated functionally should be removable in the next minor release.

resolves: #6465
2024-06-27 13:44:11 +02:00
Alex Waygood
c54bf0c734 Stabilise rules RUF024 and RUF026 (#12026) 2024-06-27 13:44:11 +02:00
Dhruv Manilawala
1968332d93 Redirect PLR1701 to SIM101 (#12021)
## Summary

This rule removes `PLR1701` and redirects it to `SIM101`.

In addition to that, the `SIM101` autofix has been fixed to add padding
if required.

### `PLR1701` has bugs

It also seems that the implementation of `PLR1701` is incorrect in
multiple scenarios. For example, the following code snippet:
```py
# There are two _different_ variables `a` and `b`
if isinstance(a, int) or isinstance(b, bool) or isinstance(a, float):
    pass
# There's another condition `or 1`
if isinstance(self.k, int) or isinstance(self.k, float) or 1:
    pass
```
is fixed to:
```py
# Fixed to only considering variable `a`
if isinstance(a, (float, int)):
    pass
# The additional condition is not present in the fix
if isinstance(self.k, (float, int)):
    pass
```

Playground: https://play.ruff.rs/6cfbdfb7-f183-43b0-b59e-31e728b34190

## Documentation Preview

### `PLR1701`

<img width="1397" alt="Screenshot 2024-06-25 at 11 14 40"
src="https://github.com/astral-sh/ruff/assets/67177269/779ee84d-7c4d-4bb8-a3a4-c2b23a313eba">

## Test Plan

Remove the test cases for `PLR1701`, port the padding test case to
`SIM101` and update the snapshot.
2024-06-27 13:44:11 +02:00
Alex Waygood
0a24d70bfd [Ruff v0.5] Fix ZeroDivisionErrors in the ecosystem check (#12027)
Seen in CI in https://github.com/astral-sh/ruff/pull/12026
2024-06-27 13:44:11 +02:00
Charlie Marsh
a4d711f25f Modify diagnostic ranges for shell-related bandit rules (#10667)
Closes https://github.com/astral-sh/ruff/issues/9994.
2024-06-27 13:44:11 +02:00
Sergey Chudov
c46ae3a3cf Added ignoring deprecated rules for --select=ALL (#10497)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-06-27 13:44:11 +02:00
Micha Reiser
9e8a45f343 Error when using the tab-size option (#12006) 2024-06-27 13:44:11 +02:00
Micha Reiser
36a9efdb48 Remove check, --explain, --clean, --generate-shell-completion aliases (#12011) 2024-06-27 13:44:11 +02:00
T-256
d6a2cad9c2 Drop deprecated nursery rule group (#10172)
Co-authored-by: Micha Reiser <micha@reiser.io>
Resolves https://github.com/astral-sh/ruff/issues/7992
2024-06-27 13:44:11 +02:00
Charlie Marsh
117203f713 Read user configuration from ~/.config/ruff/ruff.toml on macOS (#11115)
Co-authored-by: Micha Reiser <micha@reiser.io>
Closes https://github.com/astral-sh/ruff/issues/10739.
2024-06-27 13:44:11 +02:00
renovate[bot]
12effb897c Update Rust crate unicode-width to v0.1.13 (#11194)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-06-27 13:44:11 +02:00
Charlie Marsh
bfe36b9584 Use rule name rather than message in --statistics (#11697)
Co-authored-by: Micha Reiser <micha@reiser.io>
Closes https://github.com/astral-sh/ruff/issues/11097.
2024-06-27 13:44:11 +02:00
Tibor Reiss
b24e4473c5 Remove deprecated configuration '--show-source` (#9814)
Co-authored-by: Micha Reiser <micha@reiser.io>
Fixes parts of https://github.com/astral-sh/ruff/issues/7650
2024-06-27 13:44:11 +02:00
Dhruv Manilawala
a4688aebe9 Use TokenSource to find new location for re-lexing (#12060)
## Summary

This PR splits the re-lexing logic into two parts:
1. `TokenSource`: The token source will be responsible to find the
position the lexer needs to be moved to
2. `Lexer`: The lexer will be responsible to reduce the nesting level
and move itself to the new position if recovered from a parenthesized
context

This split makes it easy to find the new lexer position without needing
to implement the backwards lexing logic again which would need to handle
cases involving:
* Different kinds of newlines
* Line continuation character(s)
* Comments
* Whitespaces

### F-strings

This change did reveal one thing about re-lexing f-strings. Consider the
following example:
```py
f'{'
#  ^
f'foo'
```

Here, the quote as highlighted by the caret (`^`) is the start of a
string inside an f-string expression. This is unterminated string which
means the token emitted is actually `Unknown`. The parser tries to
recover from it but there's no newline token in the vector so the new
logic doesn't recover from it. The previous logic does recover because
it's looking at the raw characters instead.

The parser would be at `FStringStart` (the one for the second line) when
it calls into the re-lexing logic to recover from an unterminated
f-string on the first line. So, moving backwards the first character
encountered is a newline character but the first token encountered is an
`Unknown` token.

This is improved with #12067 

fixes: #12046 
fixes: #12036

## Test Plan

Update the snapshot and validate the changes.
2024-06-27 17:12:39 +05:30
Dhruv Manilawala
e137c824c3 Avoid consuming newline for unterminated string (#12067)
## Summary

This PR fixes the lexer logic to **not** consume the newline character
for an unterminated string literal.

Currently, the lexer would consume it to be part of the string itself
but that would be bad for recovery because then the lexer wouldn't emit
the newline token ever. This PR fixes that to avoid consuming the
newline character in that case.

This was discovered during https://github.com/astral-sh/ruff/pull/12060.

## Test Plan

Update the snapshots and validate them.
2024-06-27 17:02:48 +05:30
baggiponte
55f4812051 docs: add and formatter to CLI startup message (#12042)
Co-authored-by: Micha Reiser <micha@reiser.io>
2024-06-26 10:57:10 +00:00
Dhruv Manilawala
47c9ed07f2 Consider 2-character EOL before line continuation (#12035)
## Summary

This PR fixes a bug introduced in
https://github.com/astral-sh/ruff/pull/12008 which didn't consider the
two character newline after the line continuation character.

For example, consider the following code highlighted with whitespaces:
```py
call(foo # comment \\r\n
\r\n
def bar():\r\n
....pass\r\n
```
The lexer is at `def` when it's running the re-lexing logic and trying
to move back to a newline character. It encounters `\n` and it's being
escaped (incorrect) but `\r` is being escaped, so it moves the lexer to
`\n` character. This creates an overlap in token ranges which causes the
panic.

```
Name 0..4
Lpar 4..5
Name 5..8
Comment 9..20
NonLogicalNewline 20..22 <-- overlap between
Newline 21..22           <-- these two tokens
NonLogicalNewline 22..23
Def 23..26
...
```

fixes: #12028 

## Test Plan

Add a test case with line continuation and windows style newline
character.
2024-06-26 14:00:48 +05:30
Dhruv Manilawala
7cb2619ef5 Add syntax error for empty type parameter list (#12030)
## Summary

(I'm pretty sure I added this in the parser re-write but must've got
lost in the rebase?)

This PR raises a syntax error if the type parameter list is empty.

As per the grammar, there should be at least one type parameter:
```
type_params: 
    | invalid_type_params
    | '[' type_param_seq ']' 

type_param_seq: ','.type_param+ [','] 
```

Verified via the builtin `ast` module as well:
```console    
$ python3.13 -m ast parser/_.py
Traceback (most recent call last):
  [..]
  File "parser/_.py", line 1
    def foo[]():
            ^
SyntaxError: Type parameter list cannot be empty
```

## Test Plan

Add inline test cases and update the snapshots.
2024-06-26 08:10:35 +05:30
Charlie Marsh
83fe44728b Match import name ignores against both name and alias (#12033)
## Summary

Right now, it's inconsistent... We sometimes match against the name, and
sometimes against the alias (`asname`). I could see a case for always
matching against the name, but matching against both seems fine too,
since the rule is really about the combination of the two?

Closes https://github.com/astral-sh/ruff/issues/12031.
2024-06-25 18:47:19 -04:00
Alex Waygood
00e456ead4 Fix RUF027 false positives if gettext is imported using an alias (#12025) 2024-06-25 19:10:25 +01:00
Dhruv Manilawala
2853751344 Avoid E203 for f-string debug expression (#12024)
## Summary

This PR fixes a bug where Ruff would raise `E203` for f-string debug
expression. This isn't valid because whitespaces are important for debug
expressions.

fixes: #12023

## Test Plan

Add test case and make sure there are no snapshot changes.
2024-06-25 15:00:31 +05:30
Dhruv Manilawala
7109214b57 Update parser tests to validate token ranges (#12019)
## Summary

This PR updates the parser test infrastructure to validate the token
ranges.

From the code documentation:
```
/// Verifies that:
/// * the ranges are strictly increasing when loop the tokens in insertion order
/// * all ranges are within the length of the source code
```

Follow-up from #12016 and #12017
resolves: #11938

## Test Plan

Make sure that there are no failures.
2024-06-25 08:14:28 +00:00
Dhruv Manilawala
d930e97212 Do not include newline for unterminated string range (#12017)
## Summary

This PR updates the unterminated string error range to not include the
final newline character.

This is a follow-up to #12016 and required for #12019

This is not done for when the unterminated string goes till the end of
file (not a newline character). The unterminated f-string range is
correct.

### Why is this required for #12019 ?

Because otherwise the token ranges will overlap. For example:
```py
f"{"
f"{foo!r"
```

Here, the re-lexing logic recovers from an unterminated f-string and
thus emitting a `Newline` token for the one at the end of the first
line. But, currently the `Unknown` and the `Newline` token would overlap
because the `Unknown` token (unterminated string literal) range would
include the newline character.

## Test Plan

Update and validate the snapshot.
2024-06-25 08:10:07 +00:00
Dhruv Manilawala
9c1b6ec411 Use correct range to highlight line continuation error (#12016)
## Summary

This PR fixes the range highlighted for the line continuation error.

Previously, it would highlight an incorrect range:
```
1 | call(a, b, \\\
  |           ^^ Syntax Error: unexpected character after line continuation character
2 | 
3 | def bar():
  |
```

And now:
```
  |
1 | call(a, b, \\\
  |             ^ Syntax Error: unexpected character after line continuation character
2 | 
3 | def bar():
  |
```

This is implemented by avoiding to update the token range for the
`Unknown` token which is emitted when there's a lexical error. Instead,
the `push_error` helper method will be responsible to update the range
to the error location.

This actually becomes a requirement which can be seen in follow-up PRs.

## Test Plan

Update and validate the snapshot.
2024-06-25 13:35:24 +05:30
Micha Reiser
692309ebd7 [red-knot] Fix tests in release builds (#12022) 2024-06-25 06:34:35 +00:00
Dhruv Manilawala
68a8978454 Consider line continuation character for re-lexing (#12008)
## Summary

This PR fixes a bug where the re-lexing logic didn't consider the line
continuation character being present before the newline character. This
meant that the lexer was being moved back to the newline character which
is actually ignored via `\`.

Considering the following code:
```py
f'middle {'string':\
        'format spec'}

```

The old token stream is:
```
...
Colon 18..19
FStringMiddle 19..29 (flags = F_STRING)
Newline 20..21
Indent 21..29
String 29..42
Rbrace 42..43
...
```

Notice how the ranges are overlapping between the `FStringMiddle` token
and the tokens emitted after moving the lexer backwards.

After this fix, the new token stream which is without moving the lexer
backwards in this scenario:
```
FStringStart 0..2 (flags = F_STRING)
FStringMiddle 2..9 (flags = F_STRING)
Lbrace 9..10
String 10..18
Colon 18..19
FStringMiddle 19..29 (flags = F_STRING)
FStringEnd 29..30 (flags = F_STRING)
Name 30..36
Name 37..41
Unknown 41..44
Newline 44..45
```

fixes: #12004 

## Test Plan

Add test cases and update the snapshots.
2024-06-25 02:13:54 +00:00
Alex Waygood
cd2af3be73 [red-knot] Reduce allocations when normalizing VendoredPaths (#11992) 2024-06-24 13:08:01 +01:00
Micha Reiser
e2e98d005c Fix missing related settings header (#12013) 2024-06-24 12:29:10 +02:00
renovate[bot]
32ccc38365 Update NPM Development dependencies (#11999) 2024-06-23 20:50:01 -04:00
renovate[bot]
35151080b1 Update pre-commit dependencies (#11998) 2024-06-23 20:49:55 -04:00
renovate[bot]
53a80a5c11 Update Rust crate rustc-hash to v2 (#12001) 2024-06-23 20:46:42 -04:00
renovate[bot]
446ad0ba44 Update docker/build-push-action action to v6 (#12002) 2024-06-24 00:29:47 +00:00
renovate[bot]
d897811f00 Update Rust crate mimalloc to v0.1.43 (#11993) 2024-06-23 20:29:02 -04:00
renovate[bot]
5d6b26ed33 Update dependency monaco-editor to ^0.50.0 (#12000) 2024-06-23 20:28:52 -04:00
renovate[bot]
49e5357dac Update Rust crate syn to v2.0.68 (#11996) 2024-06-24 00:21:36 +00:00
renovate[bot]
e02c44e46c Update Rust crate url to v2.5.2 (#11997) 2024-06-24 00:21:23 +00:00
renovate[bot]
86cbf2d594 Update Rust crate strum to v0.26.3 (#11995) 2024-06-24 00:19:51 +00:00
renovate[bot]
b79f1ed7f5 Update Rust crate proc-macro2 to v1.0.86 (#11994) 2024-06-24 00:19:06 +00:00
ukyen
068b75cc8e [pyflakes] Detect assignments that shadow definitions (F811) (#11961)
## Summary
This PR updates `F811` rule to include assignment as possible shadowed
binding. This will fix issue: #11828 .

## Test Plan

Add a test file, F811_30.py, which includes a redefinition after an
assignment and a verified snapshot file.
2024-06-23 13:29:32 -04:00
Denny Wong
c3f61a012e [ruff] Add assert-with-print-expression rule (#11974) (#11981)
## Summary

Addresses #11974 to add a `RUF` rule to replace `print` expressions in
`assert` statements with the inner message.

An autofix is available, but is considered unsafe as it changes
behaviour of the execution, notably:
- removal of the printout in `stdout`, and
- `AssertionError` instance containing a different message.

While the detection of the condition is a straightforward matter,
deciding how to resolve the print arguments into a string literal can be
a relatively subjective matter. The implementation of this PR chooses to
be as tolerant as possible, and will attempt to reformat any number of
`print` arguments containing single or concatenated strings or variables
into either a string literal, or a f-string if any variables or
placeholders are detected.

## Test Plan

`cargo test`.

## Examples
For ease of discussion, this is the diff for the tests:

```diff
 # Standard Case
 # Expects:
 # - single StringLiteral
-assert True, print("This print is not intentional.")
+assert True, "This print is not intentional."
 
 # Concatenated string literals
 # Expects:
 # - single StringLiteral
-assert True, print("This print" " is not intentional.")
+assert True, "This print is not intentional."
 
 # Positional arguments, string literals
 # Expects:
 # - single StringLiteral concatenated with " "
-assert True, print("This print", "is not intentional")
+assert True, "This print is not intentional"
 
 # Concatenated string literals combined with Positional arguments
 # Expects:
 # - single stringliteral concatenated with " " only between `print` and `is`
-assert True, print("This " "print", "is not intentional.")
+assert True, "This print is not intentional."
 
 # Positional arguments, string literals with a variable
 # Expects:
 # - single FString concatenated with " "
-assert True, print("This", print.__name__, "is not intentional.")
+assert True, f"This {print.__name__} is not intentional."

 # Mixed brackets string literals
 # Expects:
 # - single StringLiteral concatenated with " "
-assert True, print("This print", 'is not intentional', """and should be removed""")
+assert True, "This print is not intentional and should be removed"
 
 # Mixed brackets with other brackets inside
 # Expects:
 # - single StringLiteral concatenated with " " and escaped brackets
-assert True, print("This print", 'is not "intentional"', """and "should" be 'removed'""")
+assert True, "This print is not \"intentional\" and \"should\" be 'removed'"
 
 # Positional arguments, string literals with a separator
 # Expects:
 # - single StringLiteral concatenated with "|"
-assert True, print("This print", "is not intentional", sep="|")
+assert True, "This print|is not intentional"
 
 # Positional arguments, string literals with None as separator
 # Expects:
 # - single StringLiteral concatenated with " "
-assert True, print("This print", "is not intentional", sep=None)
+assert True, "This print is not intentional"
 
 # Positional arguments, string literals with variable as separator, needs f-string
 # Expects:
 # - single FString concatenated with "{U00A0}"
-assert True, print("This print", "is not intentional", sep=U00A0)
+assert True, f"This print{U00A0}is not intentional"
 
 # Unnecessary f-string
 # Expects:
 # - single StringLiteral
-assert True, print(f"This f-string is just a literal.")
+assert True, "This f-string is just a literal."
 
 # Positional arguments, string literals and f-strings
 # Expects:
 # - single FString concatenated with " "
-assert True, print("This print", f"is not {'intentional':s}")
+assert True, f"This print is not {'intentional':s}"
 
 # Positional arguments, string literals and f-strings with a separator
 # Expects:
 # - single FString concatenated with "|"
-assert True, print("This print", f"is not {'intentional':s}", sep="|")
+assert True, f"This print|is not {'intentional':s}"
 
 # A single f-string
 # Expects:
 # - single FString
-assert True, print(f"This print is not {'intentional':s}")
+assert True, f"This print is not {'intentional':s}"
 
 # A single f-string with a redundant separator
 # Expects:
 # - single FString
-assert True, print(f"This print is not {'intentional':s}", sep="|")
+assert True, f"This print is not {'intentional':s}"
 
 # Complex f-string with variable as separator
 # Expects:
 # - single FString concatenated with "{U00A0}", all placeholders preserved
 condition = "True is True"
 maintainer = "John Doe"
-assert True, print("Unreachable due to", condition, f", ask {maintainer} for advice", sep=U00A0)
+assert True, f"Unreachable due to{U00A0}{condition}{U00A0}, ask {maintainer} for advice"
 
 # Empty print
 # Expects:
 # - `msg` entirely removed from assertion
-assert True, print()
+assert True
 
 # Empty print with separator
 # Expects:
 # - `msg` entirely removed from assertion
-assert True, print(sep=" ")
+assert True
 
 # Custom print function that actually returns a string
 # Expects:
@@ -100,4 +100,4 @@
 # Use of `builtins.print`
 # Expects:
 # - single StringLiteral
-assert True, builtins.print("This print should be removed.")
+assert True, "This print should be removed."
```

## Known Issues

The current implementation resolves all arguments and separators of the
`print` expression into a single string, be it
`StringLiteralValue::single` or a `FStringValue::single`. This:

- potentially joins together strings well beyond the ideal character
limit for each line, and
- does not preserve multi-line strings in their original format, in
favour of a single line `"...\n...\n..."` format.

These are purely formatting issues only occurring in unusual scenarios.

Additionally, the autofix will tolerate `print` calls that were
previously invalid:

```python
assert True, print("this", "should not be allowed", sep=42)
```

This will be transformed into
```python
assert True, f"this{42}should not be allowed"
```
which some could argue is an alteration of behaviour.

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
2024-06-23 16:54:55 +00:00
Gilles Peiffer
0c8b5eb17a Clarify special control flow parameters for PLR0917: too-many-positional (#11978) 2024-06-23 11:16:09 -04:00
Alex Waygood
375d2c87b2 [red-knot] Simplify conversions from std::path::Path to VendoredPath(Buf) (#11988) 2024-06-23 15:52:26 +01:00
Alex Waygood
f846fc9e07 [red-knot] Once again, add more tests asserting that the VendoredFileSystem and the VERSIONS parser work with the vendored typeshed stubs (#11987) 2024-06-23 14:57:43 +01:00
Alex Waygood
92b145e56a [red-knot] Manually implement Debug for VendoredFileSystem (#11983) 2024-06-23 14:25:56 +01:00
Eric Nielsen
715609663a Update PEP reference in future_rewritable_type_annotation.rs (#11985)
<!--
Thank you for contributing to Ruff! To help us out with reviewing,
please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

Documentation mentions:

> PEP 563 enabled the use of a number of convenient type annotations,
such as `list[str]` instead of `List[str]`

but it meant [PEP 585](https://peps.python.org/pep-0585/) instead.

[PEP 563](https://peps.python.org/pep-0563/) is the one defining `from
__future__ import annotations`.

## Test Plan

No automated test required, just verify that
https://peps.python.org/pep-0585/ is the correct reference.
2024-06-22 20:15:12 -05:00
Micha Reiser
519a278899 [red-knot] Remove itertools dependency from ruff_db (#11984) 2024-06-22 18:37:51 +00:00
Alex Waygood
91d091bb81 [red-knot] Use POSIX representations of paths when creating the typeshed zip file (#11982) 2024-06-22 17:54:19 +01:00
Rune Lausen
79d72e6479 docs(integrations): fix link to python-lsp-server (#11980)
Co-authored-by: Rune Lausen <rune@lausennet.dk>
2024-06-22 13:17:50 +01:00
Dhruv Manilawala
81160320de Manual impl of Debug on Token (#11958)
## Summary

I look at the token stream a lot, not specifically in the playground but
in the terminal output and it's annoying to scroll a lot to find
specific location. Most of the information is also redundant.

The final format we end up with is: `<kind> <range> (flags = ...)` e.g.,
`String 0..4 (flags = BYTE_STRING)` where the flags part is only
populated if there are any flags set.
2024-06-22 04:18:24 +00:00
R1kaB3rN
b1e7bf76da Add Open Wine Components to "Who's Using Ruff?" (#11976) 2024-06-21 19:59:40 +00:00
Jane Lewis
ad4a88657b Remove usage of std::path::absolute from snapshot test (#11973) 2024-06-21 20:21:12 +01:00
Alex Waygood
611f4e5c5f Revert "[red-knot] Add more tests asserting that the VendoredFileSystem and the VERSIONS parser work with the vendored typeshed stubs" (#11975) 2024-06-21 19:14:24 +00:00
Jane Lewis
791f6a1820 ruff server: Closing an untitled, unsaved notebook document no longer throws an error (#11942)
## Summary

Fixes #11651.
Fixes #11851.

We were double-closing a notebook document from the index, once in
`textDocument/didClose` and then in the `notebookDocument/didClose`
handler. The second time this happens, taking a snapshot fails.

I've rewritten how we handle snapshots for closing notebooks / notebook
cells so that any failure is simply logged instead of propagating
upwards. This implementation works consistently even if we don't receive
`textDocument/didClose` notifications for each specific cell, since they
get closed (and the diagnostics get cleared) in the notebook document
removal process.

## Test Plan

1. Open an untitled, unsaved notebook with the `Create: New Jupyter
Notebook` command from the VS Code command palette (`Ctrl/Cmd + Shift +
P`)
2. Without saving the document, close it.
3. No error popup should appear.
4. Run the debug command (`Ruff: print debug information`) to confirm
that there are no open documents
2024-06-21 10:53:30 -07:00
Alex Waygood
3d0230f469 [red-knot] Add more tests asserting that the VendoredFileSystem and the VERSIONS parser work with the vendored typeshed stubs (#11970) 2024-06-21 16:53:10 +00:00
Alex Waygood
da79bac33c [red-knot] Make the VERSIONS parser use ModuleName as its key type (#11968) 2024-06-21 15:46:45 +00:00
Alex Waygood
8de0cd6565 [red-knot] Move typeshed VERSIONS parser to the module resolver crate (#11967) 2024-06-21 16:41:08 +01:00
Alex Waygood
3277d031f8 [red-knot] Move the vendored typeshed stubs to the module resolver crate (#11966) 2024-06-21 13:47:54 +00:00
Alex Waygood
736a4ead14 [red-knot] Move module-resolution logic to its own crate (#11964) 2024-06-21 13:25:44 +00:00
Dhruv Manilawala
27ebff36ec Remove Token::is_trivia method (#11962)
Sorry, a leftover from my rebase
2024-06-21 10:24:42 +00:00
Dhruv Manilawala
96da136e6a Move token and error structs into related modules (#11957)
## Summary

This PR does some housekeeping into moving certain structs into related
modules. Specifically,
1. Move `LexicalError` from `lexer.rs` to `error.rs` which also contains
the `ParseError`
2. Move `Token`, `TokenFlags` and `TokenValue` from `lexer.rs` to
`token.rs`
2024-06-21 10:07:19 +00:00
Dhruv Manilawala
4667d8697c Remove duplication around is_trivia functions (#11956)
## Summary

This PR removes the duplication around `is_trivia` functions.

There are two of them in the codebase:
1. In `pycodestyle`, it's for newline, indent, dedent, non-logical
newline and comment
2. In the parser, it's for non-logical newline and comment

The `TokenKind::is_trivia` method used (1) but that's not correct in
that context. So, this PR introduces a new `is_non_logical_token` helper
method for the `pycodestyle` crate and updates the
`TokenKind::is_trivia` implementation with (2).

This also means we can remove `Token::is_trivia` method and the
standalone `token_source::is_trivia` function and use the one on
`TokenKind`.

## Test Plan

`cargo insta test`
2024-06-21 10:02:40 +00:00
Will Yardley
690e94f4fb ruff-check: update docs for fix_only (#11959) 2024-06-21 08:13:04 +02:00
dedebenui
9fd84e63bc Update trapz and in1d deprecation for NPY201 (#11948) 2024-06-21 08:08:00 +02:00
Jane Lewis
3ab7a8da73 Add Jupyter Notebook document change snapshot test (#11944)
## Summary

Closes #11914.

This PR introduces a snapshot test that replays the LSP requests made
during a document formatting request, and confirms that the notebook
document is updated in the expected way.
2024-06-21 05:29:27 +00:00
Micha Reiser
927069c12f [red-knot] Upgrade to Salsa 3.0 (#11952) 2024-06-20 20:19:16 +01:00
Jane Lewis
c8ff89c73c ruff server: Support the usage of tildes and environment variables in logFile (#11945)
## Summary

Fixes #11911.

`shellexpand` is now used on `logFile` to expand the file path, allowing
the usage of `~` and environment variables.

## Test Plan

1. Set `logFile` in either Neovim or Helix to a file path that needs
expansion, like `~/.config/helix/ruff_logs.txt`.
2. Ensure that `RUFF_TRACE` is set to `messages` or `verbose`
3. Open a Python file in Neovim/Helix
4. Confirm that a file at the path specified was created, with the
expected logs.
2024-06-20 18:51:46 +00:00
Dhruv Manilawala
4c05d7a6d4 Provide link on how to re-run all failed jobs (#11954) 2024-06-20 23:19:43 +05:30
2277 changed files with 64450 additions and 33719 deletions

1
.gitattributes vendored
View File

@@ -8,6 +8,7 @@ crates/ruff_linter/resources/test/fixtures/pycodestyle/W391_3.py text eol=crlf
crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples_crlf.py text eol=crlf
crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples_crlf.py.snap text eol=crlf
crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py text eol=crlf
crates/ruff_python_parser/resources/invalid/re_lex_logical_token_windows_eol.py text eol=crlf
crates/ruff_python_parser/resources/invalid/re_lex_logical_token_mac_eol.py text eol=cr

3
.github/CODEOWNERS vendored
View File

@@ -17,4 +17,5 @@
/scripts/fuzz-parser/ @AlexWaygood
# red-knot
/crates/red_knot/ @carljm @MichaReiser
/crates/red_knot* @carljm @MichaReiser @AlexWaygood
/crates/ruff_db/ @carljm @MichaReiser @AlexWaygood

View File

@@ -8,7 +8,7 @@
semanticCommits: "disabled",
separateMajorMinor: false,
prHourlyLimit: 10,
enabledManagers: ["github-actions", "pre-commit", "cargo", "pep621", "npm"],
enabledManagers: ["github-actions", "pre-commit", "cargo", "pep621", "pip_requirements", "npm"],
cargo: {
// See https://docs.renovatebot.com/configuration-options/#rangestrategy
rangeStrategy: "update-lockfile",
@@ -16,6 +16,9 @@
pep621: {
fileMatch: ["^(python|scripts)/.*pyproject\\.toml$"],
},
pip_requirements: {
fileMatch: ["^docs/requirements.*\\.txt$"],
},
npm: {
fileMatch: ["^playground/.*package\\.json$"],
},
@@ -48,6 +51,14 @@
matchManagers: ["cargo"],
enabled: false,
},
{
// `mkdocs-material` requires a manual update to keep the version in sync
// with `mkdocs-material-insider`.
// See: https://squidfunk.github.io/mkdocs-material/insiders/upgrade/
matchManagers: ["pip_requirements"],
matchPackagePatterns: ["mkdocs-material"],
enabled: false,
},
{
groupName: "pre-commit dependencies",
matchManagers: ["pre-commit"],

View File

@@ -1,21 +1,23 @@
name: "[ruff] Release"
# Build ruff on all platforms.
#
# Generates both wheels (for PyPI) and archived binaries (for GitHub releases).
#
# Assumed to run as a subworkflow of .github/workflows/release.yml; specifically, as a local
# artifacts job within `cargo-dist`.
name: "Build binaries"
on:
workflow_dispatch:
workflow_call:
inputs:
tag:
description: "The version to tag, without the leading 'v'. If omitted, will initiate a dry run (no uploads)."
type: string
sha:
description: "The full sha of the commit to be released. If omitted, the latest commit on the default branch will be used."
default: ""
plan:
required: true
type: string
pull_request:
paths:
# When we change pyproject.toml, we want to ensure that the maturin builds still work
# When we change pyproject.toml, we want to ensure that the maturin builds still work.
- pyproject.toml
# And when we change this workflow itself...
- .github/workflows/release.yaml
- .github/workflows/build-binaries.yml
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -23,6 +25,7 @@ concurrency:
env:
PACKAGE_NAME: ruff
MODULE_NAME: ruff
PYTHON_VERSION: "3.11"
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10
@@ -31,11 +34,12 @@ env:
jobs:
sdist:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'no-build') }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.sha }}
submodules: recursive
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@@ -49,8 +53,8 @@ jobs:
- name: "Test sdist"
run: |
pip install dist/${{ env.PACKAGE_NAME }}-*.tar.gz --force-reinstall
ruff --help
python -m ruff --help
${{ env.MODULE_NAME }} --help
python -m ${{ env.MODULE_NAME }} --help
- name: "Upload sdist"
uses: actions/upload-artifact@v4
with:
@@ -58,11 +62,12 @@ jobs:
path: dist
macos-x86_64:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'no-build') }}
runs-on: macos-12
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.sha }}
submodules: recursive
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@@ -74,11 +79,6 @@ jobs:
with:
target: x86_64
args: --release --locked --out dist
- name: "Test wheel - x86_64"
run: |
pip install dist/${{ env.PACKAGE_NAME }}-*.whl --force-reinstall
ruff --help
python -m ruff --help
- name: "Upload wheels"
uses: actions/upload-artifact@v4
with:
@@ -86,23 +86,29 @@ jobs:
path: dist
- name: "Archive binary"
run: |
ARCHIVE_FILE=ruff-${{ inputs.tag }}-x86_64-apple-darwin.tar.gz
tar czvf $ARCHIVE_FILE -C target/x86_64-apple-darwin/release ruff
TARGET=x86_64-apple-darwin
ARCHIVE_NAME=ruff-$TARGET
ARCHIVE_FILE=$ARCHIVE_NAME.tar.gz
mkdir -p $ARCHIVE_NAME
cp target/$TARGET/release/ruff $ARCHIVE_NAME/ruff
tar czvf $ARCHIVE_FILE $ARCHIVE_NAME
shasum -a 256 $ARCHIVE_FILE > $ARCHIVE_FILE.sha256
- name: "Upload binary"
uses: actions/upload-artifact@v4
with:
name: binaries-macos-x86_64
name: artifacts-macos-x86_64
path: |
*.tar.gz
*.sha256
macos-aarch64:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'no-build') }}
runs-on: macos-14
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.sha }}
submodules: recursive
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@@ -126,18 +132,24 @@ jobs:
path: dist
- name: "Archive binary"
run: |
ARCHIVE_FILE=ruff-${{ inputs.tag }}-aarch64-apple-darwin.tar.gz
tar czvf $ARCHIVE_FILE -C target/aarch64-apple-darwin/release ruff
TARGET=aarch64-apple-darwin
ARCHIVE_NAME=ruff-$TARGET
ARCHIVE_FILE=$ARCHIVE_NAME.tar.gz
mkdir -p $ARCHIVE_NAME
cp target/$TARGET/release/ruff $ARCHIVE_NAME/ruff
tar czvf $ARCHIVE_FILE $ARCHIVE_NAME
shasum -a 256 $ARCHIVE_FILE > $ARCHIVE_FILE.sha256
- name: "Upload binary"
uses: actions/upload-artifact@v4
with:
name: binaries-aarch64-apple-darwin
name: artifacts-aarch64-apple-darwin
path: |
*.tar.gz
*.sha256
windows:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'no-build') }}
runs-on: windows-latest
strategy:
matrix:
@@ -151,7 +163,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.sha }}
submodules: recursive
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@@ -171,8 +183,8 @@ jobs:
shell: bash
run: |
python -m pip install dist/${{ env.PACKAGE_NAME }}-*.whl --force-reinstall
ruff --help
python -m ruff --help
${{ env.MODULE_NAME }} --help
python -m ${{ env.MODULE_NAME }} --help
- name: "Upload wheels"
uses: actions/upload-artifact@v4
with:
@@ -181,18 +193,19 @@ jobs:
- name: "Archive binary"
shell: bash
run: |
ARCHIVE_FILE=ruff-${{ inputs.tag }}-${{ matrix.platform.target }}.zip
ARCHIVE_FILE=ruff-${{ matrix.platform.target }}.zip
7z a $ARCHIVE_FILE ./target/${{ matrix.platform.target }}/release/ruff.exe
sha256sum $ARCHIVE_FILE > $ARCHIVE_FILE.sha256
- name: "Upload binary"
uses: actions/upload-artifact@v4
with:
name: binaries-${{ matrix.platform.target }}
name: artifacts-${{ matrix.platform.target }}
path: |
*.zip
*.sha256
linux:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'no-build') }}
runs-on: ubuntu-latest
strategy:
matrix:
@@ -202,7 +215,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.sha }}
submodules: recursive
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@@ -219,27 +232,36 @@ jobs:
if: ${{ startsWith(matrix.target, 'x86_64') }}
run: |
pip install dist/${{ env.PACKAGE_NAME }}-*.whl --force-reinstall
ruff --help
python -m ruff --help
${{ env.MODULE_NAME }} --help
python -m ${{ env.MODULE_NAME }} --help
- name: "Upload wheels"
uses: actions/upload-artifact@v4
with:
name: wheels-${{ matrix.target }}
path: dist
- name: "Archive binary"
shell: bash
run: |
ARCHIVE_FILE=ruff-${{ inputs.tag }}-${{ matrix.target }}.tar.gz
tar czvf $ARCHIVE_FILE -C target/${{ matrix.target }}/release ruff
set -euo pipefail
TARGET=${{ matrix.target }}
ARCHIVE_NAME=ruff-$TARGET
ARCHIVE_FILE=$ARCHIVE_NAME.tar.gz
mkdir -p $ARCHIVE_NAME
cp target/$TARGET/release/ruff $ARCHIVE_NAME/ruff
tar czvf $ARCHIVE_FILE $ARCHIVE_NAME
shasum -a 256 $ARCHIVE_FILE > $ARCHIVE_FILE.sha256
- name: "Upload binary"
uses: actions/upload-artifact@v4
with:
name: binaries-${{ matrix.target }}
name: artifacts-${{ matrix.target }}
path: |
*.tar.gz
*.sha256
linux-cross:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'no-build') }}
runs-on: ubuntu-latest
strategy:
matrix:
@@ -261,11 +283,13 @@ jobs:
arch: ppc64
# see https://github.com/astral-sh/ruff/issues/10073
maturin_docker_options: -e JEMALLOC_SYS_WITH_LG_PAGE=16
- target: arm-unknown-linux-musleabihf
arch: arm
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.sha }}
submodules: recursive
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@@ -282,8 +306,8 @@ jobs:
if: matrix.platform.arch != 'ppc64'
name: Test wheel
with:
arch: ${{ matrix.platform.arch }}
distro: ubuntu20.04
arch: ${{ matrix.platform.arch == 'arm' && 'armv6' || matrix.platform.arch }}
distro: ${{ matrix.platform.arch == 'arm' && 'bullseye' || 'ubuntu20.04' }}
githubToken: ${{ github.token }}
install: |
apt-get update
@@ -298,19 +322,28 @@ jobs:
name: wheels-${{ matrix.platform.target }}
path: dist
- name: "Archive binary"
shell: bash
run: |
ARCHIVE_FILE=ruff-${{ inputs.tag }}-${{ matrix.platform.target }}.tar.gz
tar czvf $ARCHIVE_FILE -C target/${{ matrix.platform.target }}/release ruff
set -euo pipefail
TARGET=${{ matrix.platform.target }}
ARCHIVE_NAME=ruff-$TARGET
ARCHIVE_FILE=$ARCHIVE_NAME.tar.gz
mkdir -p $ARCHIVE_NAME
cp target/$TARGET/release/ruff $ARCHIVE_NAME/ruff
tar czvf $ARCHIVE_FILE $ARCHIVE_NAME
shasum -a 256 $ARCHIVE_FILE > $ARCHIVE_FILE.sha256
- name: "Upload binary"
uses: actions/upload-artifact@v4
with:
name: binaries-${{ matrix.platform.target }}
name: artifacts-${{ matrix.platform.target }}
path: |
*.tar.gz
*.sha256
musllinux:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'no-build') }}
runs-on: ubuntu-latest
strategy:
matrix:
@@ -320,7 +353,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.sha }}
submodules: recursive
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@@ -343,26 +376,35 @@ jobs:
apk add python3
python -m venv .venv
.venv/bin/pip3 install ${{ env.PACKAGE_NAME }} --no-index --find-links dist/ --force-reinstall
.venv/bin/ruff check --help
.venv/bin/${{ env.MODULE_NAME }} --help
- name: "Upload wheels"
uses: actions/upload-artifact@v4
with:
name: wheels-${{ matrix.target }}
path: dist
- name: "Archive binary"
shell: bash
run: |
ARCHIVE_FILE=ruff-${{ inputs.tag }}-${{ matrix.target }}.tar.gz
tar czvf $ARCHIVE_FILE -C target/${{ matrix.target }}/release ruff
set -euo pipefail
TARGET=${{ matrix.target }}
ARCHIVE_NAME=ruff-$TARGET
ARCHIVE_FILE=$ARCHIVE_NAME.tar.gz
mkdir -p $ARCHIVE_NAME
cp target/$TARGET/release/ruff $ARCHIVE_NAME/ruff
tar czvf $ARCHIVE_FILE $ARCHIVE_NAME
shasum -a 256 $ARCHIVE_FILE > $ARCHIVE_FILE.sha256
- name: "Upload binary"
uses: actions/upload-artifact@v4
with:
name: binaries-${{ matrix.target }}
name: artifacts-${{ matrix.target }}
path: |
*.tar.gz
*.sha256
musllinux-cross:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'no-build') }}
runs-on: ubuntu-latest
strategy:
matrix:
@@ -376,7 +418,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.sha }}
submodules: recursive
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
@@ -400,204 +442,29 @@ jobs:
run: |
python -m venv .venv
.venv/bin/pip3 install ${{ env.PACKAGE_NAME }} --no-index --find-links dist/ --force-reinstall
.venv/bin/ruff check --help
.venv/bin/${{ env.MODULE_NAME }} --help
- name: "Upload wheels"
uses: actions/upload-artifact@v4
with:
name: wheels-${{ matrix.platform.target }}
path: dist
- name: "Archive binary"
shell: bash
run: |
ARCHIVE_FILE=ruff-${{ inputs.tag }}-${{ matrix.platform.target }}.tar.gz
tar czvf $ARCHIVE_FILE -C target/${{ matrix.platform.target }}/release ruff
set -euo pipefail
TARGET=${{ matrix.platform.target }}
ARCHIVE_NAME=ruff-$TARGET
ARCHIVE_FILE=$ARCHIVE_NAME.tar.gz
mkdir -p $ARCHIVE_NAME
cp target/$TARGET/release/ruff $ARCHIVE_NAME/ruff
tar czvf $ARCHIVE_FILE $ARCHIVE_NAME
shasum -a 256 $ARCHIVE_FILE > $ARCHIVE_FILE.sha256
- name: "Upload binary"
uses: actions/upload-artifact@v4
with:
name: binaries-${{ matrix.platform.target }}
name: artifacts-${{ matrix.platform.target }}
path: |
*.tar.gz
*.sha256
validate-tag:
name: Validate tag
runs-on: ubuntu-latest
# If you don't set an input tag, it's a dry run (no uploads).
if: ${{ inputs.tag }}
steps:
- uses: actions/checkout@v4
with:
ref: main # We checkout the main branch to check for the commit
- name: Check main branch
if: ${{ inputs.sha }}
run: |
# Fetch the main branch since a shallow checkout is used by default
git fetch origin main --unshallow
if ! git branch --contains ${{ inputs.sha }} | grep -E '(^|\s)main$'; then
echo "The specified sha is not on the main branch" >&2
exit 1
fi
- name: Check tag consistency
run: |
# Switch to the commit we want to release
git checkout ${{ inputs.sha }}
version=$(grep "version = " pyproject.toml | sed -e 's/version = "\(.*\)"/\1/g')
if [ "${{ inputs.tag }}" != "${version}" ]; then
echo "The input tag does not match the version from pyproject.toml:" >&2
echo "${{ inputs.tag }}" >&2
echo "${version}" >&2
exit 1
else
echo "Releasing ${version}"
fi
upload-release:
name: Upload to PyPI
runs-on: ubuntu-latest
needs:
- macos-aarch64
- macos-x86_64
- windows
- linux
- linux-cross
- musllinux
- musllinux-cross
- validate-tag
# If you don't set an input tag, it's a dry run (no uploads).
if: ${{ inputs.tag }}
environment:
name: release
permissions:
# For pypi trusted publishing
id-token: write
steps:
- uses: actions/download-artifact@v4
with:
pattern: wheels-*
path: wheels
merge-multiple: true
- name: Publish to PyPi
uses: pypa/gh-action-pypi-publish@release/v1
with:
skip-existing: true
packages-dir: wheels
verbose: true
tag-release:
name: Tag release
runs-on: ubuntu-latest
needs: upload-release
# If you don't set an input tag, it's a dry run (no uploads).
if: ${{ inputs.tag }}
permissions:
# For git tag
contents: write
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.sha }}
- name: git tag
run: |
git config user.email "hey@astral.sh"
git config user.name "Ruff Release CI"
git tag -m "v${{ inputs.tag }}" "v${{ inputs.tag }}"
# If there is duplicate tag, this will fail. The publish to pypi action will have been a noop (due to skip
# existing), so we make a non-destructive exit here
git push --tags
publish-release:
name: Publish to GitHub
runs-on: ubuntu-latest
needs: tag-release
# If you don't set an input tag, it's a dry run (no uploads).
if: ${{ inputs.tag }}
permissions:
# For GitHub release publishing
contents: write
steps:
- uses: actions/download-artifact@v4
with:
pattern: binaries-*
path: binaries
merge-multiple: true
- name: "Publish to GitHub"
uses: softprops/action-gh-release@v2
with:
draft: true
files: binaries/*
tag_name: v${{ inputs.tag }}
docker-publish:
# This action doesn't need to wait on any other task, it's easy to re-tag if something failed and we're validating
# the tag here also
name: Push Docker image ghcr.io/astral-sh/ruff
runs-on: ubuntu-latest
environment:
name: release
permissions:
# For the docker push
packages: write
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.sha }}
- uses: docker/setup-buildx-action@v3
- uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/astral-sh/ruff
- name: Check tag consistency
# Unlike validate-tag we don't check if the commit is on the main branch, but it seems good enough since we can
# change docker tags
if: ${{ inputs.tag }}
run: |
version=$(grep "version = " pyproject.toml | sed -e 's/version = "\(.*\)"/\1/g')
if [ "${{ inputs.tag }}" != "${version}" ]; then
echo "The input tag does not match the version from pyproject.toml:" >&2
echo "${{ inputs.tag }}" >&2
echo "${version}" >&2
exit 1
else
echo "Releasing ${version}"
fi
- name: "Build and push Docker image"
uses: docker/build-push-action@v5
with:
context: .
platforms: linux/amd64,linux/arm64
# Reuse the builder
cache-from: type=gha
cache-to: type=gha,mode=max
push: ${{ inputs.tag != '' }}
tags: ghcr.io/astral-sh/ruff:latest,ghcr.io/astral-sh/ruff:${{ inputs.tag || 'dry-run' }}
labels: ${{ steps.meta.outputs.labels }}
# After the release has been published, we update downstream repositories
# This is separate because if this fails the release is still fine, we just need to do some manual workflow triggers
update-dependents:
name: Update dependents
runs-on: ubuntu-latest
needs: publish-release
steps:
- name: "Update pre-commit mirror"
uses: actions/github-script@v7
with:
github-token: ${{ secrets.RUFF_PRE_COMMIT_PAT }}
script: |
github.rest.actions.createWorkflowDispatch({
owner: 'astral-sh',
repo: 'ruff-pre-commit',
workflow_id: 'main.yml',
ref: 'main',
})

68
.github/workflows/build-docker.yml vendored Normal file
View File

@@ -0,0 +1,68 @@
# Build and publish a Docker image.
#
# Assumed to run as a subworkflow of .github/workflows/release.yml; specifically, as a local
# artifacts job within `cargo-dist`.
#
# TODO(charlie): Ideally, the publish step would happen as a publish job within `cargo-dist`, but
# sharing the built image as an artifact between jobs is challenging.
name: "[ruff] Build Docker image"
on:
workflow_call:
inputs:
plan:
required: true
type: string
pull_request:
paths:
- .github/workflows/build-docker.yml
jobs:
docker-publish:
name: Build Docker image (ghcr.io/astral-sh/ruff)
runs-on: ubuntu-latest
environment:
name: release
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- uses: docker/setup-buildx-action@v3
- uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/astral-sh/ruff
- name: Check tag consistency
if: ${{ inputs.plan != '' && !fromJson(inputs.plan).announcement_tag_is_implicit }}
run: |
version=$(grep "version = " pyproject.toml | sed -e 's/version = "\(.*\)"/\1/g')
if [ "${{ fromJson(inputs.plan).announcement_tag }}" != "${version}" ]; then
echo "The input tag does not match the version from pyproject.toml:" >&2
echo "${{ fromJson(inputs.plan).announcement_tag }}" >&2
echo "${version}" >&2
exit 1
else
echo "Releasing ${version}"
fi
- name: "Build and push Docker image"
uses: docker/build-push-action@v6
with:
context: .
platforms: linux/amd64,linux/arm64
# Reuse the builder
cache-from: type=gha
cache-to: type=gha,mode=max
push: ${{ inputs.plan != '' && !fromJson(inputs.plan).announcement_tag_is_implicit }}
tags: ghcr.io/astral-sh/ruff:latest,ghcr.io/astral-sh/ruff:${{ (inputs.plan != '' && fromJson(inputs.plan).announcement_tag) || 'dry-run' }}
labels: ${{ steps.meta.outputs.labels }}

View File

@@ -111,7 +111,7 @@ jobs:
- name: "Clippy"
run: cargo clippy --workspace --all-targets --all-features --locked -- -D warnings
- name: "Clippy (wasm)"
run: cargo clippy -p ruff_wasm --target wasm32-unknown-unknown --all-features --locked -- -D warnings
run: cargo clippy -p ruff_wasm -p red_knot_wasm --target wasm32-unknown-unknown --all-features --locked -- -D warnings
cargo-test-linux:
name: "cargo test (linux)"
@@ -191,10 +191,14 @@ jobs:
cache-dependency-path: playground/package-lock.json
- uses: jetli/wasm-pack-action@v0.4.0
- uses: Swatinem/rust-cache@v2
- name: "Run wasm-pack"
- name: "Test ruff_wasm"
run: |
cd crates/ruff_wasm
wasm-pack test --node
- name: "Test red_knot_wasm"
run: |
cd crates/red_knot_wasm
wasm-pack test --node
cargo-build-release:
name: "cargo build (release)"
@@ -619,7 +623,7 @@ jobs:
run: cargo codspeed build --features codspeed -p ruff_benchmark
- name: "Run benchmarks"
uses: CodSpeedHQ/action@v2
uses: CodSpeedHQ/action@v3
with:
run: cargo codspeed run
token: ${{ secrets.CODSPEED_TOKEN }}

View File

@@ -1,55 +0,0 @@
name: mkdocs
on:
workflow_dispatch:
inputs:
ref:
description: "The commit SHA, tag, or branch to publish. Uses the default branch if not specified."
default: ""
type: string
release:
types: [published]
jobs:
mkdocs:
runs-on: ubuntu-latest
env:
CF_API_TOKEN_EXISTS: ${{ secrets.CF_API_TOKEN != '' }}
MKDOCS_INSIDERS_SSH_KEY_EXISTS: ${{ secrets.MKDOCS_INSIDERS_SSH_KEY != '' }}
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.ref }}
- uses: actions/setup-python@v5
- name: "Add SSH key"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS == 'true' }}
uses: webfactory/ssh-agent@v0.9.0
with:
ssh-private-key: ${{ secrets.MKDOCS_INSIDERS_SSH_KEY }}
- name: "Install Rust toolchain"
run: rustup show
- uses: Swatinem/rust-cache@v2
- name: "Install Insiders dependencies"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS == 'true' }}
run: pip install -r docs/requirements-insiders.txt
- name: "Install dependencies"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS != 'true' }}
run: pip install -r docs/requirements.txt
- name: "Copy README File"
run: |
python scripts/transform_readme.py --target mkdocs
python scripts/generate_mkdocs.py
- name: "Build Insiders docs"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS == 'true' }}
run: mkdocs build --strict -f mkdocs.insiders.yml
- name: "Build docs"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS != 'true' }}
run: mkdocs build --strict -f mkdocs.public.yml
- name: "Deploy to Cloudflare Pages"
if: ${{ env.CF_API_TOKEN_EXISTS == 'true' }}
uses: cloudflare/wrangler-action@v3.6.1
with:
apiToken: ${{ secrets.CF_API_TOKEN }}
accountId: ${{ secrets.CF_ACCOUNT_ID }}
# `github.head_ref` is only set during pull requests and for manual runs or tags we use `main` to deploy to production
command: pages deploy site --project-name=astral-docs --branch ${{ github.head_ref || 'main' }} --commit-hash ${GITHUB_SHA}

29
.github/workflows/notify-dependents.yml vendored Normal file
View File

@@ -0,0 +1,29 @@
# Notify downstream repositories of a new release.
#
# Assumed to run as a subworkflow of .github/workflows/release.yml; specifically, as a post-announce
# job within `cargo-dist`.
name: "[ruff] Notify dependents"
on:
workflow_call:
inputs:
plan:
required: true
type: string
jobs:
update-dependents:
name: Notify dependents
runs-on: ubuntu-latest
steps:
- name: "Update pre-commit mirror"
uses: actions/github-script@v7
with:
github-token: ${{ secrets.RUFF_PRE_COMMIT_PAT }}
script: |
github.rest.actions.createWorkflowDispatch({
owner: 'astral-sh',
repo: 'ruff-pre-commit',
workflow_id: 'main.yml',
ref: 'main',
})

View File

@@ -23,6 +23,7 @@ jobs:
name: pr-number
run_id: ${{ github.event.workflow_run.id || github.event.inputs.workflow_run_id }}
if_no_artifact_found: ignore
allow_forks: true
- name: Parse pull request number
id: pr-number
@@ -43,6 +44,7 @@ jobs:
path: pr/ecosystem
workflow_conclusion: completed
if_no_artifact_found: ignore
allow_forks: true
- name: Generate comment content
id: generate-comment

151
.github/workflows/publish-docs.yml vendored Normal file
View File

@@ -0,0 +1,151 @@
# Publish the Ruff documentation.
#
# Assumed to run as a subworkflow of .github/workflows/release.yml; specifically, as a post-announce
# job within `cargo-dist`.
name: mkdocs
on:
workflow_dispatch:
inputs:
ref:
description: "The commit SHA, tag, or branch to publish. Uses the default branch if not specified."
default: ""
type: string
workflow_call:
inputs:
plan:
required: true
type: string
jobs:
mkdocs:
runs-on: ubuntu-latest
env:
MKDOCS_INSIDERS_SSH_KEY_EXISTS: ${{ secrets.MKDOCS_INSIDERS_SSH_KEY != '' }}
steps:
- uses: actions/checkout@v4
with:
ref: ${{ inputs.ref }}
- uses: actions/setup-python@v5
with:
python-version: 3.12
- name: "Set docs version"
run: |
version="${{ (inputs.plan != '' && fromJson(inputs.plan).announcement_tag) || inputs.ref }}"
# if version is missing, exit with error
if [[ -z "$version" ]]; then
echo "Can't build docs without a version."
exit 1
fi
# Use version as display name for now
display_name="$version"
echo "version=$version" >> $GITHUB_ENV
echo "display_name=$display_name" >> $GITHUB_ENV
- name: "Set branch name"
run: |
version="${{ env.version }}"
display_name="${{ env.display_name }}"
timestamp="$(date +%s)"
# create branch_display_name from display_name by replacing all
# characters disallowed in git branch names with hyphens
branch_display_name="$(echo "$display_name" | tr -c '[:alnum:]._' '-' | tr -s '-')"
echo "branch_name=update-docs-$branch_display_name-$timestamp" >> $GITHUB_ENV
echo "timestamp=$timestamp" >> $GITHUB_ENV
- name: "Add SSH key"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS == 'true' }}
uses: webfactory/ssh-agent@v0.9.0
with:
ssh-private-key: ${{ secrets.MKDOCS_INSIDERS_SSH_KEY }}
- name: "Install Rust toolchain"
run: rustup show
- uses: Swatinem/rust-cache@v2
- name: "Install Insiders dependencies"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS == 'true' }}
run: pip install -r docs/requirements-insiders.txt
- name: "Install dependencies"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS != 'true' }}
run: pip install -r docs/requirements.txt
- name: "Copy README File"
run: |
python scripts/transform_readme.py --target mkdocs
python scripts/generate_mkdocs.py
- name: "Build Insiders docs"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS == 'true' }}
run: mkdocs build --strict -f mkdocs.insiders.yml
- name: "Build docs"
if: ${{ env.MKDOCS_INSIDERS_SSH_KEY_EXISTS != 'true' }}
run: mkdocs build --strict -f mkdocs.public.yml
- name: "Clone docs repo"
run: |
version="${{ env.version }}"
git clone https://${{ secrets.ASTRAL_DOCS_PAT }}@github.com/astral-sh/docs.git astral-docs
- name: "Copy docs"
run: rm -rf astral-docs/site/ruff && mkdir -p astral-docs/site && cp -r site/ruff astral-docs/site/
- name: "Commit docs"
working-directory: astral-docs
run: |
branch_name="${{ env.branch_name }}"
git config user.name "astral-docs-bot"
git config user.email "176161322+astral-docs-bot@users.noreply.github.com"
git checkout -b $branch_name
git add site/ruff
git commit -m "Update ruff documentation for $version"
- name: "Create Pull Request"
working-directory: astral-docs
env:
GITHUB_TOKEN: ${{ secrets.ASTRAL_DOCS_PAT }}
run: |
version="${{ env.version }}"
display_name="${{ env.display_name }}"
branch_name="${{ env.branch_name }}"
# set the PR title
pull_request_title="Update ruff documentation for $display_name"
# Delete any existing pull requests that are open for this version
# by checking against pull_request_title because the new PR will
# supersede the old one.
gh pr list --state open --json title --jq '.[] | select(.title == "$pull_request_title") | .number' | \
xargs -I {} gh pr close {}
# push the branch to GitHub
git push origin $branch_name
# create the PR
gh pr create --base main --head $branch_name \
--title "$pull_request_title" \
--body "Automated documentation update for $display_name" \
--label "documentation"
- name: "Merge Pull Request"
if: ${{ inputs.plan != '' && !fromJson(inputs.plan).announcement_tag_is_implicit }}
working-directory: astral-docs
env:
GITHUB_TOKEN: ${{ secrets.ASTRAL_DOCS_PAT }}
run: |
branch_name="${{ env.branch_name }}"
# auto-merge the PR if the build was triggered by a release. Manual builds should be reviewed by a human.
# give the PR a few seconds to be created before trying to auto-merge it
sleep 10
gh pr merge --squash $branch_name

View File

@@ -1,9 +1,16 @@
# Publish the Ruff playground.
#
# Assumed to run as a subworkflow of .github/workflows/release.yml; specifically, as a post-announce
# job within `cargo-dist`.
name: "[Playground] Release"
on:
workflow_dispatch:
release:
types: [published]
workflow_call:
inputs:
plan:
required: true
type: string
env:
CARGO_INCREMENTAL: 0
@@ -40,7 +47,7 @@ jobs:
working-directory: playground
- name: "Deploy to Cloudflare Pages"
if: ${{ env.CF_API_TOKEN_EXISTS == 'true' }}
uses: cloudflare/wrangler-action@v3.6.1
uses: cloudflare/wrangler-action@v3.7.0
with:
apiToken: ${{ secrets.CF_API_TOKEN }}
accountId: ${{ secrets.CF_ACCOUNT_ID }}

34
.github/workflows/publish-pypi.yml vendored Normal file
View File

@@ -0,0 +1,34 @@
# Publish a release to PyPI.
#
# Assumed to run as a subworkflow of .github/workflows/release.yml; specifically, as a publish job
# within `cargo-dist`.
name: "[ruff] Publish to PyPI"
on:
workflow_call:
inputs:
plan:
required: true
type: string
jobs:
pypi-publish:
name: Upload to PyPI
runs-on: ubuntu-latest
environment:
name: release
permissions:
# For PyPI's trusted publishing.
id-token: write
steps:
- uses: actions/download-artifact@v4
with:
pattern: wheels-*
path: wheels
merge-multiple: true
- name: Publish to PyPi
uses: pypa/gh-action-pypi-publish@release/v1
with:
skip-existing: true
packages-dir: wheels
verbose: true

55
.github/workflows/publish-wasm.yml vendored Normal file
View File

@@ -0,0 +1,55 @@
# Build and publish ruff-api for wasm.
#
# Assumed to run as a subworkflow of .github/workflows/release.yml; specifically, as a publish
# job within `cargo-dist`.
name: "Build and publish wasm"
on:
workflow_dispatch:
workflow_call:
inputs:
plan:
required: true
type: string
env:
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10
CARGO_TERM_COLOR: always
RUSTUP_MAX_RETRIES: 10
jobs:
ruff_wasm:
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write
strategy:
matrix:
target: [web, bundler, nodejs]
fail-fast: false
steps:
- uses: actions/checkout@v4
- name: "Install Rust toolchain"
run: rustup target add wasm32-unknown-unknown
- uses: jetli/wasm-pack-action@v0.4.0
- uses: jetli/wasm-bindgen-action@v0.2.0
- name: "Run wasm-pack build"
run: wasm-pack build --target ${{ matrix.target }} crates/ruff_wasm
- name: "Rename generated package"
run: | # Replace the package name w/ jq
jq '.name="@astral-sh/ruff-wasm-${{ matrix.target }}"' crates/ruff_wasm/pkg/package.json > /tmp/package.json
mv /tmp/package.json crates/ruff_wasm/pkg
- run: cp LICENSE crates/ruff_wasm/pkg # wasm-pack does not put the LICENSE file in the pkg
- uses: actions/setup-node@v4
with:
node-version: 18
registry-url: "https://registry.npmjs.org"
- name: "Publish (dry-run)"
if: ${{ inputs.plan == '' || fromJson(inputs.plan).announcement_tag_is_implicit }}
run: npm publish --dry-run crates/ruff_wasm/pkg
- name: "Publish"
if: ${{ inputs.plan != '' && !fromJson(inputs.plan).announcement_tag_is_implicit }}
run: npm publish --provenance --access public crates/ruff_wasm/pkg
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

298
.github/workflows/release.yml vendored Normal file
View File

@@ -0,0 +1,298 @@
# Copyright 2022-2024, axodotdev
# SPDX-License-Identifier: MIT or Apache-2.0
#
# CI that:
#
# * checks for a Git Tag that looks like a release
# * builds artifacts with cargo-dist (archives, installers, hashes)
# * uploads those artifacts to temporary workflow zip
# * on success, uploads the artifacts to a GitHub Release
#
# Note that the GitHub Release will be created with a generated
# title/body based on your changelogs.
name: Release
permissions:
"contents": "write"
# This task will run whenever you workflow_dispatch with a tag that looks like a version
# like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc.
# Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where
# PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION
# must be a Cargo-style SemVer Version (must have at least major.minor.patch).
#
# If PACKAGE_NAME is specified, then the announcement will be for that
# package (erroring out if it doesn't have the given version or isn't cargo-dist-able).
#
# If PACKAGE_NAME isn't specified, then the announcement will be for all
# (cargo-dist-able) packages in the workspace with that version (this mode is
# intended for workspaces with only one dist-able package, or with all dist-able
# packages versioned/released in lockstep).
#
# If you push multiple tags at once, separate instances of this workflow will
# spin up, creating an independent announcement for each one. However, GitHub
# will hard limit this to 3 tags per commit, as it will assume more tags is a
# mistake.
#
# If there's a prerelease-style suffix to the version, then the release(s)
# will be marked as a prerelease.
on:
workflow_dispatch:
inputs:
tag:
description: Release Tag
required: true
default: dry-run
type: string
jobs:
# Run 'cargo dist plan' (or host) to determine what tasks we need to do
plan:
runs-on: "ubuntu-20.04"
outputs:
val: ${{ steps.plan.outputs.manifest }}
tag: ${{ (inputs.tag != 'dry-run' && inputs.tag) || '' }}
tag-flag: ${{ inputs.tag && inputs.tag != 'dry-run' && format('--tag={0}', inputs.tag) || '' }}
publishing: ${{ inputs.tag && inputs.tag != 'dry-run' }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install cargo-dist
# we specify bash to get pipefail; it guards against the `curl` command
# failing. otherwise `sh` won't catch that `curl` returned non-0
shell: bash
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.18.0/cargo-dist-installer.sh | sh"
- name: Cache cargo-dist
uses: actions/upload-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/cargo-dist
# sure would be cool if github gave us proper conditionals...
# so here's a doubly-nested ternary-via-truthiness to try to provide the best possible
# functionality based on whether this is a pull_request, and whether it's from a fork.
# (PRs run on the *source* but secrets are usually on the *target* -- that's *good*
# but also really annoying to build CI around when it needs secrets to work right.)
- id: plan
run: |
cargo dist ${{ (inputs.tag && inputs.tag != 'dry-run' && format('host --steps=create --tag={0}', inputs.tag)) || 'plan' }} --output-format=json > plan-dist-manifest.json
echo "cargo dist ran successfully"
cat plan-dist-manifest.json
echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
uses: actions/upload-artifact@v4
with:
name: artifacts-plan-dist-manifest
path: plan-dist-manifest.json
custom-build-binaries:
needs:
- plan
if: ${{ needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload' || inputs.tag == 'dry-run' }}
uses: ./.github/workflows/build-binaries.yml
with:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit
custom-build-docker:
needs:
- plan
if: ${{ needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload' || inputs.tag == 'dry-run' }}
uses: ./.github/workflows/build-docker.yml
with:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit
permissions:
"contents": "read"
"packages": "write"
# Build and package all the platform-agnostic(ish) things
build-global-artifacts:
needs:
- plan
- custom-build-binaries
- custom-build-docker
runs-on: "ubuntu-20.04"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install cached cargo-dist
uses: actions/download-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/cargo-dist
# Get all the local artifacts for the global tasks to use (for e.g. checksums)
- name: Fetch local artifacts
uses: actions/download-artifact@v4
with:
pattern: artifacts-*
path: target/distrib/
merge-multiple: true
- id: cargo-dist
shell: bash
run: |
cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json
echo "cargo dist ran successfully"
# Parse out what we just built and upload it to scratch storage
echo "paths<<EOF" >> "$GITHUB_OUTPUT"
jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
cp dist-manifest.json "$BUILD_MANIFEST_NAME"
- name: "Upload artifacts"
uses: actions/upload-artifact@v4
with:
name: artifacts-build-global
path: |
${{ steps.cargo-dist.outputs.paths }}
${{ env.BUILD_MANIFEST_NAME }}
# Determines if we should publish/announce
host:
needs:
- plan
- custom-build-binaries
- custom-build-docker
- build-global-artifacts
# Only run if we're "publishing", and only if local and global didn't fail (skipped is fine)
if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.custom-build-binaries.result == 'skipped' || needs.custom-build-binaries.result == 'success') && (needs.custom-build-docker.result == 'skipped' || needs.custom-build-docker.result == 'success') }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
runs-on: "ubuntu-20.04"
outputs:
val: ${{ steps.host.outputs.manifest }}
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install cached cargo-dist
uses: actions/download-artifact@v4
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/cargo-dist
# Fetch artifacts from scratch-storage
- name: Fetch artifacts
uses: actions/download-artifact@v4
with:
pattern: artifacts-*
path: target/distrib/
merge-multiple: true
# This is a harmless no-op for GitHub Releases, hosting for that happens in "announce"
- id: host
shell: bash
run: |
cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json
echo "artifacts uploaded and released successfully"
cat dist-manifest.json
echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
uses: actions/upload-artifact@v4
with:
# Overwrite the previous copy
name: artifacts-dist-manifest
path: dist-manifest.json
custom-publish-pypi:
needs:
- plan
- host
if: ${{ !fromJson(needs.plan.outputs.val).announcement_is_prerelease || fromJson(needs.plan.outputs.val).publish_prereleases }}
uses: ./.github/workflows/publish-pypi.yml
with:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit
# publish jobs get escalated permissions
permissions:
"id-token": "write"
"packages": "write"
custom-publish-wasm:
needs:
- plan
- host
if: ${{ !fromJson(needs.plan.outputs.val).announcement_is_prerelease || fromJson(needs.plan.outputs.val).publish_prereleases }}
uses: ./.github/workflows/publish-wasm.yml
with:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit
# publish jobs get escalated permissions
permissions:
"contents": "read"
"id-token": "write"
"packages": "write"
# Create a GitHub Release while uploading all files to it
announce:
needs:
- plan
- host
- custom-publish-pypi
- custom-publish-wasm
# use "always() && ..." to allow us to wait for all publish jobs while
# still allowing individual publish jobs to skip themselves (for prereleases).
# "host" however must run to completion, no skipping allowed!
if: ${{ always() && needs.host.result == 'success' && (needs.custom-publish-pypi.result == 'skipped' || needs.custom-publish-pypi.result == 'success') && (needs.custom-publish-wasm.result == 'skipped' || needs.custom-publish-wasm.result == 'success') }}
runs-on: "ubuntu-20.04"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
# Create a GitHub Release while uploading all files to it
- name: "Download GitHub Artifacts"
uses: actions/download-artifact@v4
with:
pattern: artifacts-*
path: artifacts
merge-multiple: true
- name: Cleanup
run: |
# Remove the granular manifests
rm -f artifacts/*-dist-manifest.json
- name: Create GitHub Release
env:
PRERELEASE_FLAG: "${{ fromJson(needs.host.outputs.val).announcement_is_prerelease && '--prerelease' || '' }}"
ANNOUNCEMENT_TITLE: "${{ fromJson(needs.host.outputs.val).announcement_title }}"
ANNOUNCEMENT_BODY: "${{ fromJson(needs.host.outputs.val).announcement_github_body }}"
RELEASE_COMMIT: "${{ github.sha }}"
run: |
# Write and read notes from a file to avoid quoting breaking things
echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt
gh release create "${{ needs.plan.outputs.tag }}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/*
custom-notify-dependents:
needs:
- plan
- announce
uses: ./.github/workflows/notify-dependents.yml
with:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit
custom-publish-docs:
needs:
- plan
- announce
uses: ./.github/workflows/publish-docs.yml
with:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit
custom-publish-playground:
needs:
- plan
- announce
uses: ./.github/workflows/publish-playground.yml
with:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit

View File

@@ -37,13 +37,13 @@ jobs:
- name: Sync typeshed
id: sync
run: |
rm -rf ruff/crates/red_knot/vendor/typeshed
mkdir ruff/crates/red_knot/vendor/typeshed
cp typeshed/README.md ruff/crates/red_knot/vendor/typeshed
cp typeshed/LICENSE ruff/crates/red_knot/vendor/typeshed
cp -r typeshed/stdlib ruff/crates/red_knot/vendor/typeshed/stdlib
rm -rf ruff/crates/red_knot/vendor/typeshed/stdlib/@tests
git -C typeshed rev-parse HEAD > ruff/crates/red_knot/vendor/typeshed/source_commit.txt
rm -rf ruff/crates/red_knot_python_semantic/vendor/typeshed
mkdir ruff/crates/red_knot_python_semantic/vendor/typeshed
cp typeshed/README.md ruff/crates/red_knot_python_semantic/vendor/typeshed
cp typeshed/LICENSE ruff/crates/red_knot_python_semantic/vendor/typeshed
cp -r typeshed/stdlib ruff/crates/red_knot_python_semantic/vendor/typeshed/stdlib
rm -rf ruff/crates/red_knot_python_semantic/vendor/typeshed/stdlib/@tests
git -C typeshed rev-parse HEAD > ruff/crates/red_knot_python_semantic/vendor/typeshed/source_commit.txt
- name: Commit the changes
id: commit
if: ${{ steps.sync.outcome == 'success' }}

8
.gitignore vendored
View File

@@ -21,6 +21,14 @@ flamegraph.svg
# `CARGO_TARGET_DIR=target-llvm-lines RUSTFLAGS="-Csymbol-mangling-version=v0" cargo llvm-lines -p ruff --lib`
/target*
# samply profiles
profile.json
# tracing-flame traces
tracing.folded
tracing-flamechart.svg
tracing-flamegraph.svg
###
# Rust.gitignore
###

View File

@@ -2,7 +2,8 @@ fail_fast: true
exclude: |
(?x)^(
crates/red_knot/vendor/.*|
crates/red_knot_python_semantic/vendor/.*|
crates/red_knot_workspace/resources/.*|
crates/ruff_linter/resources/.*|
crates/ruff_linter/src/rules/.*/snapshots/.*|
crates/ruff/resources/.*|
@@ -42,7 +43,7 @@ repos:
)$
- repo: https://github.com/crate-ci/typos
rev: v1.22.7
rev: v1.23.6
hooks:
- id: typos
@@ -56,18 +57,13 @@ repos:
pass_filenames: false # This makes it a lot faster
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.9
rev: v0.5.6
hooks:
- id: ruff-format
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
types_or: [python, pyi]
require_serial: true
exclude: |
(?x)^(
crates/ruff_linter/resources/.*|
crates/ruff_python_formatter/resources/.*
)$
# Prettier
- repo: https://github.com/pre-commit/mirrors-prettier

2
.prettierignore Normal file
View File

@@ -0,0 +1,2 @@
# Auto-generated by `cargo-dist`.
.github/workflows/release.yml

View File

@@ -1,5 +1,12 @@
# Breaking Changes
## 0.5.0
- Follow the XDG specification to discover user-level configurations on macOS (same as on other Unix platforms)
- Selecting `ALL` now excludes deprecated rules
- The released archives now include an extra level of nesting, which can be removed with `--strip-components=1` when untarring.
- The release artifact's file name no longer includes the version tag. This enables users to install via `/latest` URLs on GitHub.
## 0.3.0
### Ruff 2024.2 style

View File

@@ -1,5 +1,406 @@
# Changelog
## 0.5.7
### Preview features
- \[`flake8-comprehensions`\] Account for list and set comprehensions in `unnecessary-literal-within-tuple-call` (`C409`) ([#12657](https://github.com/astral-sh/ruff/pull/12657))
- \[`flake8-pyi`\] Add autofix for `future-annotations-in-stub` (`PYI044`) ([#12676](https://github.com/astral-sh/ruff/pull/12676))
- \[`flake8-return`\] Avoid syntax error when auto-fixing `RET505` with mixed indentation (space and tabs) ([#12740](https://github.com/astral-sh/ruff/pull/12740))
- \[`pydoclint`\] Add `docstring-missing-yields` (`DOC402`) and `docstring-extraneous-yields` (`DOC403`) ([#12538](https://github.com/astral-sh/ruff/pull/12538))
- \[`pydoclint`\] Avoid `DOC201` if docstring begins with "Return", "Returns", "Yield", or "Yields" ([#12675](https://github.com/astral-sh/ruff/pull/12675))
- \[`pydoclint`\] Deduplicate collected exceptions after traversing function bodies (`DOC501`) ([#12642](https://github.com/astral-sh/ruff/pull/12642))
- \[`pydoclint`\] Ignore `DOC` errors for stub functions ([#12651](https://github.com/astral-sh/ruff/pull/12651))
- \[`pydoclint`\] Teach rules to understand reraised exceptions as being explicitly raised (`DOC501`, `DOC502`) ([#12639](https://github.com/astral-sh/ruff/pull/12639))
- \[`ruff`\] Implement `incorrectly-parenthesized-tuple-in-subscript` (`RUF031`) ([#12480](https://github.com/astral-sh/ruff/pull/12480))
- \[`ruff`\] Mark `RUF023` fix as unsafe if `__slots__` is not a set and the binding is used elsewhere ([#12692](https://github.com/astral-sh/ruff/pull/12692))
### Rule changes
- \[`refurb`\] Add autofix for `implicit-cwd` (`FURB177`) ([#12708](https://github.com/astral-sh/ruff/pull/12708))
- \[`ruff`\] Add autofix for `zip-instead-of-pairwise` (`RUF007`) ([#12663](https://github.com/astral-sh/ruff/pull/12663))
- \[`tryceratops`\] Add `BaseException` to `raise-vanilla-class` rule (`TRY002`) ([#12620](https://github.com/astral-sh/ruff/pull/12620))
### Server
- Ignore non-file workspace URL; Ruff will display a warning notification in this case ([#12725](https://github.com/astral-sh/ruff/pull/12725))
### CLI
- Fix cache invalidation for nested `pyproject.toml` files ([#12727](https://github.com/astral-sh/ruff/pull/12727))
### Bug fixes
- \[`flake8-async`\] Fix false positives with multiple `async with` items (`ASYNC100`) ([#12643](https://github.com/astral-sh/ruff/pull/12643))
- \[`flake8-bandit`\] Avoid false-positives for list concatenations in SQL construction (`S608`) ([#12720](https://github.com/astral-sh/ruff/pull/12720))
- \[`flake8-bugbear`\] Treat `return` as equivalent to `break` (`B909`) ([#12646](https://github.com/astral-sh/ruff/pull/12646))
- \[`flake8-comprehensions`\] Set comprehensions not a violation for `sum` in `unnecessary-comprehension-in-call` (`C419`) ([#12691](https://github.com/astral-sh/ruff/pull/12691))
- \[`flake8-simplify`\] Parenthesize conditions based on precedence when merging if arms (`SIM114`) ([#12737](https://github.com/astral-sh/ruff/pull/12737))
- \[`pydoclint`\] Try both 'Raises' section styles when convention is unspecified (`DOC501`) ([#12649](https://github.com/astral-sh/ruff/pull/12649))
## 0.5.6
Ruff 0.5.6 automatically enables linting and formatting of notebooks in *preview mode*.
You can opt-out of this behavior by adding `*.ipynb` to the `extend-exclude` setting.
```toml
[tool.ruff]
extend-exclude = ["*.ipynb"]
```
### Preview features
- Enable notebooks by default in preview mode ([#12621](https://github.com/astral-sh/ruff/pull/12621))
- \[`flake8-builtins`\] Implement import, lambda, and module shadowing ([#12546](https://github.com/astral-sh/ruff/pull/12546))
- \[`pydoclint`\] Add `docstring-missing-returns` (`DOC201`) and `docstring-extraneous-returns` (`DOC202`) ([#12485](https://github.com/astral-sh/ruff/pull/12485))
### Rule changes
- \[`flake8-return`\] Exempt cached properties and other property-like decorators from explicit return rule (`RET501`) ([#12563](https://github.com/astral-sh/ruff/pull/12563))
### Server
- Make server panic hook more error resilient ([#12610](https://github.com/astral-sh/ruff/pull/12610))
- Use `$/logTrace` for server trace logs in Zed and VS Code ([#12564](https://github.com/astral-sh/ruff/pull/12564))
- Keep track of deleted cells for reorder change request ([#12575](https://github.com/astral-sh/ruff/pull/12575))
### Configuration
- \[`flake8-implicit-str-concat`\] Always allow explicit multi-line concatenations when implicit concatenations are banned ([#12532](https://github.com/astral-sh/ruff/pull/12532))
### Bug fixes
- \[`flake8-async`\] Avoid flagging `asyncio.timeout`s as unused when the context manager includes `asyncio.TaskGroup` ([#12605](https://github.com/astral-sh/ruff/pull/12605))
- \[`flake8-slots`\] Avoid recommending `__slots__` for classes that inherit from more than `namedtuple` ([#12531](https://github.com/astral-sh/ruff/pull/12531))
- \[`isort`\] Avoid marking required imports as unused ([#12537](https://github.com/astral-sh/ruff/pull/12537))
- \[`isort`\] Preserve trailing inline comments on import-from statements ([#12498](https://github.com/astral-sh/ruff/pull/12498))
- \[`pycodestyle`\] Add newlines before comments (`E305`) ([#12606](https://github.com/astral-sh/ruff/pull/12606))
- \[`pycodestyle`\] Don't attach comments with mismatched indents ([#12604](https://github.com/astral-sh/ruff/pull/12604))
- \[`pyflakes`\] Fix preview-mode bugs in `F401` when attempting to autofix unused first-party submodule imports in an `__init__.py` file ([#12569](https://github.com/astral-sh/ruff/pull/12569))
- \[`pylint`\] Respect start index in `unnecessary-list-index-lookup` ([#12603](https://github.com/astral-sh/ruff/pull/12603))
- \[`pyupgrade`\] Avoid recommending no-argument super in `slots=True` dataclasses ([#12530](https://github.com/astral-sh/ruff/pull/12530))
- \[`pyupgrade`\] Use colon rather than dot formatting for integer-only types ([#12534](https://github.com/astral-sh/ruff/pull/12534))
- Fix NFKC normalization bug when removing unused imports ([#12571](https://github.com/astral-sh/ruff/pull/12571))
### Other changes
- Consider more stdlib decorators to be property-like ([#12583](https://github.com/astral-sh/ruff/pull/12583))
- Improve handling of metaclasses in various linter rules ([#12579](https://github.com/astral-sh/ruff/pull/12579))
- Improve consistency between linter rules in determining whether a function is property ([#12581](https://github.com/astral-sh/ruff/pull/12581))
## 0.5.5
### Preview features
- \[`fastapi`\] Implement `fastapi-redundant-response-model` (`FAST001`) and `fastapi-non-annotated-dependency`(`FAST002`) ([#11579](https://github.com/astral-sh/ruff/pull/11579))
- \[`pydoclint`\] Implement `docstring-missing-exception` (`DOC501`) and `docstring-extraneous-exception` (`DOC502`) ([#11471](https://github.com/astral-sh/ruff/pull/11471))
### Rule changes
- \[`numpy`\] Fix NumPy 2.0 rule for `np.alltrue` and `np.sometrue` ([#12473](https://github.com/astral-sh/ruff/pull/12473))
- \[`numpy`\] Ignore `NPY201` inside `except` blocks for compatibility with older numpy versions ([#12490](https://github.com/astral-sh/ruff/pull/12490))
- \[`pep8-naming`\] Avoid applying `ignore-names` to `self` and `cls` function names (`N804`, `N805`) ([#12497](https://github.com/astral-sh/ruff/pull/12497))
### Formatter
- Fix incorrect placement of leading function comment with type params ([#12447](https://github.com/astral-sh/ruff/pull/12447))
### Server
- Do not bail code action resolution when a quick fix is requested ([#12462](https://github.com/astral-sh/ruff/pull/12462))
### Bug fixes
- Fix `Ord` implementation of `cmp_fix` ([#12471](https://github.com/astral-sh/ruff/pull/12471))
- Raise syntax error for unparenthesized generator expression in multi-argument call ([#12445](https://github.com/astral-sh/ruff/pull/12445))
- \[`pydoclint`\] Fix panic in `DOC501` reported in [#12428](https://github.com/astral-sh/ruff/pull/12428) ([#12435](https://github.com/astral-sh/ruff/pull/12435))
- \[`flake8-bugbear`\] Allow singleton tuples with starred expressions in `B013` ([#12484](https://github.com/astral-sh/ruff/pull/12484))
### Documentation
- Add Eglot setup guide for Emacs editor ([#12426](https://github.com/astral-sh/ruff/pull/12426))
- Add note about the breaking change in `nvim-lspconfig` ([#12507](https://github.com/astral-sh/ruff/pull/12507))
- Add note to include notebook files for native server ([#12449](https://github.com/astral-sh/ruff/pull/12449))
- Add setup docs for Zed editor ([#12501](https://github.com/astral-sh/ruff/pull/12501))
## 0.5.4
### Rule changes
- \[`ruff`\] Rename `RUF007` to `zip-instead-of-pairwise` ([#12399](https://github.com/astral-sh/ruff/pull/12399))
### Bug fixes
- \[`flake8-builtins`\] Avoid shadowing diagnostics for `@override` methods ([#12415](https://github.com/astral-sh/ruff/pull/12415))
- \[`flake8-comprehensions`\] Insert parentheses for multi-argument generators ([#12422](https://github.com/astral-sh/ruff/pull/12422))
- \[`pydocstyle`\] Handle escaped docstrings within docstring (`D301`) ([#12192](https://github.com/astral-sh/ruff/pull/12192))
### Documentation
- Fix GitHub link to Neovim setup ([#12410](https://github.com/astral-sh/ruff/pull/12410))
- Fix `output-format` default in settings reference ([#12409](https://github.com/astral-sh/ruff/pull/12409))
## 0.5.3
**Ruff 0.5.3 marks the stable release of the Ruff language server and introduces revamped
[documentation](https://docs.astral.sh/ruff/editors), including [setup guides for your editor of
choice](https://docs.astral.sh/ruff/editors/setup) and [the language server
itself](https://docs.astral.sh/ruff/editors/settings)**.
### Preview features
- Formatter: Insert empty line between suite and alternative branch after function/class definition ([#12294](https://github.com/astral-sh/ruff/pull/12294))
- \[`pyupgrade`\] Implement `unnecessary-default-type-args` (`UP043`) ([#12371](https://github.com/astral-sh/ruff/pull/12371))
### Rule changes
- \[`flake8-bugbear`\] Detect enumerate iterations in `loop-iterator-mutation` (`B909`) ([#12366](https://github.com/astral-sh/ruff/pull/12366))
- \[`flake8-bugbear`\] Remove `discard`, `remove`, and `pop` allowance for `loop-iterator-mutation` (`B909`) ([#12365](https://github.com/astral-sh/ruff/pull/12365))
- \[`pylint`\] Allow `repeated-equality-comparison` for mixed operations (`PLR1714`) ([#12369](https://github.com/astral-sh/ruff/pull/12369))
- \[`pylint`\] Ignore `self` and `cls` when counting arguments (`PLR0913`) ([#12367](https://github.com/astral-sh/ruff/pull/12367))
- \[`pylint`\] Use UTF-8 as default encoding in `unspecified-encoding` fix (`PLW1514`) ([#12370](https://github.com/astral-sh/ruff/pull/12370))
### Server
- Build settings index in parallel for the native server ([#12299](https://github.com/astral-sh/ruff/pull/12299))
- Use fallback settings when indexing the project ([#12362](https://github.com/astral-sh/ruff/pull/12362))
- Consider `--preview` flag for `server` subcommand for the linter and formatter ([#12208](https://github.com/astral-sh/ruff/pull/12208))
### Bug fixes
- \[`flake8-comprehensions`\] Allow additional arguments for `sum` and `max` comprehensions (`C419`) ([#12364](https://github.com/astral-sh/ruff/pull/12364))
- \[`pylint`\] Avoid dropping extra boolean operations in `repeated-equality-comparison` (`PLR1714`) ([#12368](https://github.com/astral-sh/ruff/pull/12368))
- \[`pylint`\] Consider expression before statement when determining binding kind (`PLR1704`) ([#12346](https://github.com/astral-sh/ruff/pull/12346))
### Documentation
- Add docs for Ruff language server ([#12344](https://github.com/astral-sh/ruff/pull/12344))
- Migrate to standalone docs repo ([#12341](https://github.com/astral-sh/ruff/pull/12341))
- Update versioning policy for editor integration ([#12375](https://github.com/astral-sh/ruff/pull/12375))
### Other changes
- Publish Wasm API to npm ([#12317](https://github.com/astral-sh/ruff/pull/12317))
## 0.5.2
### Preview features
- Use `space` separator before parenthesized expressions in comprehensions with leading comments ([#12282](https://github.com/astral-sh/ruff/pull/12282))
- \[`flake8-async`\] Update `ASYNC100` to include `anyio` and `asyncio` ([#12221](https://github.com/astral-sh/ruff/pull/12221))
- \[`flake8-async`\] Update `ASYNC109` to include `anyio` and `asyncio` ([#12236](https://github.com/astral-sh/ruff/pull/12236))
- \[`flake8-async`\] Update `ASYNC110` to include `anyio` and `asyncio` ([#12261](https://github.com/astral-sh/ruff/pull/12261))
- \[`flake8-async`\] Update `ASYNC115` to include `anyio` and `asyncio` ([#12262](https://github.com/astral-sh/ruff/pull/12262))
- \[`flake8-async`\] Update `ASYNC116` to include `anyio` and `asyncio` ([#12266](https://github.com/astral-sh/ruff/pull/12266))
### Rule changes
- \[`flake8-return`\] Exempt properties from explicit return rule (`RET501`) ([#12243](https://github.com/astral-sh/ruff/pull/12243))
- \[`numpy`\] Add `np.NAN`-to-`np.nan` diagnostic ([#12292](https://github.com/astral-sh/ruff/pull/12292))
- \[`refurb`\] Make `list-reverse-copy` an unsafe fix ([#12303](https://github.com/astral-sh/ruff/pull/12303))
### Server
- Consider `include` and `extend-include` settings in native server ([#12252](https://github.com/astral-sh/ruff/pull/12252))
- Include nested configurations in settings reloading ([#12253](https://github.com/astral-sh/ruff/pull/12253))
### CLI
- Omit code frames for fixes with empty ranges ([#12304](https://github.com/astral-sh/ruff/pull/12304))
- Warn about formatter incompatibility for `D203` ([#12238](https://github.com/astral-sh/ruff/pull/12238))
### Bug fixes
- Make cache-write failures non-fatal on Windows ([#12302](https://github.com/astral-sh/ruff/pull/12302))
- Treat `not` operations as boolean tests ([#12301](https://github.com/astral-sh/ruff/pull/12301))
- \[`flake8-bandit`\] Avoid `S310` violations for HTTP-safe f-strings ([#12305](https://github.com/astral-sh/ruff/pull/12305))
- \[`flake8-bandit`\] Support explicit string concatenations in S310 HTTP detection ([#12315](https://github.com/astral-sh/ruff/pull/12315))
- \[`flake8-bandit`\] fix S113 false positive for httpx without `timeout` argument ([#12213](https://github.com/astral-sh/ruff/pull/12213))
- \[`pycodestyle`\] Remove "non-obvious" allowance for E721 ([#12300](https://github.com/astral-sh/ruff/pull/12300))
- \[`pyflakes`\] Consider `with` blocks as single-item branches for redefinition analysis ([#12311](https://github.com/astral-sh/ruff/pull/12311))
- \[`refurb`\] Restrict forwarding for `newline` argument in `open()` calls to Python versions >= 3.10 ([#12244](https://github.com/astral-sh/ruff/pull/12244))
### Documentation
- Update help and documentation to reflect `--output-format full` default ([#12248](https://github.com/astral-sh/ruff/pull/12248))
### Performance
- Use more threads when discovering Python files ([#12258](https://github.com/astral-sh/ruff/pull/12258))
## 0.5.1
### Preview features
- \[`flake8-bugbear`\] Implement mutable-contextvar-default (B039) ([#12113](https://github.com/astral-sh/ruff/pull/12113))
- \[`pycodestyle`\] Whitespace after decorator (`E204`) ([#12140](https://github.com/astral-sh/ruff/pull/12140))
- \[`pytest`\] Reverse `PT001` and `PT0023` defaults ([#12106](https://github.com/astral-sh/ruff/pull/12106))
### Rule changes
- Enable token-based rules on source with syntax errors ([#11950](https://github.com/astral-sh/ruff/pull/11950))
- \[`flake8-bandit`\] Detect `httpx` for `S113` ([#12174](https://github.com/astral-sh/ruff/pull/12174))
- \[`numpy`\] Update `NPY201` to include exception deprecations ([#12065](https://github.com/astral-sh/ruff/pull/12065))
- \[`pylint`\] Generate autofix for `duplicate-bases` (`PLE0241`) ([#12105](https://github.com/astral-sh/ruff/pull/12105))
### Server
- Avoid syntax error notification for source code actions ([#12148](https://github.com/astral-sh/ruff/pull/12148))
- Consider the content of the new cells during notebook sync ([#12203](https://github.com/astral-sh/ruff/pull/12203))
- Fix replacement edit range computation ([#12171](https://github.com/astral-sh/ruff/pull/12171))
### Bug fixes
- Disable auto-fix when source has syntax errors ([#12134](https://github.com/astral-sh/ruff/pull/12134))
- Fix cache key collisions for paths with separators ([#12159](https://github.com/astral-sh/ruff/pull/12159))
- Make `requires-python` inference robust to `==` ([#12091](https://github.com/astral-sh/ruff/pull/12091))
- Use char-wise width instead of `str`-width ([#12135](https://github.com/astral-sh/ruff/pull/12135))
- \[`pycodestyle`\] Avoid `E275` if keyword followed by comma ([#12136](https://github.com/astral-sh/ruff/pull/12136))
- \[`pycodestyle`\] Avoid `E275` if keyword is followed by a semicolon ([#12095](https://github.com/astral-sh/ruff/pull/12095))
- \[`pylint`\] Skip [dummy variables](https://docs.astral.sh/ruff/settings/#lint_dummy-variable-rgx) for `PLR1704` ([#12190](https://github.com/astral-sh/ruff/pull/12190))
### Performance
- Remove allocation in `parse_identifier` ([#12103](https://github.com/astral-sh/ruff/pull/12103))
- Use `CompactString` for `Identifier` AST node ([#12101](https://github.com/astral-sh/ruff/pull/12101))
## 0.5.0
Check out the [blog post](https://astral.sh/blog/ruff-v0.5.0) for a migration guide and overview of the changes!
### Breaking changes
See also, the "Remapped rules" section which may result in disabled rules.
- Follow the XDG specification to discover user-level configurations on macOS (same as on other Unix platforms)
- Selecting `ALL` now excludes deprecated rules
- The released archives now include an extra level of nesting, which can be removed with `--strip-components=1` when untarring.
- The release artifact's file name no longer includes the version tag. This enables users to install via `/latest` URLs on GitHub.
- The diagnostic ranges for some `flake8-bandit` rules were modified ([#10667](https://github.com/astral-sh/ruff/pull/10667)).
### Deprecations
The following rules are now deprecated:
- [`syntax-error`](https://docs.astral.sh/ruff/rules/syntax-error/) (`E999`): Syntax errors are now always shown
### Remapped rules
The following rules have been remapped to new rule codes:
- [`blocking-http-call-in-async-function`](https://docs.astral.sh/ruff/rules/blocking-http-call-in-async-function/): `ASYNC100` to `ASYNC210`
- [`open-sleep-or-subprocess-in-async-function`](https://docs.astral.sh/ruff/rules/open-sleep-or-subprocess-in-async-function/): `ASYNC101` split into `ASYNC220`, `ASYNC221`, `ASYNC230`, and `ASYNC251`
- [`blocking-os-call-in-async-function`](https://docs.astral.sh/ruff/rules/blocking-os-call-in-async-function/): `ASYNC102` has been merged into `ASYNC220` and `ASYNC221`
- [`trio-timeout-without-await`](https://docs.astral.sh/ruff/rules/trio-timeout-without-await/): `TRIO100` to `ASYNC100`
- [`trio-sync-call`](https://docs.astral.sh/ruff/rules/trio-sync-call/): `TRIO105` to `ASYNC105`
- [`trio-async-function-with-timeout`](https://docs.astral.sh/ruff/rules/trio-async-function-with-timeout/): `TRIO109` to `ASYNC109`
- [`trio-unneeded-sleep`](https://docs.astral.sh/ruff/rules/trio-unneeded-sleep/): `TRIO110` to `ASYNC110`
- [`trio-zero-sleep-call`](https://docs.astral.sh/ruff/rules/trio-zero-sleep-call/): `TRIO115` to `ASYNC115`
- [`repeated-isinstance-calls`](https://docs.astral.sh/ruff/rules/repeated-isinstance-calls/): `PLR1701` to `SIM101`
### Stabilization
The following rules have been stabilized and are no longer in preview:
- [`mutable-fromkeys-value`](https://docs.astral.sh/ruff/rules/mutable-fromkeys-value/) (`RUF024`)
- [`default-factory-kwarg`](https://docs.astral.sh/ruff/rules/default-factory-kwarg/) (`RUF026`)
- [`django-extra`](https://docs.astral.sh/ruff/rules/django-extra/) (`S610`)
- [`manual-dict-comprehension`](https://docs.astral.sh/ruff/rules/manual-dict-comprehension/) (`PERF403`)
- [`print-empty-string`](https://docs.astral.sh/ruff/rules/print-empty-string/) (`FURB105`)
- [`readlines-in-for`](https://docs.astral.sh/ruff/rules/readlines-in-for/) (`FURB129`)
- [`if-expr-min-max`](https://docs.astral.sh/ruff/rules/if-expr-min-max/) (`FURB136`)
- [`bit-count`](https://docs.astral.sh/ruff/rules/bit-count/) (`FURB161`)
- [`redundant-log-base`](https://docs.astral.sh/ruff/rules/redundant-log-base/) (`FURB163`)
- [`regex-flag-alias`](https://docs.astral.sh/ruff/rules/regex-flag-alias/) (`FURB167`)
- [`isinstance-type-none`](https://docs.astral.sh/ruff/rules/isinstance-type-none/) (`FURB168`)
- [`type-none-comparison`](https://docs.astral.sh/ruff/rules/type-none-comparison/) (`FURB169`)
- [`implicit-cwd`](https://docs.astral.sh/ruff/rules/implicit-cwd/) (`FURB177`)
- [`hashlib-digest-hex`](https://docs.astral.sh/ruff/rules/hashlib-digest-hex/) (`FURB181`)
- [`list-reverse-copy`](https://docs.astral.sh/ruff/rules/list-reverse-copy/) (`FURB187`)
- [`bad-open-mode`](https://docs.astral.sh/ruff/rules/bad-open-mode/) (`PLW1501`)
- [`empty-comment`](https://docs.astral.sh/ruff/rules/empty-comment/) (`PLR2044`)
- [`global-at-module-level`](https://docs.astral.sh/ruff/rules/global-at-module-level/) (`PLW0604`)
- [`misplaced-bare-raise`](https://docs.astral.sh/ruff/rules/misplaced-bare-raise/) (`PLE0744`)
- [`non-ascii-import-name`](https://docs.astral.sh/ruff/rules/non-ascii-import-name/) (`PLC2403`)
- [`non-ascii-name`](https://docs.astral.sh/ruff/rules/non-ascii-name/) (`PLC2401`)
- [`nonlocal-and-global`](https://docs.astral.sh/ruff/rules/nonlocal-and-global/) (`PLE0115`)
- [`potential-index-error`](https://docs.astral.sh/ruff/rules/potential-index-error/) (`PLE0643`)
- [`redeclared-assigned-name`](https://docs.astral.sh/ruff/rules/redeclared-assigned-name/) (`PLW0128`)
- [`redefined-argument-from-local`](https://docs.astral.sh/ruff/rules/redefined-argument-from-local/) (`PLR1704`)
- [`repeated-keyword-argument`](https://docs.astral.sh/ruff/rules/repeated-keyword-argument/) (`PLE1132`)
- [`super-without-brackets`](https://docs.astral.sh/ruff/rules/super-without-brackets/) (`PLW0245`)
- [`unnecessary-list-index-lookup`](https://docs.astral.sh/ruff/rules/unnecessary-list-index-lookup/) (`PLR1736`)
- [`useless-exception-statement`](https://docs.astral.sh/ruff/rules/useless-exception-statement/) (`PLW0133`)
- [`useless-with-lock`](https://docs.astral.sh/ruff/rules/useless-with-lock/) (`PLW2101`)
The following behaviors have been stabilized:
- [`is-literal`](https://docs.astral.sh/ruff/rules/is-literal/) (`F632`) now warns for identity checks against list, set or dictionary literals
- [`needless-bool`](https://docs.astral.sh/ruff/rules/needless-bool/) (`SIM103`) now detects `if` expressions with implicit `else` branches
- [`module-import-not-at-top-of-file`](https://docs.astral.sh/ruff/rules/module-import-not-at-top-of-file/) (`E402`) now allows `os.environ` modifications between import statements
- [`type-comparison`](https://docs.astral.sh/ruff/rules/type-comparison/) (`E721`) now allows idioms such as `type(x) is int`
- [`yoda-condition`](https://docs.astral.sh/ruff/rules/yoda-conditions/) (`SIM300`) now flags a wider range of expressions
### Removals
The following deprecated settings have been removed:
- `output-format=text`; use `output-format=concise` or `output-format=full`
- `tab-size`; use `indent-width`
The following deprecated CLI options have been removed:
- `--show-source`; use `--output-format=full`
- `--no-show-source`; use `--output-format=concise`
The following deprecated CLI commands have been removed:
- `ruff <path>`; use `ruff check <path>`
- `ruff --clean`; use `ruff clean`
- `ruff --generate-shell-completion`; use `ruff generate-shell-completion`
### Preview features
- \[`ruff`\] Add `assert-with-print-message` rule ([#11981](https://github.com/astral-sh/ruff/pull/11981))
### CLI
- Use rule name rather than message in `--statistics` ([#11697](https://github.com/astral-sh/ruff/pull/11697))
- Use the output format `full` by default ([#12010](https://github.com/astral-sh/ruff/pull/12010))
- Don't log syntax errors to the console ([#11902](https://github.com/astral-sh/ruff/pull/11902))
### Rule changes
- \[`ruff`\] Fix false positives if `gettext` is imported using an alias (`RUF027`) ([#12025](https://github.com/astral-sh/ruff/pull/12025))
- \[`numpy`\] Update `trapz` and `in1d` deprecation (`NPY201`) ([#11948](https://github.com/astral-sh/ruff/pull/11948))
- \[`flake8-bandit`\] Modify diagnostic ranges for shell-related rules ([#10667](https://github.com/astral-sh/ruff/pull/10667))
### Server
- Closing an untitled, unsaved notebook document no longer throws an error ([#11942](https://github.com/astral-sh/ruff/pull/11942))
- Support the usage of tildes and environment variables in `logFile` ([#11945](https://github.com/astral-sh/ruff/pull/11945))
- Add option to configure whether to show syntax errors ([#12059](https://github.com/astral-sh/ruff/pull/12059))
### Bug fixes
- \[`pycodestyle`\] Avoid `E203` for f-string debug expression ([#12024](https://github.com/astral-sh/ruff/pull/12024))
- \[`pep8-naming`\] Match import-name ignores against both name and alias (`N812`, `N817`) ([#12033](https://github.com/astral-sh/ruff/pull/12033))
- \[`pyflakes`\] Detect assignments that shadow definitions (`F811`) ([#11961](https://github.com/astral-sh/ruff/pull/11961))
### Parser
- Emit a syntax error for an empty type parameter list ([#12030](https://github.com/astral-sh/ruff/pull/12030))
- Avoid consuming the newline for unterminated strings ([#12067](https://github.com/astral-sh/ruff/pull/12067))
- Do not include the newline in the unterminated string range ([#12017](https://github.com/astral-sh/ruff/pull/12017))
- Use the correct range to highlight line continuation errors ([#12016](https://github.com/astral-sh/ruff/pull/12016))
- Consider 2-character EOL before line continuations ([#12035](https://github.com/astral-sh/ruff/pull/12035))
- Consider line continuation character for re-lexing ([#12008](https://github.com/astral-sh/ruff/pull/12008))
### Other changes
- Upgrade the Unicode table used for measuring the line-length ([#11194](https://github.com/astral-sh/ruff/pull/11194))
- Remove the deprecation error message for the nursery selector ([#10172](https://github.com/astral-sh/ruff/pull/10172))
## 0.4.10
### Parser

View File

@@ -280,7 +280,7 @@ These represent, respectively: the schema used to parse the `pyproject.toml` fil
intermediate representation; and the final, internal representation used to power Ruff.
To add a new configuration option, you'll likely want to modify these latter few files (along with
`arg.rs`, if appropriate). If you want to pattern-match against an existing example, grep for
`args.rs`, if appropriate). If you want to pattern-match against an existing example, grep for
`dummy_variable_rgx`, which defines a regular expression to match against acceptable unused
variables (e.g., `_`).
@@ -333,7 +333,7 @@ even patch releases may contain [non-backwards-compatible changes](https://semve
### Creating a new release
1. Install `uv`: `curl -LsSf https://astral.sh/uv/install.sh | sh`
1. Run `./scripts/release/bump.sh`; this command will:
1. Run `./scripts/release.sh`; this command will:
- Generate a temporary virtual environment with `rooster`
- Generate a changelog entry in `CHANGELOG.md`
- Update versions in `pyproject.toml` and `Cargo.toml`
@@ -346,22 +346,21 @@ even patch releases may contain [non-backwards-compatible changes](https://semve
1. Run `cargo check`. This should update the lock file with new versions.
1. Create a pull request with the changelog and version updates
1. Merge the PR
1. Run the [release workflow](https://github.com/astral-sh/ruff/actions/workflows/release.yaml) with:
1. Run the [release workflow](https://github.com/astral-sh/ruff/actions/workflows/release.yml) with:
- The new version number (without starting `v`)
- The commit hash of the merged release pull request on `main`
1. The release workflow will do the following:
1. Build all the assets. If this fails (even though we tested in step 4), we haven't tagged or
uploaded anything, you can restart after pushing a fix.
uploaded anything, you can restart after pushing a fix. If you just need to rerun the build,
make sure you're [re-running all the failed
jobs](https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs#re-running-failed-jobs-in-a-workflow) and not just a single failed job.
1. Upload to PyPI.
1. Create and push the Git tag (as extracted from `pyproject.toml`). We create the Git tag only
after building the wheels and uploading to PyPI, since we can't delete or modify the tag ([#4468](https://github.com/astral-sh/ruff/issues/4468)).
1. Attach artifacts to draft GitHub release
1. Trigger downstream repositories. This can fail non-catastrophically, as we can run any
downstream jobs manually if needed.
1. Publish the GitHub release
1. Open the draft release in the GitHub release section
1. Copy the changelog for the release into the GitHub release
- See previous releases for formatting of section headers
1. Verify the GitHub release:
1. The Changelog should match the content of `CHANGELOG.md`
1. Append the contributors from the `bump.sh` script
1. If needed, [update the schemastore](https://github.com/astral-sh/ruff/blob/main/scripts/update_schemastore.py).
1. One can determine if an update is needed when
@@ -906,7 +905,7 @@ There are three ways in which an import can be categorized as "first-party":
package (e.g., `from foo import bar` or `import foo.bar`), they'll be classified as first-party
automatically. This check is as simple as comparing the first segment of the current file's
module path to the first segment of the import.
1. **Source roots**: Ruff supports a `[src](https://docs.astral.sh/ruff/settings/#src)` setting, which
1. **Source roots**: Ruff supports a [`src`](https://docs.astral.sh/ruff/settings/#src) setting, which
sets the directories to scan when identifying first-party imports. The algorithm is
straightforward: given an import, like `import foo`, iterate over the directories enumerated in
the `src` setting and, for each directory, check for the existence of a subdirectory `foo` or a

895
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -4,7 +4,7 @@ resolver = "2"
[workspace.package]
edition = "2021"
rust-version = "1.75"
rust-version = "1.76"
homepage = "https://docs.astral.sh/ruff"
documentation = "https://docs.astral.sh/ruff"
repository = "https://github.com/astral-sh/ruff"
@@ -36,6 +36,8 @@ ruff_text_size = { path = "crates/ruff_text_size" }
ruff_workspace = { path = "crates/ruff_workspace" }
red_knot_python_semantic = { path = "crates/red_knot_python_semantic" }
red_knot_server = { path = "crates/red_knot_server" }
red_knot_workspace = { path = "crates/red_knot_workspace" }
aho-corasick = { version = "1.1.3" }
annotate-snippets = { version = "0.9.2", features = ["color"] }
@@ -48,19 +50,20 @@ cachedir = { version = "0.3.1" }
camino = { version = "1.1.7" }
chrono = { version = "0.4.35", default-features = false, features = ["clock"] }
clap = { version = "4.5.3", features = ["derive"] }
clap_complete_command = { version = "0.5.1" }
clap_complete_command = { version = "0.6.0" }
clearscreen = { version = "3.0.0" }
codspeed-criterion-compat = { version = "2.6.0", default-features = false }
colored = { version = "2.1.0" }
console_error_panic_hook = { version = "0.1.7" }
console_log = { version = "1.0.0" }
countme = { version = "3.0.1" }
compact_str = "0.8.0"
criterion = { version = "0.5.1", default-features = false }
crossbeam = { version = "0.8.4" }
dashmap = { version = "5.5.3" }
dirs = { version = "5.0.0" }
dashmap = { version = "6.0.1" }
drop_bomb = { version = "0.1.5" }
env_logger = { version = "0.11.0" }
etcetera = { version = "0.8.0" }
fern = { version = "0.6.1" }
filetime = { version = "0.2.23" }
glob = { version = "0.3.1" }
@@ -69,7 +72,6 @@ hashbrown = "0.14.3"
ignore = { version = "0.4.22" }
imara-diff = { version = "0.1.5" }
imperative = { version = "1.0.4" }
indexmap = { version = "2.2.6" }
indicatif = { version = "0.17.8" }
indoc = { version = "2.0.4" }
insta = { version = "1.35.1" }
@@ -92,10 +94,10 @@ mimalloc = { version = "0.1.39" }
natord = { version = "1.0.9" }
notify = { version = "6.1.1" }
once_cell = { version = "1.19.0" }
ordermap = { version = "0.5.0" }
path-absolutize = { version = "3.1.1" }
path-slash = { version = "0.2.1" }
pathdiff = { version = "0.2.1" }
parking_lot = "0.12.1"
pep440_rs = { version = "0.6.0", features = ["serde"] }
pretty_assertions = "1.3.0"
proc-macro2 = { version = "1.0.79" }
@@ -105,8 +107,8 @@ quote = { version = "1.0.23" }
rand = { version = "0.8.5" }
rayon = { version = "1.10.0" }
regex = { version = "1.10.2" }
rustc-hash = { version = "1.1.0" }
salsa = { git = "https://github.com/salsa-rs/salsa.git", package = "salsa-2022", rev = "05b4e3ebdcdc47730cdd359e7e97fb2470527279" }
rustc-hash = { version = "2.0.0" }
salsa = { git = "https://github.com/MichaReiser/salsa.git", tag = "red-knot-0.0.1" }
schemars = { version = "0.8.16" }
seahash = { version = "4.1.0" }
serde = { version = "1.0.197", features = ["derive"] }
@@ -119,7 +121,6 @@ serde_with = { version = "3.6.0", default-features = false, features = [
shellexpand = { version = "3.0.0" }
similar = { version = "2.4.0", features = ["inline"] }
smallvec = { version = "1.13.2" }
smol_str = { version = "0.2.2" }
static_assertions = "1.1.0"
strum = { version = "0.26.0", features = ["strum_macros"] }
strum_macros = { version = "0.26.0" }
@@ -127,12 +128,13 @@ syn = { version = "2.0.55" }
tempfile = { version = "3.9.0" }
test-case = { version = "3.3.1" }
thiserror = { version = "1.0.58" }
tikv-jemallocator = { version = "0.5.0" }
tikv-jemallocator = { version = "0.6.0" }
toml = { version = "0.8.11" }
tracing = { version = "0.1.40" }
tracing-flame = { version = "0.2.0" }
tracing-indicatif = { version = "0.3.6" }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
tracing-tree = { version = "0.3.0" }
tracing-subscriber = { version = "0.3.18", default-features = false, features = ["env-filter", "fmt"] }
tracing-tree = { version = "0.4.0" }
typed-arena = { version = "2.0.2" }
unic-ucd-category = { version = "0.9" }
unicode-ident = { version = "1.0.12" }
@@ -151,11 +153,12 @@ walkdir = { version = "2.3.2" }
wasm-bindgen = { version = "0.2.92" }
wasm-bindgen-test = { version = "0.3.42" }
wild = { version = "2" }
zip = { version = "0.6.6", default-features = false, features = ["zstd"] }
zip = { version = "0.6.6", default-features = false }
[workspace.lints.rust]
unsafe_code = "warn"
unreachable_pub = "warn"
unexpected_cfgs = { level = "warn", check-cfg = ["cfg(fuzzing)", "cfg(codspeed)"] }
[workspace.lints.clippy]
pedantic = { level = "warn", priority = -2 }
@@ -219,3 +222,62 @@ opt-level = 1
[profile.profiling]
inherits = "release"
debug = 1
# The profile that 'cargo dist' will build with.
[profile.dist]
inherits = "release"
# Config for 'cargo dist'
[workspace.metadata.dist]
# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax)
cargo-dist-version = "0.18.0"
# CI backends to support
ci = ["github"]
# The installers to generate for each app
installers = ["shell", "powershell"]
# The archive format to use for windows builds (defaults .zip)
windows-archive = ".zip"
# The archive format to use for non-windows builds (defaults .tar.xz)
unix-archive = ".tar.gz"
# Target platforms to build apps for (Rust target-triple syntax)
targets = [
"aarch64-apple-darwin",
"aarch64-pc-windows-msvc",
"aarch64-unknown-linux-gnu",
"aarch64-unknown-linux-musl",
"arm-unknown-linux-musleabihf",
"armv7-unknown-linux-gnueabihf",
"armv7-unknown-linux-musleabihf",
"i686-pc-windows-msvc",
"i686-unknown-linux-gnu",
"i686-unknown-linux-musl",
"powerpc64-unknown-linux-gnu",
"powerpc64le-unknown-linux-gnu",
"s390x-unknown-linux-gnu",
"x86_64-apple-darwin",
"x86_64-pc-windows-msvc",
"x86_64-unknown-linux-gnu",
"x86_64-unknown-linux-musl",
]
# Whether to auto-include files like READMEs, LICENSEs, and CHANGELOGs (default true)
auto-includes = false
# Whether cargo-dist should create a GitHub Release or use an existing draft
create-release = true
# Publish jobs to run in CI
pr-run-mode = "skip"
# Whether CI should trigger releases with dispatches instead of tag pushes
dispatch-releases = true
# The stage during which the GitHub Release should be created
github-release = "announce"
# Whether CI should include auto-generated code to build local artifacts
build-local-artifacts = false
# Local artifacts jobs to run in CI
local-artifacts-jobs = ["./build-binaries", "./build-docker"]
# Publish jobs to run in CI
publish-jobs = ["./publish-pypi", "./publish-wasm"]
# Announcement jobs to run in CI
post-announce-jobs = ["./notify-dependents", "./publish-docs", "./publish-playground"]
# Custom permissions for GitHub Jobs
github-custom-job-permissions = { "build-docker" = { packages = "write", contents = "read" }, "publish-wasm" = { contents = "read", id-token = "write", packages = "write" } }
# Whether to install an updater program
install-updater = false

25
LICENSE
View File

@@ -1371,3 +1371,28 @@ are:
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
- pydoclint, licensed as follows:
"""
MIT License
Copyright (c) 2023 jsh9
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

View File

@@ -29,14 +29,14 @@ An extremely fast Python linter and code formatter, written in Rust.
- 🐍 Installable via `pip`
- 🛠️ `pyproject.toml` support
- 🤝 Python 3.13 compatibility
- ⚖️ Drop-in parity with [Flake8](https://docs.astral.sh/ruff/faq/#how-does-ruff-compare-to-flake8), isort, and Black
- ⚖️ Drop-in parity with [Flake8](https://docs.astral.sh/ruff/faq/#how-does-ruffs-linter-compare-to-flake8), isort, and [Black](https://docs.astral.sh/ruff/faq/#how-does-ruffs-formatter-compare-to-black)
- 📦 Built-in caching, to avoid re-analyzing unchanged files
- 🔧 Fix support, for automatic error correction (e.g., automatically remove unused imports)
- 📏 Over [800 built-in rules](https://docs.astral.sh/ruff/rules/), with native re-implementations
of popular Flake8 plugins, like flake8-bugbear
- ⌨️ First-party [editor integrations](https://docs.astral.sh/ruff/integrations/) for
[VS Code](https://github.com/astral-sh/ruff-vscode) and [more](https://github.com/astral-sh/ruff-lsp)
- 🌎 Monorepo-friendly, with [hierarchical and cascading configuration](https://docs.astral.sh/ruff/configuration/#pyprojecttoml-discovery)
[VS Code](https://github.com/astral-sh/ruff-vscode) and [more](https://docs.astral.sh/ruff/editors/setup)
- 🌎 Monorepo-friendly, with [hierarchical and cascading configuration](https://docs.astral.sh/ruff/configuration/#config-file-discovery)
Ruff aims to be orders of magnitude faster than alternative tools while integrating more
functionality behind a single, common interface.
@@ -119,7 +119,25 @@ For more, see the [documentation](https://docs.astral.sh/ruff/).
Ruff is available as [`ruff`](https://pypi.org/project/ruff/) on PyPI:
```shell
# With pip.
pip install ruff
# With pipx.
pipx install ruff
```
Starting with version `0.5.0`, Ruff can be installed with our standalone installers:
```shell
# On macOS and Linux.
curl -LsSf https://astral.sh/ruff/install.sh | sh
# On Windows.
powershell -c "irm https://astral.sh/ruff/install.ps1 | iex"
# For a specific version.
curl -LsSf https://astral.sh/ruff/0.5.7/install.sh | sh
powershell -c "irm https://astral.sh/ruff/0.5.7/install.ps1 | iex"
```
You can also install Ruff via [Homebrew](https://formulae.brew.sh/formula/ruff), [Conda](https://anaconda.org/conda-forge/ruff),
@@ -152,7 +170,7 @@ Ruff can also be used as a [pre-commit](https://pre-commit.com/) hook via [`ruff
```yaml
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.4.10
rev: v0.5.7
hooks:
# Run the linter.
- id: ruff
@@ -161,8 +179,7 @@ Ruff can also be used as a [pre-commit](https://pre-commit.com/) hook via [`ruff
- id: ruff-format
```
Ruff can also be used as a [VS Code extension](https://github.com/astral-sh/ruff-vscode) or
alongside any other editor through the [Ruff LSP](https://github.com/astral-sh/ruff-lsp).
Ruff can also be used as a [VS Code extension](https://github.com/astral-sh/ruff-vscode) or with [various other editors](https://docs.astral.sh/ruff/editors/setup).
Ruff can also be used as a [GitHub Action](https://github.com/features/actions) via
[`ruff-action`](https://github.com/chartboost/ruff-action):
@@ -334,7 +351,6 @@ quality tools, including:
- [flake8-super](https://pypi.org/project/flake8-super/)
- [flake8-tidy-imports](https://pypi.org/project/flake8-tidy-imports/)
- [flake8-todos](https://pypi.org/project/flake8-todos/)
- [flake8-trio](https://pypi.org/project/flake8-trio/)
- [flake8-type-checking](https://pypi.org/project/flake8-type-checking/)
- [flake8-use-pathlib](https://pypi.org/project/flake8-use-pathlib/)
- [flynt](https://pypi.org/project/flynt/) ([#2102](https://github.com/astral-sh/ruff/issues/2102))
@@ -407,6 +423,7 @@ Ruff is used by a number of major open-source projects and companies, including:
- [Dagger](https://github.com/dagger/dagger)
- [Dagster](https://github.com/dagster-io/dagster)
- Databricks ([MLflow](https://github.com/mlflow/mlflow))
- [Dify](https://github.com/langgenius/dify)
- [FastAPI](https://github.com/tiangolo/fastapi)
- [Godot](https://github.com/godotengine/godot)
- [Gradio](https://github.com/gradio-app/gradio)
@@ -417,6 +434,7 @@ Ruff is used by a number of major open-source projects and companies, including:
- Hugging Face ([Transformers](https://github.com/huggingface/transformers),
[Datasets](https://github.com/huggingface/datasets),
[Diffusers](https://github.com/huggingface/diffusers))
- IBM ([Qiskit](https://github.com/Qiskit/qiskit))
- ING Bank ([popmon](https://github.com/ing-bank/popmon), [probatus](https://github.com/ing-bank/probatus))
- [Ibis](https://github.com/ibis-project/ibis)
- [ivy](https://github.com/unifyai/ivy)
@@ -442,6 +460,7 @@ Ruff is used by a number of major open-source projects and companies, including:
- [NumPyro](https://github.com/pyro-ppl/numpyro)
- [ONNX](https://github.com/onnx/onnx)
- [OpenBB](https://github.com/OpenBB-finance/OpenBBTerminal)
- [Open Wine Components](https://github.com/Open-Wine-Components/umu-launcher)
- [PDM](https://github.com/pdm-project/pdm)
- [PaddlePaddle](https://github.com/PaddlePaddle/Paddle)
- [Pandas](https://github.com/pandas-dev/pandas)

View File

@@ -1,6 +1,6 @@
[files]
# https://github.com/crate-ci/typos/issues/868
extend-exclude = ["crates/red_knot/vendor/**/*", "**/resources/**/*", "**/snapshots/**/*"]
extend-exclude = ["crates/red_knot_python_semantic/vendor/**/*", "**/resources/**/*", "**/snapshots/**/*"]
[default.extend-words]
"arange" = "arange" # e.g. `numpy.arange`
@@ -16,5 +16,6 @@ jod = "jod" # e.g., `jod-thread`
[default]
extend-ignore-re = [
# Line ignore with trailing "spellchecker:disable-line"
"(?Rm)^.*#\\s*spellchecker:disable-line$"
"(?Rm)^.*#\\s*spellchecker:disable-line$",
"LICENSEs",
]

View File

@@ -10,4 +10,12 @@ doc-valid-idents = [
"SCREAMING_SNAKE_CASE",
"SQLAlchemy",
"StackOverflow",
"PyCharm",
]
ignore-interior-mutability = [
# Interned is read-only. The wrapped `Rc` never gets updated.
"ruff_formatter::format_element::Interned",
# The expression is read-only.
"ruff_python_ast::hashable::HashableExpr",
]

View File

@@ -1,6 +1,6 @@
[package]
name = "red_knot"
version = "0.1.0"
version = "0.0.0"
edition.workspace = true
rust-version.workspace = true
homepage.workspace = true
@@ -13,38 +13,27 @@ license.workspace = true
[dependencies]
red_knot_python_semantic = { workspace = true }
red_knot_workspace = { workspace = true }
red_knot_server = { workspace = true }
ruff_python_parser = { workspace = true }
ruff_python_ast = { workspace = true }
ruff_python_stdlib = { workspace = true }
ruff_text_size = { workspace = true }
ruff_index = { workspace = true }
ruff_notebook = { workspace = true }
ruff_db = { workspace = true, features = ["os", "cache"] }
anyhow = { workspace = true }
bitflags = { workspace = true }
chrono = { workspace = true }
clap = { workspace = true, features = ["wrap_help"] }
colored = { workspace = true }
countme = { workspace = true, features = ["enable"] }
crossbeam = { workspace = true }
ctrlc = { version = "3.4.4" }
dashmap = { workspace = true }
hashbrown = { workspace = true }
indexmap = { workspace = true }
is-macro = { workspace = true }
notify = { workspace = true }
parking_lot = { workspace = true }
rayon = { workspace = true }
rustc-hash = { workspace = true }
smol_str = { version = "0.2.1" }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
salsa = { workspace = true }
tracing = { workspace = true, features = ["release_max_level_debug"] }
tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
tracing-flame = { workspace = true }
tracing-tree = { workspace = true }
zip = { workspace = true }
[build-dependencies]
zip = { workspace = true }
walkdir = { workspace = true }
[dev-dependencies]
insta = { workspace = true }
filetime = { workspace = true }
tempfile = { workspace = true }
[lints]

View File

@@ -1,9 +0,0 @@
# Red Knot
The Red Knot crate contains code working towards multifile analysis, type inference and, ultimately, type-checking. It's very much a work in progress for now.
## Vendored types for the stdlib
Red Knot vendors [typeshed](https://github.com/python/typeshed)'s stubs for the standard library. The vendored stubs can be found in `crates/red_knot/vendor/typeshed`. The file `crates/red_knot/vendor/typeshed/source_commit.txt` tells you the typeshed commit that our vendored stdlib stubs currently correspond to.
The typeshed stubs are updated every two weeks via an automated PR using the `sync_typeshed.yaml` workflow in the `.github/workflows` directory. This workflow can also be triggered at any time via [workflow dispatch](https://docs.github.com/en/actions/using-workflows/manually-running-a-workflow#running-a-workflow).

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

View File

@@ -0,0 +1,123 @@
# Tracing
Traces are a useful tool to narrow down the location of a bug or, at least, to understand why the compiler is doing a particular thing.
Note, tracing messages with severity `debug` or greater are user-facing. They should be phrased accordingly.
Tracing spans are only shown when using `-vvv`.
## Verbosity levels
The CLI supports different verbosity levels.
- default: Only show errors and warnings.
- `-v` activates `info!`: Show generally useful information such as paths of configuration files, detected platform, etc., but it's not a lot of messages, it's something you'll activate in CI by default. cargo build e.g. shows you which packages are fresh.
- `-vv` activates `debug!` and timestamps: This should be enough information to get to the bottom of bug reports. When you're processing many packages or files, you'll get pages and pages of output, but each line is link to a specific action or state change.
- `-vvv` activates `trace!` (only in debug builds) and shows tracing-spans: At this level, you're logging everything. Most of this is wasted, it's really slow, we dump e.g. the entire resolution graph. Only useful to developers, and you almost certainly want to use `RED_KNOT_LOG` to filter it down to the area your investigating.
## `RED_KNOT_LOG`
By default, the CLI shows messages from the `ruff` and `red_knot` crates. Tracing messages from other crates are not shown.
The `RED_KNOT_LOG` environment variable allows you to customize which messages are shown by specifying one
or more [filter directives](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/struct.EnvFilter.html#directives).
### Examples
#### Show all debug messages
Shows debug messages from all crates.
```bash
RED_KNOT_LOG=debug
```
#### Show salsa query execution messages
Show the salsa `execute: my_query` messages in addition to all red knot messages.
```bash
RED_KNOT_LOG=ruff=trace,red_knot=trace,salsa=info
```
#### Show typing traces
Only show traces for the `red_knot_python_semantic::types` module.
```bash
RED_KNOT_LOG="red_knot_python_semantic::types"
```
Note: Ensure that you use `-vvv` to see tracing spans.
#### Show messages for a single file
Shows all messages that are inside of a span for a specific file.
```bash
RED_KNOT_LOG=red_knot[{file=/home/micha/astral/test/x.py}]=trace
```
**Note**: Tracing still shows all spans because tracing can't know at the time of entering the span
whether one if its children has the file `x.py`.
**Note**: Salsa currently logs the entire memoized values. In our case, the source text and parsed AST.
This very quickly leads to extremely long outputs.
## Tracing and Salsa
Be mindful about using `tracing` in Salsa queries, especially when using `warn` or `error` because it isn't guaranteed
that the query will execute after restoring from a persistent cache. In which case the user won't see the message.
For example, don't use `tracing` to show the user a message when generating a lint violation failed
because the message would only be shown when linting the file the first time, but not on subsequent analysis
runs or when restoring from a persistent cache. This can be confusing for users because they
don't understand why a specific lint violation isn't raised. Instead, change your
query to return the failure as part of the query's result or use a Salsa accumulator.
## Tracing in tests
You can use `ruff_db::testing::setup_logging` or `ruff_db::testing::setup_logging_with_filter` to set up logging in tests.
```rust
use ruff_db::testing::setup_logging;
#[test]
fn test() {
let _logging = setup_logging();
tracing::info!("This message will be printed to stderr");
}
```
Note: Most test runners capture stderr and only show its output when a test fails.
Note also that `setup_logging` only sets up logging for the current thread because [`set_global_default`](https://docs.rs/tracing/latest/tracing/subscriber/fn.set_global_default.html) can only be
called **once**.
## Release builds
`trace!` events are removed in release builds.
## Profiling
Red Knot generates a folded stack trace to the current directory named `tracing.folded` when setting the environment variable `RED_KNOT_LOG_PROFILE` to `1` or `true`.
```bash
RED_KNOT_LOG_PROFILE=1 red_knot -- --current-directory=../test -vvv
```
You can convert the textual representation into a visual one using `inferno`.
```shell
cargo install inferno
```
```shell
# flamegraph
cat tracing.folded | inferno-flamegraph > tracing-flamegraph.svg
# flamechart
cat tracing.folded | inferno-flamegraph --flamechart > tracing-flamechart.svg
```
![Example flamegraph](./tracing-flamegraph.png)
See [`tracing-flame`](https://crates.io/crates/tracing-flame) for more details.

View File

@@ -1,418 +0,0 @@
use std::any::type_name;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::marker::PhantomData;
use rustc_hash::FxHashMap;
use ruff_index::{Idx, IndexVec};
use ruff_python_ast::visitor::source_order;
use ruff_python_ast::visitor::source_order::{SourceOrderVisitor, TraversalSignal};
use ruff_python_ast::{
AnyNodeRef, AstNode, ExceptHandler, ExceptHandlerExceptHandler, Expr, MatchCase, ModModule,
NodeKind, Parameter, Stmt, StmtAnnAssign, StmtAssign, StmtAugAssign, StmtClassDef,
StmtFunctionDef, StmtGlobal, StmtImport, StmtImportFrom, StmtNonlocal, StmtTypeAlias,
TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, WithItem,
};
use ruff_text_size::{Ranged, TextRange};
/// A type agnostic ID that uniquely identifies an AST node in a file.
#[ruff_index::newtype_index]
pub struct AstId;
/// A typed ID that uniquely identifies an AST node in a file.
///
/// This is different from [`AstId`] in that it is a combination of ID and the type of the node the ID identifies.
/// Typing the ID prevents mixing IDs of different node types and allows to restrict the API to only accept
/// nodes for which an ID has been created (not all AST nodes get an ID).
pub struct TypedAstId<N: HasAstId> {
erased: AstId,
_marker: PhantomData<fn() -> N>,
}
impl<N: HasAstId> TypedAstId<N> {
/// Upcasts this ID from a more specific node type to a more general node type.
pub fn upcast<M: HasAstId>(self) -> TypedAstId<M>
where
N: Into<M>,
{
TypedAstId {
erased: self.erased,
_marker: PhantomData,
}
}
}
impl<N: HasAstId> Copy for TypedAstId<N> {}
impl<N: HasAstId> Clone for TypedAstId<N> {
fn clone(&self) -> Self {
*self
}
}
impl<N: HasAstId> PartialEq for TypedAstId<N> {
fn eq(&self, other: &Self) -> bool {
self.erased == other.erased
}
}
impl<N: HasAstId> Eq for TypedAstId<N> {}
impl<N: HasAstId> Hash for TypedAstId<N> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.erased.hash(state);
}
}
impl<N: HasAstId> Debug for TypedAstId<N> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("TypedAstId")
.field(&self.erased)
.field(&type_name::<N>())
.finish()
}
}
pub struct AstIds {
ids: IndexVec<AstId, NodeKey>,
reverse: FxHashMap<NodeKey, AstId>,
}
impl AstIds {
// TODO rust analyzer doesn't allocate an ID for every node. It only allocates ids for
// nodes with a corresponding HIR element, that is nodes that are definitions.
pub fn from_module(module: &ModModule) -> Self {
let mut visitor = AstIdsVisitor::default();
// TODO: visit_module?
// Make sure we visit the root
visitor.create_id(module);
visitor.visit_body(&module.body);
while let Some(deferred) = visitor.deferred.pop() {
match deferred {
DeferredNode::FunctionDefinition(def) => {
def.visit_source_order(&mut visitor);
}
DeferredNode::ClassDefinition(def) => def.visit_source_order(&mut visitor),
}
}
AstIds {
ids: visitor.ids,
reverse: visitor.reverse,
}
}
/// Returns the ID to the root node.
pub fn root(&self) -> NodeKey {
self.ids[AstId::new(0)]
}
/// Returns the [`TypedAstId`] for a node.
pub fn ast_id<N: HasAstId>(&self, node: &N) -> TypedAstId<N> {
let key = node.syntax_node_key();
TypedAstId {
erased: self.reverse.get(&key).copied().unwrap(),
_marker: PhantomData,
}
}
/// Returns the [`TypedAstId`] for the node identified with the given [`TypedNodeKey`].
pub fn ast_id_for_key<N: HasAstId>(&self, node: &TypedNodeKey<N>) -> TypedAstId<N> {
let ast_id = self.ast_id_for_node_key(node.inner);
TypedAstId {
erased: ast_id,
_marker: PhantomData,
}
}
/// Returns the untyped [`AstId`] for the node identified by the given `node` key.
pub fn ast_id_for_node_key(&self, node: NodeKey) -> AstId {
self.reverse
.get(&node)
.copied()
.expect("Can't find node in AstIds map.")
}
/// Returns the [`TypedNodeKey`] for the node identified by the given [`TypedAstId`].
pub fn key<N: HasAstId>(&self, id: TypedAstId<N>) -> TypedNodeKey<N> {
let syntax_key = self.ids[id.erased];
TypedNodeKey::new(syntax_key).unwrap()
}
pub fn node_key<H: HasAstId>(&self, id: TypedAstId<H>) -> NodeKey {
self.ids[id.erased]
}
}
impl std::fmt::Debug for AstIds {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let mut map = f.debug_map();
for (key, value) in self.ids.iter_enumerated() {
map.entry(&key, &value);
}
map.finish()
}
}
impl PartialEq for AstIds {
fn eq(&self, other: &Self) -> bool {
self.ids == other.ids
}
}
impl Eq for AstIds {}
#[derive(Default)]
struct AstIdsVisitor<'a> {
ids: IndexVec<AstId, NodeKey>,
reverse: FxHashMap<NodeKey, AstId>,
deferred: Vec<DeferredNode<'a>>,
}
impl<'a> AstIdsVisitor<'a> {
fn create_id<A: HasAstId>(&mut self, node: &A) {
let node_key = node.syntax_node_key();
let id = self.ids.push(node_key);
self.reverse.insert(node_key, id);
}
}
impl<'a> SourceOrderVisitor<'a> for AstIdsVisitor<'a> {
fn visit_stmt(&mut self, stmt: &'a Stmt) {
match stmt {
Stmt::FunctionDef(def) => {
self.create_id(def);
self.deferred.push(DeferredNode::FunctionDefinition(def));
return;
}
// TODO defer visiting the assignment body, type alias parameters etc?
Stmt::ClassDef(def) => {
self.create_id(def);
self.deferred.push(DeferredNode::ClassDefinition(def));
return;
}
Stmt::Expr(_) => {
// Skip
return;
}
Stmt::Return(_) => {}
Stmt::Delete(_) => {}
Stmt::Assign(assignment) => self.create_id(assignment),
Stmt::AugAssign(assignment) => {
self.create_id(assignment);
}
Stmt::AnnAssign(assignment) => self.create_id(assignment),
Stmt::TypeAlias(assignment) => self.create_id(assignment),
Stmt::For(_) => {}
Stmt::While(_) => {}
Stmt::If(_) => {}
Stmt::With(_) => {}
Stmt::Match(_) => {}
Stmt::Raise(_) => {}
Stmt::Try(_) => {}
Stmt::Assert(_) => {}
Stmt::Import(import) => self.create_id(import),
Stmt::ImportFrom(import_from) => self.create_id(import_from),
Stmt::Global(global) => self.create_id(global),
Stmt::Nonlocal(non_local) => self.create_id(non_local),
Stmt::Pass(_) => {}
Stmt::Break(_) => {}
Stmt::Continue(_) => {}
Stmt::IpyEscapeCommand(_) => {}
}
source_order::walk_stmt(self, stmt);
}
fn visit_expr(&mut self, _expr: &'a Expr) {}
fn visit_parameter(&mut self, parameter: &'a Parameter) {
self.create_id(parameter);
source_order::walk_parameter(self, parameter);
}
fn visit_except_handler(&mut self, except_handler: &'a ExceptHandler) {
match except_handler {
ExceptHandler::ExceptHandler(except_handler) => {
self.create_id(except_handler);
}
}
source_order::walk_except_handler(self, except_handler);
}
fn visit_with_item(&mut self, with_item: &'a WithItem) {
self.create_id(with_item);
source_order::walk_with_item(self, with_item);
}
fn visit_match_case(&mut self, match_case: &'a MatchCase) {
self.create_id(match_case);
source_order::walk_match_case(self, match_case);
}
fn visit_type_param(&mut self, type_param: &'a TypeParam) {
self.create_id(type_param);
}
}
enum DeferredNode<'a> {
FunctionDefinition(&'a StmtFunctionDef),
ClassDefinition(&'a StmtClassDef),
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct TypedNodeKey<N: AstNode> {
/// The type erased node key.
inner: NodeKey,
_marker: PhantomData<fn() -> N>,
}
impl<N: AstNode> TypedNodeKey<N> {
pub fn from_node(node: &N) -> Self {
let inner = NodeKey::from_node(node.as_any_node_ref());
Self {
inner,
_marker: PhantomData,
}
}
pub fn new(node_key: NodeKey) -> Option<Self> {
N::can_cast(node_key.kind).then_some(TypedNodeKey {
inner: node_key,
_marker: PhantomData,
})
}
pub fn resolve<'a>(&self, root: AnyNodeRef<'a>) -> Option<N::Ref<'a>> {
let node_ref = self.inner.resolve(root)?;
Some(N::cast_ref(node_ref).unwrap())
}
pub fn resolve_unwrap<'a>(&self, root: AnyNodeRef<'a>) -> N::Ref<'a> {
self.resolve(root).expect("node should resolve")
}
pub fn erased(&self) -> &NodeKey {
&self.inner
}
}
struct FindNodeKeyVisitor<'a> {
key: NodeKey,
result: Option<AnyNodeRef<'a>>,
}
impl<'a> SourceOrderVisitor<'a> for FindNodeKeyVisitor<'a> {
fn enter_node(&mut self, node: AnyNodeRef<'a>) -> TraversalSignal {
if self.result.is_some() {
return TraversalSignal::Skip;
}
if node.range() == self.key.range && node.kind() == self.key.kind {
self.result = Some(node);
TraversalSignal::Skip
} else if node.range().contains_range(self.key.range) {
TraversalSignal::Traverse
} else {
TraversalSignal::Skip
}
}
fn visit_body(&mut self, body: &'a [Stmt]) {
// TODO it would be more efficient to use binary search instead of linear
for stmt in body {
if stmt.range().start() > self.key.range.end() {
break;
}
self.visit_stmt(stmt);
}
}
}
// TODO an alternative to this is to have a `NodeId` on each node (in increasing order depending on the position).
// This would allow to reduce the size of this to a u32.
// What would be nice if we could use an `Arc::weak_ref` here but that only works if we use
// `Arc` internally
// TODO: Implement the logic to resolve a node, given a db (and the correct file).
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct NodeKey {
kind: NodeKind,
range: TextRange,
}
impl NodeKey {
pub fn from_node(node: AnyNodeRef) -> Self {
NodeKey {
kind: node.kind(),
range: node.range(),
}
}
pub fn resolve<'a>(&self, root: AnyNodeRef<'a>) -> Option<AnyNodeRef<'a>> {
// We need to do a binary search here. Only traverse into a node if the range is withint the node
let mut visitor = FindNodeKeyVisitor {
key: *self,
result: None,
};
if visitor.enter_node(root) == TraversalSignal::Traverse {
root.visit_preorder(&mut visitor);
}
visitor.result
}
}
/// Marker trait implemented by AST nodes for which we extract the `AstId`.
pub trait HasAstId: AstNode {
fn node_key(&self) -> TypedNodeKey<Self>
where
Self: Sized,
{
TypedNodeKey {
inner: self.syntax_node_key(),
_marker: PhantomData,
}
}
fn syntax_node_key(&self) -> NodeKey {
NodeKey {
kind: self.as_any_node_ref().kind(),
range: self.range(),
}
}
}
impl HasAstId for StmtFunctionDef {}
impl HasAstId for StmtClassDef {}
impl HasAstId for StmtAnnAssign {}
impl HasAstId for StmtAugAssign {}
impl HasAstId for StmtAssign {}
impl HasAstId for StmtTypeAlias {}
impl HasAstId for ModModule {}
impl HasAstId for StmtImport {}
impl HasAstId for StmtImportFrom {}
impl HasAstId for Parameter {}
impl HasAstId for TypeParam {}
impl HasAstId for Stmt {}
impl HasAstId for TypeParamTypeVar {}
impl HasAstId for TypeParamTypeVarTuple {}
impl HasAstId for TypeParamParamSpec {}
impl HasAstId for StmtGlobal {}
impl HasAstId for StmtNonlocal {}
impl HasAstId for ExceptHandlerExceptHandler {}
impl HasAstId for WithItem {}
impl HasAstId for MatchCase {}

View File

@@ -1,165 +0,0 @@
use std::fmt::Formatter;
use std::hash::Hash;
use std::sync::atomic::{AtomicUsize, Ordering};
use crate::db::QueryResult;
use dashmap::mapref::entry::Entry;
use crate::FxDashMap;
/// Simple key value cache that locks on a per-key level.
pub struct KeyValueCache<K, V> {
map: FxDashMap<K, V>,
statistics: CacheStatistics,
}
impl<K, V> KeyValueCache<K, V>
where
K: Eq + Hash + Clone,
V: Clone,
{
pub fn try_get(&self, key: &K) -> Option<V> {
if let Some(existing) = self.map.get(key) {
self.statistics.hit();
Some(existing.clone())
} else {
self.statistics.miss();
None
}
}
pub fn get<F>(&self, key: &K, compute: F) -> QueryResult<V>
where
F: FnOnce(&K) -> QueryResult<V>,
{
Ok(match self.map.entry(key.clone()) {
Entry::Occupied(cached) => {
self.statistics.hit();
cached.get().clone()
}
Entry::Vacant(vacant) => {
self.statistics.miss();
let value = compute(key)?;
vacant.insert(value.clone());
value
}
})
}
pub fn set(&mut self, key: K, value: V) {
self.map.insert(key, value);
}
pub fn remove(&mut self, key: &K) -> Option<V> {
self.map.remove(key).map(|(_, value)| value)
}
pub fn clear(&mut self) {
self.map.clear();
self.map.shrink_to_fit();
}
pub fn statistics(&self) -> Option<Statistics> {
self.statistics.to_statistics()
}
}
impl<K, V> Default for KeyValueCache<K, V>
where
K: Eq + Hash,
V: Clone,
{
fn default() -> Self {
Self {
map: FxDashMap::default(),
statistics: CacheStatistics::default(),
}
}
}
impl<K, V> std::fmt::Debug for KeyValueCache<K, V>
where
K: std::fmt::Debug + Eq + Hash,
V: std::fmt::Debug,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let mut debug = f.debug_map();
for entry in &self.map {
debug.entry(&entry.value(), &entry.key());
}
debug.finish()
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Statistics {
pub hits: usize,
pub misses: usize,
}
impl Statistics {
#[allow(clippy::cast_precision_loss)]
pub fn hit_rate(&self) -> Option<f64> {
if self.hits + self.misses == 0 {
return None;
}
Some((self.hits as f64) / (self.hits + self.misses) as f64)
}
}
#[cfg(debug_assertions)]
pub type CacheStatistics = DebugStatistics;
#[cfg(not(debug_assertions))]
pub type CacheStatistics = ReleaseStatistics;
pub trait StatisticsRecorder {
fn hit(&self);
fn miss(&self);
fn to_statistics(&self) -> Option<Statistics>;
}
#[derive(Debug, Default)]
pub struct DebugStatistics {
hits: AtomicUsize,
misses: AtomicUsize,
}
impl StatisticsRecorder for DebugStatistics {
// TODO figure out appropriate Ordering
fn hit(&self) {
self.hits.fetch_add(1, Ordering::SeqCst);
}
fn miss(&self) {
self.misses.fetch_add(1, Ordering::SeqCst);
}
fn to_statistics(&self) -> Option<Statistics> {
let hits = self.hits.load(Ordering::SeqCst);
let misses = self.misses.load(Ordering::SeqCst);
Some(Statistics { hits, misses })
}
}
#[derive(Debug, Default)]
pub struct ReleaseStatistics;
impl StatisticsRecorder for ReleaseStatistics {
#[inline]
fn hit(&self) {}
#[inline]
fn miss(&self) {}
#[inline]
fn to_statistics(&self) -> Option<Statistics> {
None
}
}

View File

@@ -1,42 +0,0 @@
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
#[derive(Debug, Clone, Default)]
pub struct CancellationTokenSource {
signal: Arc<AtomicBool>,
}
impl CancellationTokenSource {
pub fn new() -> Self {
Self {
signal: Arc::new(AtomicBool::new(false)),
}
}
#[tracing::instrument(level = "trace", skip_all)]
pub fn cancel(&self) {
self.signal.store(true, std::sync::atomic::Ordering::SeqCst);
}
pub fn is_cancelled(&self) -> bool {
self.signal.load(std::sync::atomic::Ordering::SeqCst)
}
pub fn token(&self) -> CancellationToken {
CancellationToken {
signal: self.signal.clone(),
}
}
}
#[derive(Clone, Debug)]
pub struct CancellationToken {
signal: Arc<AtomicBool>,
}
impl CancellationToken {
/// Returns `true` if cancellation has been requested.
pub fn is_cancelled(&self) -> bool {
self.signal.load(std::sync::atomic::Ordering::SeqCst)
}
}

View File

@@ -1,248 +0,0 @@
use std::sync::Arc;
pub use jars::{HasJar, HasJars};
pub use query::{QueryError, QueryResult};
pub use runtime::DbRuntime;
pub use storage::JarsStorage;
use crate::files::FileId;
use crate::lint::{LintSemanticStorage, LintSyntaxStorage};
use crate::module::ModuleResolver;
use crate::parse::ParsedStorage;
use crate::semantic::SemanticIndexStorage;
use crate::semantic::TypeStore;
use crate::source::SourceStorage;
mod jars;
mod query;
mod runtime;
mod storage;
pub trait Database {
/// Returns a reference to the runtime of the current worker.
fn runtime(&self) -> &DbRuntime;
/// Returns a mutable reference to the runtime. Only one worker can hold a mutable reference to the runtime.
fn runtime_mut(&mut self) -> &mut DbRuntime;
/// Returns `Ok` if the queries have not been cancelled and `Err(QueryError::Cancelled)` otherwise.
fn cancelled(&self) -> QueryResult<()> {
self.runtime().cancelled()
}
/// Returns `true` if the queries have been cancelled.
fn is_cancelled(&self) -> bool {
self.runtime().is_cancelled()
}
}
/// Database that supports running queries from multiple threads.
pub trait ParallelDatabase: Database + Send {
/// Creates a snapshot of the database state that can be used to query the database in another thread.
///
/// The snapshot is a read-only view of the database but query results are shared between threads.
/// All queries will be automatically cancelled when applying any mutations (calling [`HasJars::jars_mut`])
/// to the database (not the snapshot, because they're readonly).
///
/// ## Creating a snapshot
///
/// Creating a snapshot of the database's jars is cheap but creating a snapshot of
/// other state stored on the database might require deep-cloning data. That's why you should
/// avoid creating snapshots in a hot function (e.g. don't create a snapshot for each file, instead
/// create a snapshot when scheduling the check of an entire program).
///
/// ## Salsa compatibility
/// Salsa prohibits creating a snapshot while running a local query (it's fine if other workers run a query) [[source](https://github.com/salsa-rs/salsa/issues/80)].
/// We should avoid creating snapshots while running a query because we might want to adopt Salsa in the future (if we can figure out persistent caching).
/// Unfortunately, the infrastructure doesn't provide an automated way of knowing when a query is run, that's
/// why we have to "enforce" this constraint manually.
#[must_use]
fn snapshot(&self) -> Snapshot<Self>;
}
pub trait DbWithJar<Jar>: Database + HasJar<Jar> {}
/// Readonly snapshot of a database.
///
/// ## Dead locks
/// A snapshot should always be dropped as soon as it is no longer necessary to run queries.
/// Storing the snapshot without running a query or periodically checking if cancellation was requested
/// can lead to deadlocks because mutating the [`Database`] requires cancels all pending queries
/// and waiting for all [`Snapshot`]s to be dropped.
#[derive(Debug)]
pub struct Snapshot<DB: ?Sized>
where
DB: ParallelDatabase,
{
db: DB,
}
impl<DB> Snapshot<DB>
where
DB: ParallelDatabase,
{
pub fn new(db: DB) -> Self {
Snapshot { db }
}
}
impl<DB> std::ops::Deref for Snapshot<DB>
where
DB: ParallelDatabase,
{
type Target = DB;
fn deref(&self) -> &DB {
&self.db
}
}
pub trait Upcast<T: ?Sized> {
fn upcast(&self) -> &T;
}
// Red knot specific databases code.
pub trait SourceDb: DbWithJar<SourceJar> {
// queries
fn file_id(&self, path: &std::path::Path) -> FileId;
fn file_path(&self, file_id: FileId) -> Arc<std::path::Path>;
}
pub trait SemanticDb: SourceDb + DbWithJar<SemanticJar> + Upcast<dyn SourceDb> {}
pub trait LintDb: SemanticDb + DbWithJar<LintJar> + Upcast<dyn SemanticDb> {}
pub trait Db: LintDb + Upcast<dyn LintDb> {}
#[derive(Debug, Default)]
pub struct SourceJar {
pub sources: SourceStorage,
pub parsed: ParsedStorage,
}
#[derive(Debug, Default)]
pub struct SemanticJar {
pub module_resolver: ModuleResolver,
pub semantic_indices: SemanticIndexStorage,
pub type_store: TypeStore,
}
#[derive(Debug, Default)]
pub struct LintJar {
pub lint_syntax: LintSyntaxStorage,
pub lint_semantic: LintSemanticStorage,
}
#[cfg(test)]
pub(crate) mod tests {
use std::path::Path;
use std::sync::Arc;
use crate::db::{
Database, DbRuntime, DbWithJar, HasJar, HasJars, JarsStorage, LintDb, LintJar, QueryResult,
SourceDb, SourceJar, Upcast,
};
use crate::files::{FileId, Files};
use super::{SemanticDb, SemanticJar};
// This can be a partial database used in a single crate for testing.
// It would hold fewer data than the full database.
#[derive(Debug, Default)]
pub(crate) struct TestDb {
files: Files,
jars: JarsStorage<Self>,
}
impl HasJar<SourceJar> for TestDb {
fn jar(&self) -> QueryResult<&SourceJar> {
Ok(&self.jars()?.0)
}
fn jar_mut(&mut self) -> &mut SourceJar {
&mut self.jars_mut().0
}
}
impl HasJar<SemanticJar> for TestDb {
fn jar(&self) -> QueryResult<&SemanticJar> {
Ok(&self.jars()?.1)
}
fn jar_mut(&mut self) -> &mut SemanticJar {
&mut self.jars_mut().1
}
}
impl HasJar<LintJar> for TestDb {
fn jar(&self) -> QueryResult<&LintJar> {
Ok(&self.jars()?.2)
}
fn jar_mut(&mut self) -> &mut LintJar {
&mut self.jars_mut().2
}
}
impl SourceDb for TestDb {
fn file_id(&self, path: &Path) -> FileId {
self.files.intern(path)
}
fn file_path(&self, file_id: FileId) -> Arc<Path> {
self.files.path(file_id)
}
}
impl DbWithJar<SourceJar> for TestDb {}
impl Upcast<dyn SourceDb> for TestDb {
fn upcast(&self) -> &(dyn SourceDb + 'static) {
self
}
}
impl SemanticDb for TestDb {}
impl DbWithJar<SemanticJar> for TestDb {}
impl Upcast<dyn SemanticDb> for TestDb {
fn upcast(&self) -> &(dyn SemanticDb + 'static) {
self
}
}
impl LintDb for TestDb {}
impl Upcast<dyn LintDb> for TestDb {
fn upcast(&self) -> &(dyn LintDb + 'static) {
self
}
}
impl DbWithJar<LintJar> for TestDb {}
impl HasJars for TestDb {
type Jars = (SourceJar, SemanticJar, LintJar);
fn jars(&self) -> QueryResult<&Self::Jars> {
self.jars.jars()
}
fn jars_mut(&mut self) -> &mut Self::Jars {
self.jars.jars_mut()
}
}
impl Database for TestDb {
fn runtime(&self) -> &DbRuntime {
self.jars.runtime()
}
fn runtime_mut(&mut self) -> &mut DbRuntime {
self.jars.runtime_mut()
}
}
}

View File

@@ -1,37 +0,0 @@
use crate::db::query::QueryResult;
/// Gives access to a specific jar in the database.
///
/// Nope, the terminology isn't borrowed from Java but from Salsa <https://salsa-rs.github.io/salsa/>,
/// which is an analogy to storing the salsa in different jars.
///
/// The basic idea is that each crate can define its own jar and the jars can be combined to a single
/// database in the top level crate. Each crate also defines its own `Database` trait. The combination of
/// `Database` trait and the jar allows to write queries in isolation without having to know how they get composed at the upper levels.
///
/// Salsa further defines a `HasIngredient` trait which slices the jar to a specific storage (e.g. a specific cache).
/// We don't need this just yet because we write our queries by hand. We may want a similar trait if we decide
/// to use a macro to generate the queries.
pub trait HasJar<T> {
/// Gives a read-only reference to the jar.
fn jar(&self) -> QueryResult<&T>;
/// Gives a mutable reference to the jar.
fn jar_mut(&mut self) -> &mut T;
}
/// Gives access to the jars in a database.
pub trait HasJars {
/// A type storing the jars.
///
/// Most commonly, this is a tuple where each jar is a tuple element.
type Jars: Default;
/// Gives access to the underlying jars but tests if the queries have been cancelled.
///
/// Returns `Err(QueryError::Cancelled)` if the queries have been cancelled.
fn jars(&self) -> QueryResult<&Self::Jars>;
/// Gives mutable access to the underlying jars.
fn jars_mut(&mut self) -> &mut Self::Jars;
}

View File

@@ -1,20 +0,0 @@
use std::fmt::{Display, Formatter};
/// Reason why a db query operation failed.
#[derive(Debug, Clone, Copy)]
pub enum QueryError {
/// The query was cancelled because the DB was mutated or the query was cancelled by the host (e.g. on a file change or when pressing CTRL+C).
Cancelled,
}
impl Display for QueryError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
QueryError::Cancelled => f.write_str("query was cancelled"),
}
}
}
impl std::error::Error for QueryError {}
pub type QueryResult<T> = Result<T, QueryError>;

View File

@@ -1,41 +0,0 @@
use crate::cancellation::CancellationTokenSource;
use crate::db::{QueryError, QueryResult};
/// Holds the jar agnostic state of the database.
#[derive(Debug, Default)]
pub struct DbRuntime {
/// The cancellation token source used to signal other works that the queries should be aborted and
/// exit at the next possible point.
cancellation_token: CancellationTokenSource,
}
impl DbRuntime {
pub(super) fn snapshot(&self) -> Self {
Self {
cancellation_token: self.cancellation_token.clone(),
}
}
/// Cancels the pending queries of other workers. The current worker cannot have any pending
/// queries because we're holding a mutable reference to the runtime.
pub(super) fn cancel_other_workers(&mut self) {
self.cancellation_token.cancel();
// Set a new cancellation token so that we're in a non-cancelled state again when running the next
// query.
self.cancellation_token = CancellationTokenSource::default();
}
/// Returns `Ok` if the queries have not been cancelled and `Err(QueryError::Cancelled)` otherwise.
pub(super) fn cancelled(&self) -> QueryResult<()> {
if self.cancellation_token.is_cancelled() {
Err(QueryError::Cancelled)
} else {
Ok(())
}
}
/// Returns `true` if the queries have been cancelled.
pub(super) fn is_cancelled(&self) -> bool {
self.cancellation_token.is_cancelled()
}
}

View File

@@ -1,117 +0,0 @@
use std::fmt::Formatter;
use std::sync::Arc;
use crossbeam::sync::WaitGroup;
use crate::db::query::QueryResult;
use crate::db::runtime::DbRuntime;
use crate::db::{HasJars, ParallelDatabase};
/// Stores the jars of a database and the state for each worker.
///
/// Today, all state is shared across all workers, but it may be desired to store data per worker in the future.
pub struct JarsStorage<T>
where
T: HasJars + Sized,
{
// It's important that `jars_wait_group` is declared after `jars` to ensure that `jars` is dropped first.
// See https://doc.rust-lang.org/reference/destructors.html
/// Stores the jars of the database.
jars: Arc<T::Jars>,
/// Used to count the references to `jars`. Allows implementing `jars_mut` without requiring to clone `jars`.
jars_wait_group: WaitGroup,
/// The data agnostic state.
runtime: DbRuntime,
}
impl<Db> JarsStorage<Db>
where
Db: HasJars,
{
pub(super) fn new() -> Self {
Self {
jars: Arc::new(Db::Jars::default()),
jars_wait_group: WaitGroup::default(),
runtime: DbRuntime::default(),
}
}
/// Creates a snapshot of the jars.
///
/// Creating the snapshot is cheap because it doesn't clone the jars, it only increments a ref counter.
#[must_use]
pub fn snapshot(&self) -> JarsStorage<Db>
where
Db: ParallelDatabase,
{
Self {
jars: self.jars.clone(),
jars_wait_group: self.jars_wait_group.clone(),
runtime: self.runtime.snapshot(),
}
}
pub(crate) fn jars(&self) -> QueryResult<&Db::Jars> {
self.runtime.cancelled()?;
Ok(&self.jars)
}
/// Returns a mutable reference to the jars without cloning their content.
///
/// The method cancels any pending queries of other works and waits for them to complete so that
/// this instance is the only instance holding a reference to the jars.
pub(crate) fn jars_mut(&mut self) -> &mut Db::Jars {
// We have a mutable ref here, so no more workers can be spawned between calling this function and taking the mut ref below.
self.cancel_other_workers();
// Now all other references to `self.jars` should have been released. We can now safely return a mutable reference
// to the Arc's content.
let jars =
Arc::get_mut(&mut self.jars).expect("All references to jars should have been released");
jars
}
pub(crate) fn runtime(&self) -> &DbRuntime {
&self.runtime
}
pub(crate) fn runtime_mut(&mut self) -> &mut DbRuntime {
// Note: This method may need to use a similar trick to `jars_mut` if `DbRuntime` is ever to store data that is shared between workers.
&mut self.runtime
}
#[tracing::instrument(level = "trace", skip(self))]
fn cancel_other_workers(&mut self) {
self.runtime.cancel_other_workers();
// Wait for all other works to complete.
let existing_wait = std::mem::take(&mut self.jars_wait_group);
existing_wait.wait();
}
}
impl<Db> Default for JarsStorage<Db>
where
Db: HasJars,
{
fn default() -> Self {
Self::new()
}
}
impl<T> std::fmt::Debug for JarsStorage<T>
where
T: HasJars,
<T as HasJars>::Jars: std::fmt::Debug,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SharedStorage")
.field("jars", &self.jars)
.field("jars_wait_group", &self.jars_wait_group)
.field("runtime", &self.runtime)
.finish()
}
}

View File

@@ -1,180 +0,0 @@
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::path::Path;
use std::sync::Arc;
use hashbrown::hash_map::RawEntryMut;
use parking_lot::RwLock;
use rustc_hash::FxHasher;
use ruff_index::{newtype_index, IndexVec};
type Map<K, V> = hashbrown::HashMap<K, V, ()>;
#[newtype_index]
pub struct FileId;
// TODO we'll need a higher level virtual file system abstraction that allows testing if a file exists
// or retrieving its content (ideally lazily and in a way that the memory can be retained later)
// I suspect that we'll end up with a FileSystem trait and our own Path abstraction.
#[derive(Default)]
pub struct Files {
inner: Arc<RwLock<FilesInner>>,
}
impl Files {
#[tracing::instrument(level = "debug", skip(self))]
pub fn intern(&self, path: &Path) -> FileId {
self.inner.write().intern(path)
}
pub fn try_get(&self, path: &Path) -> Option<FileId> {
self.inner.read().try_get(path)
}
#[tracing::instrument(level = "debug", skip(self))]
pub fn path(&self, id: FileId) -> Arc<Path> {
self.inner.read().path(id)
}
/// Snapshots files for a new database snapshot.
///
/// This method should not be used outside a database snapshot.
#[must_use]
pub fn snapshot(&self) -> Files {
Files {
inner: self.inner.clone(),
}
}
}
impl Debug for Files {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let files = self.inner.read();
let mut debug = f.debug_map();
for item in files.iter() {
debug.entry(&item.0, &item.1);
}
debug.finish()
}
}
impl PartialEq for Files {
fn eq(&self, other: &Self) -> bool {
self.inner.read().eq(&other.inner.read())
}
}
impl Eq for Files {}
#[derive(Default)]
struct FilesInner {
by_path: Map<FileId, ()>,
// TODO should we use a map here to reclaim the space for removed files?
// TODO I think we should use our own path abstraction here to avoid having to normalize paths
// and dealing with non-utf paths everywhere.
by_id: IndexVec<FileId, Arc<Path>>,
}
impl FilesInner {
/// Inserts the path and returns a new id for it or returns the id if it is an existing path.
// TODO should this accept Path or PathBuf?
pub(crate) fn intern(&mut self, path: &Path) -> FileId {
let hash = FilesInner::hash_path(path);
let entry = self
.by_path
.raw_entry_mut()
.from_hash(hash, |existing_file| &*self.by_id[*existing_file] == path);
match entry {
RawEntryMut::Occupied(entry) => *entry.key(),
RawEntryMut::Vacant(entry) => {
let id = self.by_id.push(Arc::from(path));
entry.insert_with_hasher(hash, id, (), |file| {
FilesInner::hash_path(&self.by_id[*file])
});
id
}
}
}
fn hash_path(path: &Path) -> u64 {
let mut hasher = FxHasher::default();
path.hash(&mut hasher);
hasher.finish()
}
pub(crate) fn try_get(&self, path: &Path) -> Option<FileId> {
let mut hasher = FxHasher::default();
path.hash(&mut hasher);
let hash = hasher.finish();
Some(
*self
.by_path
.raw_entry()
.from_hash(hash, |existing_file| &*self.by_id[*existing_file] == path)?
.0,
)
}
/// Returns the path for the file with the given id.
pub(crate) fn path(&self, id: FileId) -> Arc<Path> {
self.by_id[id].clone()
}
pub(crate) fn iter(&self) -> impl Iterator<Item = (FileId, Arc<Path>)> + '_ {
self.by_path.keys().map(|id| (*id, self.by_id[*id].clone()))
}
}
impl PartialEq for FilesInner {
fn eq(&self, other: &Self) -> bool {
self.by_id == other.by_id
}
}
impl Eq for FilesInner {}
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
#[test]
fn insert_path_twice_same_id() {
let files = Files::default();
let path = PathBuf::from("foo/bar");
let id1 = files.intern(&path);
let id2 = files.intern(&path);
assert_eq!(id1, id2);
}
#[test]
fn insert_different_paths_different_ids() {
let files = Files::default();
let path1 = PathBuf::from("foo/bar");
let path2 = PathBuf::from("foo/bar/baz");
let id1 = files.intern(&path1);
let id2 = files.intern(&path2);
assert_ne!(id1, id2);
}
#[test]
fn four_files() {
let files = Files::default();
let foo_path = PathBuf::from("foo");
let foo_id = files.intern(&foo_path);
let bar_path = PathBuf::from("bar");
files.intern(&bar_path);
let baz_path = PathBuf::from("baz");
files.intern(&baz_path);
let qux_path = PathBuf::from("qux");
files.intern(&qux_path);
let foo_id_2 = files.try_get(&foo_path).expect("foo_path to be found");
assert_eq!(foo_id_2, foo_id);
}
}

View File

@@ -1,67 +0,0 @@
//! Key observations
//!
//! The HIR (High-Level Intermediate Representation) avoids allocations to large extends by:
//! * Using an arena per node type
//! * using ids and id ranges to reference items.
//!
//! Using separate arena per node type has the advantage that the IDs are relatively stable, because
//! they only change when a node of the same kind has been added or removed. (What's unclear is if that matters or if
//! it still triggers a re-compute because the AST-id in the node has changed).
//!
//! The HIR does not store all details. It mainly stores the *public* interface. There's a reference
//! back to the AST node to get more details.
//!
//!
use crate::ast_ids::{HasAstId, TypedAstId};
use crate::files::FileId;
use std::fmt::Formatter;
use std::hash::{Hash, Hasher};
pub struct HirAstId<N: HasAstId> {
file_id: FileId,
node_id: TypedAstId<N>,
}
impl<N: HasAstId> Copy for HirAstId<N> {}
impl<N: HasAstId> Clone for HirAstId<N> {
fn clone(&self) -> Self {
*self
}
}
impl<N: HasAstId> PartialEq for HirAstId<N> {
fn eq(&self, other: &Self) -> bool {
self.file_id == other.file_id && self.node_id == other.node_id
}
}
impl<N: HasAstId> Eq for HirAstId<N> {}
impl<N: HasAstId> std::fmt::Debug for HirAstId<N> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("HirAstId")
.field("file_id", &self.file_id)
.field("node_id", &self.node_id)
.finish()
}
}
impl<N: HasAstId> Hash for HirAstId<N> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.file_id.hash(state);
self.node_id.hash(state);
}
}
impl<N: HasAstId> HirAstId<N> {
pub fn upcast<M: HasAstId>(self) -> HirAstId<M>
where
N: Into<M>,
{
HirAstId {
file_id: self.file_id,
node_id: self.node_id.upcast(),
}
}
}

View File

@@ -1,556 +0,0 @@
use std::ops::{Index, Range};
use ruff_index::{newtype_index, IndexVec};
use ruff_python_ast::visitor::preorder;
use ruff_python_ast::visitor::preorder::PreorderVisitor;
use ruff_python_ast::{
Decorator, ExceptHandler, ExceptHandlerExceptHandler, Expr, MatchCase, ModModule, Stmt,
StmtAnnAssign, StmtAssign, StmtClassDef, StmtFunctionDef, StmtGlobal, StmtImport,
StmtImportFrom, StmtNonlocal, StmtTypeAlias, TypeParam, TypeParamParamSpec, TypeParamTypeVar,
TypeParamTypeVarTuple, WithItem,
};
use crate::ast_ids::{AstIds, HasAstId};
use crate::files::FileId;
use crate::hir::HirAstId;
use crate::Name;
#[newtype_index]
pub struct FunctionId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Function {
ast_id: HirAstId<StmtFunctionDef>,
name: Name,
parameters: Range<ParameterId>,
type_parameters: Range<TypeParameterId>, // TODO: type_parameters, return expression, decorators
}
#[newtype_index]
pub struct ParameterId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Parameter {
kind: ParameterKind,
name: Name,
default: Option<()>, // TODO use expression HIR
ast_id: HirAstId<ruff_python_ast::Parameter>,
}
// TODO or should `Parameter` be an enum?
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ParameterKind {
PositionalOnly,
Arguments,
Vararg,
KeywordOnly,
Kwarg,
}
#[newtype_index]
pub struct ClassId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Class {
name: Name,
ast_id: HirAstId<StmtClassDef>,
// TODO type parameters, inheritance, decorators, members
}
#[newtype_index]
pub struct AssignmentId;
// This can have more than one name...
// but that means we can't implement `name()` on `ModuleItem`.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Assignment {
// TODO: Handle multiple names / targets
name: Name,
ast_id: HirAstId<StmtAssign>,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct AnnotatedAssignment {
name: Name,
ast_id: HirAstId<StmtAnnAssign>,
}
#[newtype_index]
pub struct AnnotatedAssignmentId;
#[newtype_index]
pub struct TypeAliasId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TypeAlias {
name: Name,
ast_id: HirAstId<StmtTypeAlias>,
parameters: Range<TypeParameterId>,
}
#[newtype_index]
pub struct TypeParameterId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TypeParameter {
TypeVar(TypeParameterTypeVar),
ParamSpec(TypeParameterParamSpec),
TypeVarTuple(TypeParameterTypeVarTuple),
}
impl TypeParameter {
pub fn ast_id(&self) -> HirAstId<TypeParam> {
match self {
TypeParameter::TypeVar(type_var) => type_var.ast_id.upcast(),
TypeParameter::ParamSpec(param_spec) => param_spec.ast_id.upcast(),
TypeParameter::TypeVarTuple(type_var_tuple) => type_var_tuple.ast_id.upcast(),
}
}
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TypeParameterTypeVar {
name: Name,
ast_id: HirAstId<TypeParamTypeVar>,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TypeParameterParamSpec {
name: Name,
ast_id: HirAstId<TypeParamParamSpec>,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TypeParameterTypeVarTuple {
name: Name,
ast_id: HirAstId<TypeParamTypeVarTuple>,
}
#[newtype_index]
pub struct GlobalId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Global {
// TODO track names
ast_id: HirAstId<StmtGlobal>,
}
#[newtype_index]
pub struct NonLocalId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct NonLocal {
// TODO track names
ast_id: HirAstId<StmtNonlocal>,
}
pub enum DefinitionId {
Function(FunctionId),
Parameter(ParameterId),
Class(ClassId),
Assignment(AssignmentId),
AnnotatedAssignment(AnnotatedAssignmentId),
Global(GlobalId),
NonLocal(NonLocalId),
TypeParameter(TypeParameterId),
TypeAlias(TypeAlias),
}
pub enum DefinitionItem {
Function(Function),
Parameter(Parameter),
Class(Class),
Assignment(Assignment),
AnnotatedAssignment(AnnotatedAssignment),
Global(Global),
NonLocal(NonLocal),
TypeParameter(TypeParameter),
TypeAlias(TypeAlias),
}
// The closest is rust-analyzers item-tree. It only represents "Items" which make the public interface of a module
// (it excludes any other statement or expressions). rust-analyzer uses it as the main input to the name resolution
// algorithm
// > It is the input to the name resolution algorithm, as well as to the queries defined in `adt.rs`,
// > `data.rs`, and most things in `attr.rs`.
//
// > One important purpose of this layer is to provide an "invalidation barrier" for incremental
// > computations: when typing inside an item body, the `ItemTree` of the modified file is typically
// > unaffected, so we don't have to recompute name resolution results or item data (see `data.rs`).
//
// I haven't fully figured this out but I think that this composes the "public" interface of a module?
// But maybe that's too optimistic.
//
//
#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct Definitions {
functions: IndexVec<FunctionId, Function>,
parameters: IndexVec<ParameterId, Parameter>,
classes: IndexVec<ClassId, Class>,
assignments: IndexVec<AssignmentId, Assignment>,
annotated_assignments: IndexVec<AnnotatedAssignmentId, AnnotatedAssignment>,
type_aliases: IndexVec<TypeAliasId, TypeAlias>,
type_parameters: IndexVec<TypeParameterId, TypeParameter>,
globals: IndexVec<GlobalId, Global>,
non_locals: IndexVec<NonLocalId, NonLocal>,
}
impl Definitions {
pub fn from_module(module: &ModModule, ast_ids: &AstIds, file_id: FileId) -> Self {
let mut visitor = DefinitionsVisitor {
definitions: Definitions::default(),
ast_ids,
file_id,
};
visitor.visit_body(&module.body);
visitor.definitions
}
}
impl Index<FunctionId> for Definitions {
type Output = Function;
fn index(&self, index: FunctionId) -> &Self::Output {
&self.functions[index]
}
}
impl Index<ParameterId> for Definitions {
type Output = Parameter;
fn index(&self, index: ParameterId) -> &Self::Output {
&self.parameters[index]
}
}
impl Index<ClassId> for Definitions {
type Output = Class;
fn index(&self, index: ClassId) -> &Self::Output {
&self.classes[index]
}
}
impl Index<AssignmentId> for Definitions {
type Output = Assignment;
fn index(&self, index: AssignmentId) -> &Self::Output {
&self.assignments[index]
}
}
impl Index<AnnotatedAssignmentId> for Definitions {
type Output = AnnotatedAssignment;
fn index(&self, index: AnnotatedAssignmentId) -> &Self::Output {
&self.annotated_assignments[index]
}
}
impl Index<TypeAliasId> for Definitions {
type Output = TypeAlias;
fn index(&self, index: TypeAliasId) -> &Self::Output {
&self.type_aliases[index]
}
}
impl Index<GlobalId> for Definitions {
type Output = Global;
fn index(&self, index: GlobalId) -> &Self::Output {
&self.globals[index]
}
}
impl Index<NonLocalId> for Definitions {
type Output = NonLocal;
fn index(&self, index: NonLocalId) -> &Self::Output {
&self.non_locals[index]
}
}
impl Index<TypeParameterId> for Definitions {
type Output = TypeParameter;
fn index(&self, index: TypeParameterId) -> &Self::Output {
&self.type_parameters[index]
}
}
struct DefinitionsVisitor<'a> {
definitions: Definitions,
ast_ids: &'a AstIds,
file_id: FileId,
}
impl DefinitionsVisitor<'_> {
fn ast_id<N: HasAstId>(&self, node: &N) -> HirAstId<N> {
HirAstId {
file_id: self.file_id,
node_id: self.ast_ids.ast_id(node),
}
}
fn lower_function_def(&mut self, function: &StmtFunctionDef) -> FunctionId {
let name = Name::new(&function.name);
let first_type_parameter_id = self.definitions.type_parameters.next_index();
let mut last_type_parameter_id = first_type_parameter_id;
if let Some(type_params) = &function.type_params {
for parameter in &type_params.type_params {
let id = self.lower_type_parameter(parameter);
last_type_parameter_id = id;
}
}
let parameters = self.lower_parameters(&function.parameters);
self.definitions.functions.push(Function {
name,
ast_id: self.ast_id(function),
parameters,
type_parameters: first_type_parameter_id..last_type_parameter_id,
})
}
fn lower_parameters(&mut self, parameters: &ruff_python_ast::Parameters) -> Range<ParameterId> {
let first_parameter_id = self.definitions.parameters.next_index();
let mut last_parameter_id = first_parameter_id;
for parameter in &parameters.posonlyargs {
last_parameter_id = self.definitions.parameters.push(Parameter {
kind: ParameterKind::PositionalOnly,
name: Name::new(&parameter.parameter.name),
default: None,
ast_id: self.ast_id(&parameter.parameter),
});
}
if let Some(vararg) = &parameters.vararg {
last_parameter_id = self.definitions.parameters.push(Parameter {
kind: ParameterKind::Vararg,
name: Name::new(&vararg.name),
default: None,
ast_id: self.ast_id(vararg),
});
}
for parameter in &parameters.kwonlyargs {
last_parameter_id = self.definitions.parameters.push(Parameter {
kind: ParameterKind::KeywordOnly,
name: Name::new(&parameter.parameter.name),
default: None,
ast_id: self.ast_id(&parameter.parameter),
});
}
if let Some(kwarg) = &parameters.kwarg {
last_parameter_id = self.definitions.parameters.push(Parameter {
kind: ParameterKind::KeywordOnly,
name: Name::new(&kwarg.name),
default: None,
ast_id: self.ast_id(kwarg),
});
}
first_parameter_id..last_parameter_id
}
fn lower_class_def(&mut self, class: &StmtClassDef) -> ClassId {
let name = Name::new(&class.name);
self.definitions.classes.push(Class {
name,
ast_id: self.ast_id(class),
})
}
fn lower_assignment(&mut self, assignment: &StmtAssign) {
// FIXME handle multiple names
if let Some(Expr::Name(name)) = assignment.targets.first() {
self.definitions.assignments.push(Assignment {
name: Name::new(&name.id),
ast_id: self.ast_id(assignment),
});
}
}
fn lower_annotated_assignment(&mut self, annotated_assignment: &StmtAnnAssign) {
if let Expr::Name(name) = &*annotated_assignment.target {
self.definitions
.annotated_assignments
.push(AnnotatedAssignment {
name: Name::new(&name.id),
ast_id: self.ast_id(annotated_assignment),
});
}
}
fn lower_type_alias(&mut self, type_alias: &StmtTypeAlias) {
if let Expr::Name(name) = &*type_alias.name {
let name = Name::new(&name.id);
let lower_parameters_id = self.definitions.type_parameters.next_index();
let mut last_parameter_id = lower_parameters_id;
if let Some(type_params) = &type_alias.type_params {
for type_parameter in &type_params.type_params {
let id = self.lower_type_parameter(type_parameter);
last_parameter_id = id;
}
}
self.definitions.type_aliases.push(TypeAlias {
name,
ast_id: self.ast_id(type_alias),
parameters: lower_parameters_id..last_parameter_id,
});
}
}
fn lower_type_parameter(&mut self, type_parameter: &TypeParam) -> TypeParameterId {
match type_parameter {
TypeParam::TypeVar(type_var) => {
self.definitions
.type_parameters
.push(TypeParameter::TypeVar(TypeParameterTypeVar {
name: Name::new(&type_var.name),
ast_id: self.ast_id(type_var),
}))
}
TypeParam::ParamSpec(param_spec) => {
self.definitions
.type_parameters
.push(TypeParameter::ParamSpec(TypeParameterParamSpec {
name: Name::new(&param_spec.name),
ast_id: self.ast_id(param_spec),
}))
}
TypeParam::TypeVarTuple(type_var_tuple) => {
self.definitions
.type_parameters
.push(TypeParameter::TypeVarTuple(TypeParameterTypeVarTuple {
name: Name::new(&type_var_tuple.name),
ast_id: self.ast_id(type_var_tuple),
}))
}
}
}
fn lower_import(&mut self, _import: &StmtImport) {
// TODO
}
fn lower_import_from(&mut self, _import_from: &StmtImportFrom) {
// TODO
}
fn lower_global(&mut self, global: &StmtGlobal) -> GlobalId {
self.definitions.globals.push(Global {
ast_id: self.ast_id(global),
})
}
fn lower_non_local(&mut self, non_local: &StmtNonlocal) -> NonLocalId {
self.definitions.non_locals.push(NonLocal {
ast_id: self.ast_id(non_local),
})
}
fn lower_except_handler(&mut self, _except_handler: &ExceptHandlerExceptHandler) {
// TODO
}
fn lower_with_item(&mut self, _with_item: &WithItem) {
// TODO
}
fn lower_match_case(&mut self, _match_case: &MatchCase) {
// TODO
}
}
impl PreorderVisitor<'_> for DefinitionsVisitor<'_> {
fn visit_stmt(&mut self, stmt: &Stmt) {
match stmt {
// Definition statements
Stmt::FunctionDef(definition) => {
self.lower_function_def(definition);
self.visit_body(&definition.body);
}
Stmt::ClassDef(definition) => {
self.lower_class_def(definition);
self.visit_body(&definition.body);
}
Stmt::Assign(assignment) => {
self.lower_assignment(assignment);
}
Stmt::AnnAssign(annotated_assignment) => {
self.lower_annotated_assignment(annotated_assignment);
}
Stmt::TypeAlias(type_alias) => {
self.lower_type_alias(type_alias);
}
Stmt::Import(import) => self.lower_import(import),
Stmt::ImportFrom(import_from) => self.lower_import_from(import_from),
Stmt::Global(global) => {
self.lower_global(global);
}
Stmt::Nonlocal(non_local) => {
self.lower_non_local(non_local);
}
// Visit the compound statement bodies because they can contain other definitions.
Stmt::For(_)
| Stmt::While(_)
| Stmt::If(_)
| Stmt::With(_)
| Stmt::Match(_)
| Stmt::Try(_) => {
preorder::walk_stmt(self, stmt);
}
// Skip over simple statements because they can't contain any other definitions.
Stmt::Return(_)
| Stmt::Delete(_)
| Stmt::AugAssign(_)
| Stmt::Raise(_)
| Stmt::Assert(_)
| Stmt::Expr(_)
| Stmt::Pass(_)
| Stmt::Break(_)
| Stmt::Continue(_)
| Stmt::IpyEscapeCommand(_) => {
// No op
}
}
}
fn visit_expr(&mut self, _: &'_ Expr) {}
fn visit_decorator(&mut self, _decorator: &'_ Decorator) {}
fn visit_except_handler(&mut self, except_handler: &'_ ExceptHandler) {
match except_handler {
ExceptHandler::ExceptHandler(except_handler) => {
self.lower_except_handler(except_handler);
}
}
}
fn visit_with_item(&mut self, with_item: &'_ WithItem) {
self.lower_with_item(with_item);
}
fn visit_match_case(&mut self, match_case: &'_ MatchCase) {
self.lower_match_case(match_case);
self.visit_body(&match_case.body);
}
}

View File

@@ -1,109 +0,0 @@
use std::fmt::Formatter;
use std::hash::BuildHasherDefault;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use rustc_hash::{FxHashSet, FxHasher};
use crate::files::FileId;
pub mod ast_ids;
pub mod cache;
pub mod cancellation;
pub mod db;
pub mod files;
pub mod hir;
pub mod lint;
pub mod module;
mod parse;
pub mod program;
mod semantic;
pub mod source;
pub mod typeshed_versions;
pub mod watch;
pub(crate) type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>;
#[allow(unused)]
pub(crate) type FxDashSet<V> = dashmap::DashSet<V, BuildHasherDefault<FxHasher>>;
pub(crate) type FxIndexSet<V> = indexmap::set::IndexSet<V, BuildHasherDefault<FxHasher>>;
#[derive(Debug, Clone)]
pub struct Workspace {
/// TODO this should be a resolved path. We should probably use a newtype wrapper that guarantees that
/// PATH is a UTF-8 path and is normalized.
root: PathBuf,
/// The files that are open in the workspace.
///
/// * Editor: The files that are actively being edited in the editor (the user has a tab open with the file).
/// * CLI: The resolved files passed as arguments to the CLI.
open_files: FxHashSet<FileId>,
}
impl Workspace {
pub fn new(root: PathBuf) -> Self {
Self {
root,
open_files: FxHashSet::default(),
}
}
pub fn root(&self) -> &Path {
self.root.as_path()
}
// TODO having the content in workspace feels wrong.
pub fn open_file(&mut self, file_id: FileId) {
self.open_files.insert(file_id);
}
pub fn close_file(&mut self, file_id: FileId) {
self.open_files.remove(&file_id);
}
// TODO introduce an `OpenFile` type instead of using an anonymous tuple.
pub fn open_files(&self) -> impl Iterator<Item = FileId> + '_ {
self.open_files.iter().copied()
}
pub fn is_file_open(&self, file_id: FileId) -> bool {
self.open_files.contains(&file_id)
}
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Name(smol_str::SmolStr);
impl Name {
#[inline]
pub fn new(name: &str) -> Self {
Self(smol_str::SmolStr::new(name))
}
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}
impl Deref for Name {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl<T> From<T> for Name
where
T: Into<smol_str::SmolStr>,
{
fn from(value: T) -> Self {
Self(value.into())
}
}
impl std::fmt::Display for Name {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}

View File

@@ -1,332 +0,0 @@
use std::cell::RefCell;
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use std::time::Duration;
use ruff_python_ast::visitor::Visitor;
use ruff_python_ast::{ModModule, StringLiteral};
use ruff_python_parser::Parsed;
use crate::cache::KeyValueCache;
use crate::db::{LintDb, LintJar, QueryResult};
use crate::files::FileId;
use crate::module::{resolve_module, ModuleName};
use crate::parse::parse;
use crate::semantic::{infer_definition_type, infer_symbol_public_type, Type};
use crate::semantic::{
resolve_global_symbol, semantic_index, Definition, GlobalSymbolId, SemanticIndex, SymbolId,
};
use crate::source::{source_text, Source};
#[tracing::instrument(level = "debug", skip(db))]
pub(crate) fn lint_syntax(db: &dyn LintDb, file_id: FileId) -> QueryResult<Diagnostics> {
let lint_jar: &LintJar = db.jar()?;
let storage = &lint_jar.lint_syntax;
#[allow(clippy::print_stdout)]
if std::env::var("RED_KNOT_SLOW_LINT").is_ok() {
for i in 0..10 {
db.cancelled()?;
println!("RED_KNOT_SLOW_LINT is set, sleeping for {i}/10 seconds");
std::thread::sleep(Duration::from_secs(1));
}
}
storage.get(&file_id, |file_id| {
let mut diagnostics = Vec::new();
let source = source_text(db.upcast(), *file_id)?;
lint_lines(source.text(), &mut diagnostics);
let parsed = parse(db.upcast(), *file_id)?;
if parsed.errors().is_empty() {
let ast = parsed.syntax();
let mut visitor = SyntaxLintVisitor {
diagnostics,
source: source.text(),
};
visitor.visit_body(&ast.body);
diagnostics = visitor.diagnostics;
} else {
diagnostics.extend(parsed.errors().iter().map(std::string::ToString::to_string));
}
Ok(Diagnostics::from(diagnostics))
})
}
fn lint_lines(source: &str, diagnostics: &mut Vec<String>) {
for (line_number, line) in source.lines().enumerate() {
if line.len() < 88 {
continue;
}
let char_count = line.chars().count();
if char_count > 88 {
diagnostics.push(format!(
"Line {} is too long ({} characters)",
line_number + 1,
char_count
));
}
}
}
#[tracing::instrument(level = "debug", skip(db))]
pub(crate) fn lint_semantic(db: &dyn LintDb, file_id: FileId) -> QueryResult<Diagnostics> {
let lint_jar: &LintJar = db.jar()?;
let storage = &lint_jar.lint_semantic;
storage.get(&file_id, |file_id| {
let source = source_text(db.upcast(), *file_id)?;
let parsed = parse(db.upcast(), *file_id)?;
let semantic_index = semantic_index(db.upcast(), *file_id)?;
let context = SemanticLintContext {
file_id: *file_id,
source,
parsed: &parsed,
semantic_index,
db,
diagnostics: RefCell::new(Vec::new()),
};
lint_unresolved_imports(&context)?;
lint_bad_overrides(&context)?;
Ok(Diagnostics::from(context.diagnostics.take()))
})
}
fn lint_unresolved_imports(context: &SemanticLintContext) -> QueryResult<()> {
// TODO: Consider iterating over the dependencies (imports) only instead of all definitions.
for (symbol, definition) in context.semantic_index().symbol_table().all_definitions() {
match definition {
Definition::Import(import) => {
let ty = context.infer_symbol_public_type(symbol)?;
if ty.is_unknown() {
context.push_diagnostic(format!("Unresolved module {}", import.module));
}
}
Definition::ImportFrom(import) => {
let ty = context.infer_symbol_public_type(symbol)?;
if ty.is_unknown() {
let module_name = import.module().map(Deref::deref).unwrap_or_default();
let message = if import.level() > 0 {
format!(
"Unresolved relative import '{}' from {}{}",
import.name(),
".".repeat(import.level() as usize),
module_name
)
} else {
format!(
"Unresolved import '{}' from '{}'",
import.name(),
module_name
)
};
context.push_diagnostic(message);
}
}
_ => {}
}
}
Ok(())
}
fn lint_bad_overrides(context: &SemanticLintContext) -> QueryResult<()> {
// TODO we should have a special marker on the real typing module (from typeshed) so if you
// have your own "typing" module in your project, we don't consider it THE typing module (and
// same for other stdlib modules that our lint rules care about)
let Some(typing_override) = context.resolve_global_symbol("typing", "override")? else {
// TODO once we bundle typeshed, this should be unreachable!()
return Ok(());
};
// TODO we should maybe index definitions by type instead of iterating all, or else iterate all
// just once, match, and branch to all lint rules that care about a type of definition
for (symbol, definition) in context.semantic_index().symbol_table().all_definitions() {
if !matches!(definition, Definition::FunctionDef(_)) {
continue;
}
let ty = infer_definition_type(
context.db.upcast(),
GlobalSymbolId {
file_id: context.file_id,
symbol_id: symbol,
},
definition.clone(),
)?;
let Type::Function(func) = ty else {
unreachable!("type of a FunctionDef should always be a Function");
};
let Some(class) = func.get_containing_class(context.db.upcast())? else {
// not a method of a class
continue;
};
if func.has_decorator(context.db.upcast(), typing_override)? {
let method_name = func.name(context.db.upcast())?;
if class
.get_super_class_member(context.db.upcast(), &method_name)?
.is_none()
{
// TODO should have a qualname() method to support nested classes
context.push_diagnostic(
format!(
"Method {}.{} is decorated with `typing.override` but does not override any base class method",
class.name(context.db.upcast())?,
method_name,
));
}
}
}
Ok(())
}
pub struct SemanticLintContext<'a> {
file_id: FileId,
source: Source,
parsed: &'a Parsed<ModModule>,
semantic_index: Arc<SemanticIndex>,
db: &'a dyn LintDb,
diagnostics: RefCell<Vec<String>>,
}
impl<'a> SemanticLintContext<'a> {
pub fn source_text(&self) -> &str {
self.source.text()
}
pub fn file_id(&self) -> FileId {
self.file_id
}
pub fn ast(&self) -> &'a ModModule {
self.parsed.syntax()
}
pub fn semantic_index(&self) -> &SemanticIndex {
&self.semantic_index
}
pub fn infer_symbol_public_type(&self, symbol_id: SymbolId) -> QueryResult<Type> {
infer_symbol_public_type(
self.db.upcast(),
GlobalSymbolId {
file_id: self.file_id,
symbol_id,
},
)
}
pub fn push_diagnostic(&self, diagnostic: String) {
self.diagnostics.borrow_mut().push(diagnostic);
}
pub fn extend_diagnostics(&mut self, diagnostics: impl IntoIterator<Item = String>) {
self.diagnostics.get_mut().extend(diagnostics);
}
pub fn resolve_global_symbol(
&self,
module: &str,
symbol_name: &str,
) -> QueryResult<Option<GlobalSymbolId>> {
let Some(module) = resolve_module(self.db.upcast(), ModuleName::new(module))? else {
return Ok(None);
};
resolve_global_symbol(self.db.upcast(), module, symbol_name)
}
}
#[derive(Debug)]
struct SyntaxLintVisitor<'a> {
diagnostics: Vec<String>,
source: &'a str,
}
impl Visitor<'_> for SyntaxLintVisitor<'_> {
fn visit_string_literal(&mut self, string_literal: &'_ StringLiteral) {
// A very naive implementation of use double quotes
let text = &self.source[string_literal.range];
if text.starts_with('\'') {
self.diagnostics
.push("Use double quotes for strings".to_string());
}
}
}
#[derive(Debug, Clone)]
pub enum Diagnostics {
Empty,
List(Arc<Vec<String>>),
}
impl Diagnostics {
pub fn as_slice(&self) -> &[String] {
match self {
Diagnostics::Empty => &[],
Diagnostics::List(list) => list.as_slice(),
}
}
}
impl Deref for Diagnostics {
type Target = [String];
fn deref(&self) -> &Self::Target {
self.as_slice()
}
}
impl From<Vec<String>> for Diagnostics {
fn from(value: Vec<String>) -> Self {
if value.is_empty() {
Diagnostics::Empty
} else {
Diagnostics::List(Arc::new(value))
}
}
}
#[derive(Default, Debug)]
pub struct LintSyntaxStorage(KeyValueCache<FileId, Diagnostics>);
impl Deref for LintSyntaxStorage {
type Target = KeyValueCache<FileId, Diagnostics>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for LintSyntaxStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
#[derive(Default, Debug)]
pub struct LintSemanticStorage(KeyValueCache<FileId, Diagnostics>);
impl Deref for LintSemanticStorage {
type Target = KeyValueCache<FileId, Diagnostics>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for LintSemanticStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

View File

@@ -0,0 +1,254 @@
//! Sets up logging for Red Knot
use anyhow::Context;
use colored::Colorize;
use std::fmt;
use std::fs::File;
use std::io::BufWriter;
use tracing::log::LevelFilter;
use tracing::{Event, Subscriber};
use tracing_subscriber::fmt::format::Writer;
use tracing_subscriber::fmt::{FmtContext, FormatEvent, FormatFields};
use tracing_subscriber::registry::LookupSpan;
use tracing_subscriber::EnvFilter;
/// Logging flags to `#[command(flatten)]` into your CLI
#[derive(clap::Args, Debug, Clone, Default)]
#[command(about = None, long_about = None)]
pub(crate) struct Verbosity {
#[arg(
long,
short = 'v',
help = "Use verbose output (or `-vv` and `-vvv` for more verbose output)",
action = clap::ArgAction::Count,
global = true,
)]
verbose: u8,
}
impl Verbosity {
/// Returns the verbosity level based on the number of `-v` flags.
///
/// Returns `None` if the user did not specify any verbosity flags.
pub(crate) fn level(&self) -> VerbosityLevel {
match self.verbose {
0 => VerbosityLevel::Default,
1 => VerbosityLevel::Verbose,
2 => VerbosityLevel::ExtraVerbose,
_ => VerbosityLevel::Trace,
}
}
}
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub(crate) enum VerbosityLevel {
/// Default output level. Only shows Ruff and Red Knot events up to the [`WARN`](tracing::Level::WARN).
Default,
/// Enables verbose output. Emits Ruff and Red Knot events up to the [`INFO`](tracing::Level::INFO).
/// Corresponds to `-v`.
Verbose,
/// Enables a more verbose tracing format and emits Ruff and Red Knot events up to [`DEBUG`](tracing::Level::DEBUG).
/// Corresponds to `-vv`
ExtraVerbose,
/// Enables all tracing events and uses a tree-like output format. Corresponds to `-vvv`.
Trace,
}
impl VerbosityLevel {
const fn level_filter(self) -> LevelFilter {
match self {
VerbosityLevel::Default => LevelFilter::Warn,
VerbosityLevel::Verbose => LevelFilter::Info,
VerbosityLevel::ExtraVerbose => LevelFilter::Debug,
VerbosityLevel::Trace => LevelFilter::Trace,
}
}
pub(crate) const fn is_trace(self) -> bool {
matches!(self, VerbosityLevel::Trace)
}
pub(crate) const fn is_extra_verbose(self) -> bool {
matches!(self, VerbosityLevel::ExtraVerbose)
}
}
pub(crate) fn setup_tracing(level: VerbosityLevel) -> anyhow::Result<TracingGuard> {
use tracing_subscriber::prelude::*;
// The `RED_KNOT_LOG` environment variable overrides the default log level.
let filter = if let Ok(log_env_variable) = std::env::var("RED_KNOT_LOG") {
EnvFilter::builder()
.parse(log_env_variable)
.context("Failed to parse directives specified in RED_KNOT_LOG environment variable.")?
} else {
match level {
VerbosityLevel::Default => {
// Show warning traces
EnvFilter::default().add_directive(tracing::level_filters::LevelFilter::WARN.into())
}
level => {
let level_filter = level.level_filter();
// Show info|debug|trace events, but allow `RED_KNOT_LOG` to override
let filter = EnvFilter::default().add_directive(
format!("red_knot={level_filter}")
.parse()
.expect("Hardcoded directive to be valid"),
);
filter.add_directive(
format!("ruff={level_filter}")
.parse()
.expect("Hardcoded directive to be valid"),
)
}
}
};
let (profiling_layer, guard) = setup_profile();
let registry = tracing_subscriber::registry()
.with(filter)
.with(profiling_layer);
if level.is_trace() {
let subscriber = registry.with(
tracing_tree::HierarchicalLayer::default()
.with_indent_lines(true)
.with_indent_amount(2)
.with_bracketed_fields(true)
.with_thread_ids(true)
.with_targets(true)
.with_writer(std::io::stderr)
.with_timer(tracing_tree::time::Uptime::default()),
);
subscriber.init();
} else {
let subscriber = registry.with(
tracing_subscriber::fmt::layer()
.event_format(RedKnotFormat {
display_level: true,
display_timestamp: level.is_extra_verbose(),
show_spans: false,
})
.with_writer(std::io::stderr),
);
subscriber.init();
}
Ok(TracingGuard {
_flame_guard: guard,
})
}
#[allow(clippy::type_complexity)]
fn setup_profile<S>() -> (
Option<tracing_flame::FlameLayer<S, BufWriter<File>>>,
Option<tracing_flame::FlushGuard<BufWriter<File>>>,
)
where
S: Subscriber + for<'span> LookupSpan<'span>,
{
if let Ok("1" | "true") = std::env::var("RED_KNOT_LOG_PROFILE").as_deref() {
let (layer, guard) = tracing_flame::FlameLayer::with_file("tracing.folded")
.expect("Flame layer to be created");
(Some(layer), Some(guard))
} else {
(None, None)
}
}
pub(crate) struct TracingGuard {
_flame_guard: Option<tracing_flame::FlushGuard<BufWriter<File>>>,
}
struct RedKnotFormat {
display_timestamp: bool,
display_level: bool,
show_spans: bool,
}
/// See <https://docs.rs/tracing-subscriber/0.3.18/src/tracing_subscriber/fmt/format/mod.rs.html#1026-1156>
impl<S, N> FormatEvent<S, N> for RedKnotFormat
where
S: Subscriber + for<'a> LookupSpan<'a>,
N: for<'a> FormatFields<'a> + 'static,
{
fn format_event(
&self,
ctx: &FmtContext<'_, S, N>,
mut writer: Writer<'_>,
event: &Event<'_>,
) -> fmt::Result {
let meta = event.metadata();
let ansi = writer.has_ansi_escapes();
if self.display_timestamp {
let timestamp = chrono::Local::now()
.format("%Y-%m-%d %H:%M:%S.%f")
.to_string();
if ansi {
write!(writer, "{} ", timestamp.dimmed())?;
} else {
write!(
writer,
"{} ",
chrono::Local::now().format("%Y-%m-%d %H:%M:%S.%f")
)?;
}
}
if self.display_level {
let level = meta.level();
// Same colors as tracing
if ansi {
let formatted_level = level.to_string();
match *level {
tracing::Level::TRACE => {
write!(writer, "{} ", formatted_level.purple().bold())?;
}
tracing::Level::DEBUG => write!(writer, "{} ", formatted_level.blue().bold())?,
tracing::Level::INFO => write!(writer, "{} ", formatted_level.green().bold())?,
tracing::Level::WARN => write!(writer, "{} ", formatted_level.yellow().bold())?,
tracing::Level::ERROR => write!(writer, "{} ", level.to_string().red().bold())?,
}
} else {
write!(writer, "{level} ")?;
}
}
if self.show_spans {
let span = event.parent();
let mut seen = false;
let span = span
.and_then(|id| ctx.span(id))
.or_else(|| ctx.lookup_current());
let scope = span.into_iter().flat_map(|span| span.scope().from_root());
for span in scope {
seen = true;
if ansi {
write!(writer, "{}:", span.metadata().name().bold())?;
} else {
write!(writer, "{}:", span.metadata().name())?;
}
}
if seen {
writer.write_char(' ')?;
}
}
ctx.field_format().format_fields(writer.by_ref(), event)?;
writeln!(writer)
}
}

View File

@@ -1,63 +1,190 @@
#![allow(clippy::dbg_macro)]
use std::path::Path;
use std::process::{ExitCode, Termination};
use std::sync::Mutex;
use anyhow::{anyhow, Context};
use clap::Parser;
use colored::Colorize;
use crossbeam::channel as crossbeam_channel;
use tracing::subscriber::Interest;
use tracing::{Level, Metadata};
use tracing_subscriber::filter::LevelFilter;
use tracing_subscriber::layer::{Context, Filter, SubscriberExt};
use tracing_subscriber::{Layer, Registry};
use tracing_tree::time::Uptime;
use salsa::plumbing::ZalsaDatabase;
use red_knot::db::{HasJar, ParallelDatabase, QueryError, SourceDb, SourceJar};
use red_knot::module::{set_module_search_paths, ModuleResolutionInputs};
use red_knot::program::check::ExecutionMode;
use red_knot::program::{FileWatcherChange, Program};
use red_knot::watch::FileWatcher;
use red_knot::Workspace;
use red_knot_python_semantic::{ProgramSettings, SearchPathSettings};
use red_knot_server::run_server;
use red_knot_workspace::db::RootDatabase;
use red_knot_workspace::site_packages::VirtualEnvironment;
use red_knot_workspace::watch;
use red_knot_workspace::watch::WorkspaceWatcher;
use red_knot_workspace::workspace::WorkspaceMetadata;
use ruff_db::system::{OsSystem, System, SystemPath, SystemPathBuf};
use target_version::TargetVersion;
use crate::logging::{setup_tracing, Verbosity};
mod logging;
mod target_version;
mod verbosity;
#[derive(Debug, Parser)]
#[command(
author,
name = "red-knot",
about = "An extremely fast Python type checker."
)]
#[command(version)]
struct Args {
#[command(subcommand)]
pub(crate) command: Option<Command>,
#[arg(
long,
help = "Changes the current working directory.",
long_help = "Changes the current working directory before any specified operations. This affects the workspace and configuration discovery.",
value_name = "PATH"
)]
current_directory: Option<SystemPathBuf>,
#[arg(
long,
help = "Path to the virtual environment the project uses",
long_help = "\
Path to the virtual environment the project uses. \
If provided, red-knot will use the `site-packages` directory of this virtual environment \
to resolve type information for the project's third-party dependencies.",
value_name = "PATH"
)]
venv_path: Option<SystemPathBuf>,
#[arg(
long,
value_name = "DIRECTORY",
help = "Custom directory to use for stdlib typeshed stubs"
)]
custom_typeshed_dir: Option<SystemPathBuf>,
#[arg(
long,
value_name = "PATH",
help = "Additional path to use as a module-resolution source (can be passed multiple times)"
)]
extra_search_path: Vec<SystemPathBuf>,
#[arg(
long,
help = "Python version to assume when resolving types",
default_value_t = TargetVersion::default(),
value_name="VERSION")
]
target_version: TargetVersion,
#[clap(flatten)]
verbosity: Verbosity,
#[arg(
long,
help = "Run in watch mode by re-running whenever files change",
short = 'W'
)]
watch: bool,
}
#[derive(Debug, clap::Subcommand)]
pub enum Command {
/// Start the language server
Server,
}
#[allow(clippy::print_stdout, clippy::unnecessary_wraps, clippy::print_stderr)]
fn main() -> anyhow::Result<()> {
setup_tracing();
pub fn main() -> ExitStatus {
run().unwrap_or_else(|error| {
use std::io::Write;
let arguments: Vec<_> = std::env::args().collect();
// Use `writeln` instead of `eprintln` to avoid panicking when the stderr pipe is broken.
let mut stderr = std::io::stderr().lock();
if arguments.len() < 2 {
eprintln!("Usage: red_knot <path>");
return Err(anyhow::anyhow!("Invalid arguments"));
// This communicates that this isn't a linter error but Red Knot itself hard-errored for
// some reason (e.g. failed to resolve the configuration)
writeln!(stderr, "{}", "Red Knot failed".red().bold()).ok();
// Currently we generally only see one error, but e.g. with io errors when resolving
// the configuration it is help to chain errors ("resolving configuration failed" ->
// "failed to read file: subdir/pyproject.toml")
for cause in error.chain() {
writeln!(stderr, " {} {cause}", "Cause:".bold()).ok();
}
ExitStatus::Error
})
}
fn run() -> anyhow::Result<ExitStatus> {
let Args {
command,
current_directory,
custom_typeshed_dir,
extra_search_path: extra_paths,
venv_path,
target_version,
verbosity,
watch,
} = Args::parse_from(std::env::args().collect::<Vec<_>>());
if matches!(command, Some(Command::Server)) {
return run_server().map(|()| ExitStatus::Success);
}
let entry_point = Path::new(&arguments[1]);
let verbosity = verbosity.level();
countme::enable(verbosity.is_trace());
let _guard = setup_tracing(verbosity)?;
if !entry_point.exists() {
eprintln!("The entry point does not exist.");
return Err(anyhow::anyhow!("Invalid arguments"));
}
if !entry_point.is_file() {
eprintln!("The entry point is not a file.");
return Err(anyhow::anyhow!("Invalid arguments"));
}
let workspace_folder = entry_point.parent().unwrap();
let workspace = Workspace::new(workspace_folder.to_path_buf());
let workspace_search_path = workspace.root().to_path_buf();
let search_paths = ModuleResolutionInputs {
extra_paths: vec![],
workspace_root: workspace_search_path,
site_packages: None,
custom_typeshed: None,
// The base path to which all CLI arguments are relative to.
let cli_base_path = {
let cwd = std::env::current_dir().context("Failed to get the current working directory")?;
SystemPathBuf::from_path_buf(cwd)
.map_err(|path| {
anyhow!(
"The current working directory '{}' contains non-unicode characters. Red Knot only supports unicode paths.",
path.display()
)
})?
};
let mut program = Program::new(workspace);
set_module_search_paths(&mut program, search_paths);
let cwd = current_directory
.map(|cwd| {
if cwd.as_std_path().is_dir() {
Ok(SystemPath::absolute(&cwd, &cli_base_path))
} else {
Err(anyhow!(
"Provided current-directory path '{cwd}' is not a directory."
))
}
})
.transpose()?
.unwrap_or_else(|| cli_base_path.clone());
let entry_id = program.file_id(entry_point);
program.workspace_mut().open_file(entry_id);
let system = OsSystem::new(cwd.clone());
let workspace_metadata = WorkspaceMetadata::from_path(system.current_directory(), &system)?;
// TODO: Verify the remaining search path settings eagerly.
let site_packages = venv_path
.map(|path| {
VirtualEnvironment::new(path, &OsSystem::new(cli_base_path))
.and_then(|venv| venv.site_packages_directories(&system))
})
.transpose()?
.unwrap_or_default();
// TODO: Respect the settings from the workspace metadata. when resolving the program settings.
let program_settings = ProgramSettings {
target_version: target_version.into(),
search_paths: SearchPathSettings {
extra_paths,
src_root: workspace_metadata.root().to_path_buf(),
custom_typeshed: custom_typeshed_dir,
site_packages,
},
};
// TODO: Use the `program_settings` to compute the key for the database's persistent
// cache and load the cache if it exists.
let mut db = RootDatabase::new(workspace_metadata, program_settings, system);
let (main_loop, main_loop_cancellation_token) = MainLoop::new();
@@ -71,122 +198,158 @@ fn main() -> anyhow::Result<()> {
}
})?;
let file_changes_notifier = main_loop.file_changes_notifier();
let exit_status = if watch {
main_loop.watch(&mut db)?
} else {
main_loop.run(&mut db)
};
// Watch for file changes and re-trigger the analysis.
let mut file_watcher = FileWatcher::new(move |changes| {
file_changes_notifier.notify(changes);
})?;
tracing::trace!("Counts for entire CLI run:\n{}", countme::get_all());
file_watcher.watch_folder(workspace_folder)?;
std::mem::forget(db);
main_loop.run(&mut program);
Ok(exit_status)
}
let source_jar: &SourceJar = program.jar().unwrap();
#[derive(Copy, Clone)]
pub enum ExitStatus {
/// Checking was successful and there were no errors.
Success = 0,
dbg!(source_jar.parsed.statistics());
dbg!(source_jar.sources.statistics());
/// Checking was successful but there were errors.
Failure = 1,
Ok(())
/// Checking failed.
Error = 2,
}
impl Termination for ExitStatus {
fn report(self) -> ExitCode {
ExitCode::from(self as u8)
}
}
struct MainLoop {
orchestrator_sender: crossbeam_channel::Sender<OrchestratorMessage>,
main_loop_receiver: crossbeam_channel::Receiver<MainLoopMessage>,
/// Sender that can be used to send messages to the main loop.
sender: crossbeam_channel::Sender<MainLoopMessage>,
/// Receiver for the messages sent **to** the main loop.
receiver: crossbeam_channel::Receiver<MainLoopMessage>,
/// The file system watcher, if running in watch mode.
watcher: Option<WorkspaceWatcher>,
}
impl MainLoop {
fn new() -> (Self, MainLoopCancellationToken) {
let (orchestrator_sender, orchestrator_receiver) = crossbeam_channel::bounded(1);
let (main_loop_sender, main_loop_receiver) = crossbeam_channel::bounded(1);
let mut orchestrator = Orchestrator {
receiver: orchestrator_receiver,
sender: main_loop_sender.clone(),
revision: 0,
};
std::thread::spawn(move || {
orchestrator.run();
});
let (sender, receiver) = crossbeam_channel::bounded(10);
(
Self {
orchestrator_sender,
main_loop_receiver,
},
MainLoopCancellationToken {
sender: main_loop_sender,
sender: sender.clone(),
receiver,
watcher: None,
},
MainLoopCancellationToken { sender },
)
}
fn file_changes_notifier(&self) -> FileChangesNotifier {
FileChangesNotifier {
sender: self.orchestrator_sender.clone(),
}
fn watch(mut self, db: &mut RootDatabase) -> anyhow::Result<ExitStatus> {
tracing::debug!("Starting watch mode");
let sender = self.sender.clone();
let watcher = watch::directory_watcher(move |event| {
sender.send(MainLoopMessage::ApplyChanges(event)).unwrap();
})?;
self.watcher = Some(WorkspaceWatcher::new(watcher, db));
self.run(db);
Ok(ExitStatus::Success)
}
fn run(self, program: &mut Program) {
self.orchestrator_sender
.send(OrchestratorMessage::Run)
.unwrap();
fn run(mut self, db: &mut RootDatabase) -> ExitStatus {
self.sender.send(MainLoopMessage::CheckWorkspace).unwrap();
for message in &self.main_loop_receiver {
tracing::trace!("Main Loop: Tick");
let result = self.main_loop(db);
tracing::debug!("Exiting main loop");
result
}
fn main_loop(&mut self, db: &mut RootDatabase) -> ExitStatus {
// Schedule the first check.
tracing::debug!("Starting main loop");
let mut revision = 0u64;
while let Ok(message) = self.receiver.recv() {
match message {
MainLoopMessage::CheckProgram { revision } => {
let program = program.snapshot();
let sender = self.orchestrator_sender.clone();
MainLoopMessage::CheckWorkspace => {
let db = db.snapshot();
let sender = self.sender.clone();
// Spawn a new task that checks the program. This needs to be done in a separate thread
// Spawn a new task that checks the workspace. This needs to be done in a separate thread
// to prevent blocking the main loop here.
rayon::spawn(move || match program.check(ExecutionMode::ThreadPool) {
Ok(result) => {
rayon::spawn(move || {
if let Ok(result) = db.check() {
// Send the result back to the main loop for printing.
sender
.send(OrchestratorMessage::CheckProgramCompleted {
diagnostics: result,
revision,
})
.send(MainLoopMessage::CheckCompleted { result, revision })
.unwrap();
}
Err(QueryError::Cancelled) => {}
});
}
MainLoopMessage::ApplyChanges(changes) => {
// Automatically cancels any pending queries and waits for them to complete.
program.apply_changes(changes);
MainLoopMessage::CheckCompleted {
result,
revision: check_revision,
} => {
let has_diagnostics = !result.is_empty();
if check_revision == revision {
for diagnostic in result {
tracing::error!("{}", diagnostic);
}
} else {
tracing::debug!(
"Discarding check result for outdated revision: current: {revision}, result revision: {check_revision}"
);
}
if self.watcher.is_none() {
return if has_diagnostics {
ExitStatus::Failure
} else {
ExitStatus::Success
};
}
tracing::trace!("Counts after last check:\n{}", countme::get_all());
}
MainLoopMessage::CheckCompleted(diagnostics) => {
dbg!(diagnostics);
MainLoopMessage::ApplyChanges(changes) => {
revision += 1;
// Automatically cancels any pending queries and waits for them to complete.
db.apply_changes(changes);
if let Some(watcher) = self.watcher.as_mut() {
watcher.update(db);
}
self.sender.send(MainLoopMessage::CheckWorkspace).unwrap();
}
MainLoopMessage::Exit => {
return;
// Cancel any pending queries and wait for them to complete.
// TODO: Don't use Salsa internal APIs
// [Zulip-Thread](https://salsa.zulipchat.com/#narrow/stream/333573-salsa-3.2E0/topic/Expose.20an.20API.20to.20cancel.20other.20queries)
let _ = db.zalsa_mut();
return ExitStatus::Success;
}
}
tracing::debug!("Waiting for next main loop message.");
}
}
}
impl Drop for MainLoop {
fn drop(&mut self) {
self.orchestrator_sender
.send(OrchestratorMessage::Shutdown)
.unwrap();
}
}
#[derive(Debug, Clone)]
struct FileChangesNotifier {
sender: crossbeam_channel::Sender<OrchestratorMessage>,
}
impl FileChangesNotifier {
fn notify(&self, changes: Vec<FileWatcherChange>) {
self.sender
.send(OrchestratorMessage::FileChanges(changes))
.unwrap();
ExitStatus::Success
}
}
@@ -201,164 +364,11 @@ impl MainLoopCancellationToken {
}
}
struct Orchestrator {
/// Sends messages to the main loop.
sender: crossbeam_channel::Sender<MainLoopMessage>,
/// Receives messages from the main loop.
receiver: crossbeam_channel::Receiver<OrchestratorMessage>,
revision: usize,
}
impl Orchestrator {
fn run(&mut self) {
while let Ok(message) = self.receiver.recv() {
match message {
OrchestratorMessage::Run => {
self.sender
.send(MainLoopMessage::CheckProgram {
revision: self.revision,
})
.unwrap();
}
OrchestratorMessage::CheckProgramCompleted {
diagnostics,
revision,
} => {
// Only take the diagnostics if they are for the latest revision.
if self.revision == revision {
self.sender
.send(MainLoopMessage::CheckCompleted(diagnostics))
.unwrap();
} else {
tracing::debug!("Discarding diagnostics for outdated revision {revision} (current: {}).", self.revision);
}
}
OrchestratorMessage::FileChanges(changes) => {
// Request cancellation, but wait until all analysis tasks have completed to
// avoid stale messages in the next main loop.
self.revision += 1;
self.debounce_changes(changes);
}
OrchestratorMessage::Shutdown => {
return self.shutdown();
}
}
}
}
fn debounce_changes(&self, mut changes: Vec<FileWatcherChange>) {
loop {
// Consume possibly incoming file change messages before running a new analysis, but don't wait for more than 100ms.
crossbeam_channel::select! {
recv(self.receiver) -> message => {
match message {
Ok(OrchestratorMessage::Shutdown) => {
return self.shutdown();
}
Ok(OrchestratorMessage::FileChanges(file_changes)) => {
changes.extend(file_changes);
}
Ok(OrchestratorMessage::CheckProgramCompleted { .. })=> {
// disregard any outdated completion message.
}
Ok(OrchestratorMessage::Run) => unreachable!("The orchestrator is already running."),
Err(_) => {
// There are no more senders, no point in waiting for more messages
return;
}
}
},
default(std::time::Duration::from_millis(10)) => {
// No more file changes after 10 ms, send the changes and schedule a new analysis
self.sender.send(MainLoopMessage::ApplyChanges(changes)).unwrap();
self.sender.send(MainLoopMessage::CheckProgram { revision: self.revision}).unwrap();
return;
}
}
}
}
#[allow(clippy::unused_self)]
fn shutdown(&self) {
tracing::trace!("Shutting down orchestrator.");
}
}
/// Message sent from the orchestrator to the main loop.
#[derive(Debug)]
enum MainLoopMessage {
CheckProgram { revision: usize },
CheckCompleted(Vec<String>),
ApplyChanges(Vec<FileWatcherChange>),
CheckWorkspace,
CheckCompleted { result: Vec<String>, revision: u64 },
ApplyChanges(Vec<watch::ChangeEvent>),
Exit,
}
#[derive(Debug)]
enum OrchestratorMessage {
Run,
Shutdown,
CheckProgramCompleted {
diagnostics: Vec<String>,
revision: usize,
},
FileChanges(Vec<FileWatcherChange>),
}
fn setup_tracing() {
let subscriber = Registry::default().with(
tracing_tree::HierarchicalLayer::default()
.with_indent_lines(true)
.with_indent_amount(2)
.with_bracketed_fields(true)
.with_thread_ids(true)
.with_targets(true)
.with_writer(|| Box::new(std::io::stderr()))
.with_timer(Uptime::default())
.with_filter(LoggingFilter {
trace_level: Level::TRACE,
}),
);
tracing::subscriber::set_global_default(subscriber).unwrap();
}
struct LoggingFilter {
trace_level: Level,
}
impl LoggingFilter {
fn is_enabled(&self, meta: &Metadata<'_>) -> bool {
let filter = if meta.target().starts_with("red_knot") || meta.target().starts_with("ruff") {
self.trace_level
} else {
Level::INFO
};
meta.level() <= &filter
}
}
impl<S> Filter<S> for LoggingFilter {
fn enabled(&self, meta: &Metadata<'_>, _cx: &Context<'_, S>) -> bool {
self.is_enabled(meta)
}
fn callsite_enabled(&self, meta: &'static Metadata<'static>) -> Interest {
if self.is_enabled(meta) {
Interest::always()
} else {
Interest::never()
}
}
fn max_level_hint(&self) -> Option<LevelFilter> {
Some(LevelFilter::from_level(self.trace_level))
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,41 +0,0 @@
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use ruff_python_ast::ModModule;
use ruff_python_parser::Parsed;
use crate::cache::KeyValueCache;
use crate::db::{QueryResult, SourceDb};
use crate::files::FileId;
use crate::source::source_text;
#[tracing::instrument(level = "debug", skip(db))]
pub(crate) fn parse(db: &dyn SourceDb, file_id: FileId) -> QueryResult<Arc<Parsed<ModModule>>> {
let jar = db.jar()?;
jar.parsed.get(&file_id, |file_id| {
let source = source_text(db, *file_id)?;
Ok(Arc::new(ruff_python_parser::parse_unchecked_source(
source.text(),
source.kind().into(),
)))
})
}
#[derive(Debug, Default)]
pub struct ParsedStorage(KeyValueCache<FileId, Arc<Parsed<ModModule>>>);
impl Deref for ParsedStorage {
type Target = KeyValueCache<FileId, Arc<Parsed<ModModule>>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for ParsedStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

View File

@@ -1,413 +0,0 @@
use rayon::{current_num_threads, yield_local};
use rustc_hash::FxHashSet;
use crate::db::{Database, QueryError, QueryResult};
use crate::files::FileId;
use crate::lint::{lint_semantic, lint_syntax, Diagnostics};
use crate::module::{file_to_module, resolve_module};
use crate::program::Program;
use crate::semantic::{semantic_index, Dependency};
impl Program {
/// Checks all open files in the workspace and its dependencies.
#[tracing::instrument(level = "debug", skip_all)]
pub fn check(&self, mode: ExecutionMode) -> QueryResult<Vec<String>> {
self.cancelled()?;
let mut context = CheckContext::new(self);
match mode {
ExecutionMode::SingleThreaded => SingleThreadedExecutor.run(&mut context)?,
ExecutionMode::ThreadPool => ThreadPoolExecutor.run(&mut context)?,
};
Ok(context.finish())
}
#[tracing::instrument(level = "debug", skip(self, context))]
fn check_file(&self, file: FileId, context: &CheckFileContext) -> QueryResult<Diagnostics> {
self.cancelled()?;
let index = semantic_index(self, file)?;
let dependencies = index.symbol_table().dependencies();
if !dependencies.is_empty() {
let module = file_to_module(self, file)?;
// TODO scheduling all dependencies here is wasteful if we don't infer any types on them
// but I think that's unlikely, so it is okay?
// Anyway, we need to figure out a way to retrieve the dependencies of a module
// from the persistent cache. So maybe it should be a separate query after all.
for dependency in dependencies {
let dependency_name = match dependency {
Dependency::Module(name) => Some(name.clone()),
Dependency::Relative { .. } => match &module {
Some(module) => module.resolve_dependency(self, dependency)?,
None => None,
},
};
if let Some(dependency_name) = dependency_name {
// TODO We may want to have a different check functions for non-first-party
// files because we only need to index them and not check them.
// Supporting non-first-party code also requires supporting typing stubs.
if let Some(dependency) = resolve_module(self, dependency_name)? {
if dependency.path(self)?.root().kind().is_first_party() {
context.schedule_dependency(dependency.path(self)?.file());
}
}
}
}
}
let mut diagnostics = Vec::new();
if self.workspace().is_file_open(file) {
diagnostics.extend_from_slice(&lint_syntax(self, file)?);
diagnostics.extend_from_slice(&lint_semantic(self, file)?);
}
Ok(Diagnostics::from(diagnostics))
}
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ExecutionMode {
SingleThreaded,
ThreadPool,
}
/// Context that stores state information about the entire check operation.
struct CheckContext<'a> {
/// IDs of the files that have been queued for checking.
///
/// Used to avoid queuing the same file twice.
scheduled_files: FxHashSet<FileId>,
/// Reference to the program that is checked.
program: &'a Program,
/// The aggregated diagnostics
diagnostics: Vec<String>,
}
impl<'a> CheckContext<'a> {
fn new(program: &'a Program) -> Self {
Self {
scheduled_files: FxHashSet::default(),
program,
diagnostics: Vec::new(),
}
}
/// Returns the tasks to check all open files in the workspace.
fn check_open_files(&mut self) -> Vec<CheckOpenFileTask> {
self.scheduled_files
.extend(self.program.workspace().open_files());
self.program
.workspace()
.open_files()
.map(|file_id| CheckOpenFileTask { file_id })
.collect()
}
/// Returns the task to check a dependency.
fn check_dependency(&mut self, file_id: FileId) -> Option<CheckDependencyTask> {
if self.scheduled_files.insert(file_id) {
Some(CheckDependencyTask { file_id })
} else {
None
}
}
/// Pushes the result for a single file check operation
fn push_diagnostics(&mut self, diagnostics: &Diagnostics) {
self.diagnostics.extend_from_slice(diagnostics);
}
/// Returns a reference to the program that is being checked.
fn program(&self) -> &'a Program {
self.program
}
/// Creates a task context that is used to check a single file.
fn task_context<'b, S>(&self, dependency_scheduler: &'b S) -> CheckTaskContext<'a, 'b, S>
where
S: ScheduleDependency,
{
CheckTaskContext {
program: self.program,
dependency_scheduler,
}
}
fn finish(self) -> Vec<String> {
self.diagnostics
}
}
/// Trait that abstracts away how a dependency of a file gets scheduled for checking.
trait ScheduleDependency {
/// Schedules the file with the given ID for checking.
fn schedule(&self, file_id: FileId);
}
impl<T> ScheduleDependency for T
where
T: Fn(FileId),
{
fn schedule(&self, file_id: FileId) {
let f = self;
f(file_id);
}
}
/// Context that is used to run a single file check task.
///
/// The task is generic over `S` because it is passed across thread boundaries and
/// we don't want to add the requirement that [`ScheduleDependency`] must be [`Send`].
struct CheckTaskContext<'a, 'scheduler, S>
where
S: ScheduleDependency,
{
dependency_scheduler: &'scheduler S,
program: &'a Program,
}
impl<'a, 'scheduler, S> CheckTaskContext<'a, 'scheduler, S>
where
S: ScheduleDependency,
{
fn as_file_context(&self) -> CheckFileContext<'scheduler> {
CheckFileContext {
dependency_scheduler: self.dependency_scheduler,
}
}
}
/// Context passed when checking a single file.
///
/// This is a trimmed down version of [`CheckTaskContext`] with the type parameter `S` erased
/// to avoid monomorphization of [`Program:check_file`].
struct CheckFileContext<'a> {
dependency_scheduler: &'a dyn ScheduleDependency,
}
impl<'a> CheckFileContext<'a> {
fn schedule_dependency(&self, file_id: FileId) {
self.dependency_scheduler.schedule(file_id);
}
}
#[derive(Debug)]
enum CheckFileTask {
OpenFile(CheckOpenFileTask),
Dependency(CheckDependencyTask),
}
impl CheckFileTask {
/// Runs the task and returns the results for checking this file.
fn run<S>(&self, context: &CheckTaskContext<S>) -> QueryResult<Diagnostics>
where
S: ScheduleDependency,
{
match self {
Self::OpenFile(task) => task.run(context),
Self::Dependency(task) => task.run(context),
}
}
fn file_id(&self) -> FileId {
match self {
CheckFileTask::OpenFile(task) => task.file_id,
CheckFileTask::Dependency(task) => task.file_id,
}
}
}
/// Task to check an open file.
#[derive(Debug)]
struct CheckOpenFileTask {
file_id: FileId,
}
impl CheckOpenFileTask {
fn run<S>(&self, context: &CheckTaskContext<S>) -> QueryResult<Diagnostics>
where
S: ScheduleDependency,
{
context
.program
.check_file(self.file_id, &context.as_file_context())
}
}
/// Task to check a dependency file.
#[derive(Debug)]
struct CheckDependencyTask {
file_id: FileId,
}
impl CheckDependencyTask {
fn run<S>(&self, context: &CheckTaskContext<S>) -> QueryResult<Diagnostics>
where
S: ScheduleDependency,
{
context
.program
.check_file(self.file_id, &context.as_file_context())
}
}
/// Executor that schedules the checking of individual program files.
trait CheckExecutor {
fn run(self, context: &mut CheckContext) -> QueryResult<()>;
}
/// Executor that runs all check operations on the current thread.
///
/// The executor does not schedule dependencies for checking.
/// The main motivation for scheduling dependencies
/// in a multithreaded environment is to parse and index the dependencies concurrently.
/// However, that doesn't make sense in a single threaded environment, because the dependencies then compute
/// with checking the open files. Checking dependencies in a single threaded environment is more likely
/// to hurt performance because we end up analyzing files in their entirety, even if we only need to type check parts of them.
#[derive(Debug, Default)]
struct SingleThreadedExecutor;
impl CheckExecutor for SingleThreadedExecutor {
fn run(self, context: &mut CheckContext) -> QueryResult<()> {
let mut queue = context.check_open_files();
let noop_schedule_dependency = |_| {};
while let Some(file) = queue.pop() {
context.program().cancelled()?;
let task_context = context.task_context(&noop_schedule_dependency);
context.push_diagnostics(&file.run(&task_context)?);
}
Ok(())
}
}
/// Executor that runs the check operations on a thread pool.
///
/// The executor runs each check operation as its own task using a thread pool.
///
/// Other than [`SingleThreadedExecutor`], this executor schedules dependencies for checking. It
/// even schedules dependencies for checking when the thread pool size is 1 for a better debugging experience.
#[derive(Debug, Default)]
struct ThreadPoolExecutor;
impl CheckExecutor for ThreadPoolExecutor {
fn run(self, context: &mut CheckContext) -> QueryResult<()> {
let num_threads = current_num_threads();
let single_threaded = num_threads == 1;
let span = tracing::trace_span!("ThreadPoolExecutor::run", num_threads);
let _ = span.enter();
let mut queue: Vec<_> = context
.check_open_files()
.into_iter()
.map(CheckFileTask::OpenFile)
.collect();
let (sender, receiver) = if single_threaded {
// Use an unbounded queue for single threaded execution to prevent deadlocks
// when a single file schedules multiple dependencies.
crossbeam::channel::unbounded()
} else {
// Use a bounded queue to apply backpressure when the orchestration thread isn't able to keep
// up processing messages from the worker threads.
crossbeam::channel::bounded(num_threads)
};
let schedule_sender = sender.clone();
let schedule_dependency = move |file_id| {
schedule_sender
.send(ThreadPoolMessage::ScheduleDependency(file_id))
.unwrap();
};
let result = rayon::in_place_scope(|scope| {
let mut pending = 0usize;
loop {
context.program().cancelled()?;
// 1. Try to get a queued message to ensure that we have always remaining space in the channel to prevent blocking the worker threads.
// 2. Try to process a queued file
// 3. If there's no queued file wait for the next incoming message.
// 4. Exit if there are no more messages and no senders.
let message = if let Ok(message) = receiver.try_recv() {
message
} else if let Some(task) = queue.pop() {
pending += 1;
let task_context = context.task_context(&schedule_dependency);
let sender = sender.clone();
let task_span = tracing::trace_span!(
parent: &span,
"CheckFileTask::run",
file_id = task.file_id().as_u32(),
);
scope.spawn(move |_| {
task_span.in_scope(|| match task.run(&task_context) {
Ok(result) => {
sender.send(ThreadPoolMessage::Completed(result)).unwrap();
}
Err(err) => sender.send(ThreadPoolMessage::Errored(err)).unwrap(),
});
});
// If this is a single threaded rayon thread pool, yield the current thread
// or we never start processing the work items.
if single_threaded {
yield_local();
}
continue;
} else if let Ok(message) = receiver.recv() {
message
} else {
break;
};
match message {
ThreadPoolMessage::ScheduleDependency(dependency) => {
if let Some(task) = context.check_dependency(dependency) {
queue.push(CheckFileTask::Dependency(task));
}
}
ThreadPoolMessage::Completed(diagnostics) => {
context.push_diagnostics(&diagnostics);
pending -= 1;
if pending == 0 && queue.is_empty() {
break;
}
}
ThreadPoolMessage::Errored(err) => {
return Err(err);
}
}
}
Ok(())
});
result
}
}
#[derive(Debug)]
enum ThreadPoolMessage {
ScheduleDependency(FileId),
Completed(Diagnostics),
Errored(QueryError),
}

View File

@@ -1,275 +0,0 @@
use std::collections::hash_map::Entry;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use rustc_hash::FxHashMap;
use crate::db::{
Database, Db, DbRuntime, DbWithJar, HasJar, HasJars, JarsStorage, LintDb, LintJar,
ParallelDatabase, QueryResult, SemanticDb, SemanticJar, Snapshot, SourceDb, SourceJar, Upcast,
};
use crate::files::{FileId, Files};
use crate::Workspace;
pub mod check;
#[derive(Debug)]
pub struct Program {
jars: JarsStorage<Program>,
files: Files,
workspace: Workspace,
}
impl Program {
pub fn new(workspace: Workspace) -> Self {
Self {
jars: JarsStorage::default(),
files: Files::default(),
workspace,
}
}
pub fn apply_changes<I>(&mut self, changes: I)
where
I: IntoIterator<Item = FileWatcherChange>,
{
let mut aggregated_changes = AggregatedChanges::default();
aggregated_changes.extend(changes.into_iter().map(|change| FileChange {
id: self.files.intern(&change.path),
kind: change.kind,
}));
let (source, semantic, lint) = self.jars_mut();
for change in aggregated_changes.iter() {
semantic.module_resolver.remove_module_by_file(change.id);
semantic.semantic_indices.remove(&change.id);
source.sources.remove(&change.id);
source.parsed.remove(&change.id);
// TODO: remove all dependent modules as well
semantic.type_store.remove_module(change.id);
lint.lint_syntax.remove(&change.id);
lint.lint_semantic.remove(&change.id);
}
}
pub fn files(&self) -> &Files {
&self.files
}
pub fn workspace(&self) -> &Workspace {
&self.workspace
}
pub fn workspace_mut(&mut self) -> &mut Workspace {
&mut self.workspace
}
}
impl SourceDb for Program {
fn file_id(&self, path: &Path) -> FileId {
self.files.intern(path)
}
fn file_path(&self, file_id: FileId) -> Arc<Path> {
self.files.path(file_id)
}
}
impl DbWithJar<SourceJar> for Program {}
impl SemanticDb for Program {}
impl DbWithJar<SemanticJar> for Program {}
impl LintDb for Program {}
impl DbWithJar<LintJar> for Program {}
impl Upcast<dyn SemanticDb> for Program {
fn upcast(&self) -> &(dyn SemanticDb + 'static) {
self
}
}
impl Upcast<dyn SourceDb> for Program {
fn upcast(&self) -> &(dyn SourceDb + 'static) {
self
}
}
impl Upcast<dyn LintDb> for Program {
fn upcast(&self) -> &(dyn LintDb + 'static) {
self
}
}
impl Db for Program {}
impl Database for Program {
fn runtime(&self) -> &DbRuntime {
self.jars.runtime()
}
fn runtime_mut(&mut self) -> &mut DbRuntime {
self.jars.runtime_mut()
}
}
impl ParallelDatabase for Program {
fn snapshot(&self) -> Snapshot<Self> {
Snapshot::new(Self {
jars: self.jars.snapshot(),
files: self.files.snapshot(),
workspace: self.workspace.clone(),
})
}
}
impl HasJars for Program {
type Jars = (SourceJar, SemanticJar, LintJar);
fn jars(&self) -> QueryResult<&Self::Jars> {
self.jars.jars()
}
fn jars_mut(&mut self) -> &mut Self::Jars {
self.jars.jars_mut()
}
}
impl HasJar<SourceJar> for Program {
fn jar(&self) -> QueryResult<&SourceJar> {
Ok(&self.jars()?.0)
}
fn jar_mut(&mut self) -> &mut SourceJar {
&mut self.jars_mut().0
}
}
impl HasJar<SemanticJar> for Program {
fn jar(&self) -> QueryResult<&SemanticJar> {
Ok(&self.jars()?.1)
}
fn jar_mut(&mut self) -> &mut SemanticJar {
&mut self.jars_mut().1
}
}
impl HasJar<LintJar> for Program {
fn jar(&self) -> QueryResult<&LintJar> {
Ok(&self.jars()?.2)
}
fn jar_mut(&mut self) -> &mut LintJar {
&mut self.jars_mut().2
}
}
#[derive(Clone, Debug)]
pub struct FileWatcherChange {
path: PathBuf,
kind: FileChangeKind,
}
impl FileWatcherChange {
pub fn new(path: PathBuf, kind: FileChangeKind) -> Self {
Self { path, kind }
}
}
#[derive(Copy, Clone, Debug)]
struct FileChange {
id: FileId,
kind: FileChangeKind,
}
impl FileChange {
fn file_id(self) -> FileId {
self.id
}
fn kind(self) -> FileChangeKind {
self.kind
}
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum FileChangeKind {
Created,
Modified,
Deleted,
}
#[derive(Default, Debug)]
struct AggregatedChanges {
changes: FxHashMap<FileId, FileChangeKind>,
}
impl AggregatedChanges {
fn add(&mut self, change: FileChange) {
match self.changes.entry(change.file_id()) {
Entry::Occupied(mut entry) => {
let merged = entry.get_mut();
match (merged, change.kind()) {
(FileChangeKind::Created, FileChangeKind::Deleted) => {
// Deletion after creations means that ruff never saw the file.
entry.remove();
}
(FileChangeKind::Created, FileChangeKind::Modified) => {
// No-op, for ruff, modifying a file that it doesn't yet know that it exists is still considered a creation.
}
(FileChangeKind::Modified, FileChangeKind::Created) => {
// Uhh, that should probably not happen. Continue considering it a modification.
}
(FileChangeKind::Modified, FileChangeKind::Deleted) => {
*entry.get_mut() = FileChangeKind::Deleted;
}
(FileChangeKind::Deleted, FileChangeKind::Created) => {
*entry.get_mut() = FileChangeKind::Modified;
}
(FileChangeKind::Deleted, FileChangeKind::Modified) => {
// That's weird, but let's consider it a modification.
*entry.get_mut() = FileChangeKind::Modified;
}
(FileChangeKind::Created, FileChangeKind::Created)
| (FileChangeKind::Modified, FileChangeKind::Modified)
| (FileChangeKind::Deleted, FileChangeKind::Deleted) => {
// No-op transitions. Some of them should be impossible but we handle them anyway.
}
}
}
Entry::Vacant(entry) => {
entry.insert(change.kind());
}
}
}
fn extend<I>(&mut self, changes: I)
where
I: IntoIterator<Item = FileChange>,
{
let iter = changes.into_iter();
let (lower, _) = iter.size_hint();
self.changes.reserve(lower);
for change in iter {
self.add(change);
}
}
fn iter(&self) -> impl Iterator<Item = FileChange> + '_ {
self.changes.iter().map(|(id, kind)| FileChange {
id: *id,
kind: *kind,
})
}
}

View File

@@ -1,883 +0,0 @@
use std::num::NonZeroU32;
use ruff_python_ast as ast;
use ruff_python_ast::visitor::source_order::SourceOrderVisitor;
use ruff_python_ast::AstNode;
use crate::ast_ids::{NodeKey, TypedNodeKey};
use crate::cache::KeyValueCache;
use crate::db::{QueryResult, SemanticDb, SemanticJar};
use crate::files::FileId;
use crate::module::Module;
use crate::module::ModuleName;
use crate::parse::parse;
use crate::Name;
pub(crate) use definitions::Definition;
use definitions::{ImportDefinition, ImportFromDefinition};
pub(crate) use flow_graph::ConstrainedDefinition;
use flow_graph::{FlowGraph, FlowGraphBuilder, FlowNodeId, ReachableDefinitionsIterator};
use ruff_index::{newtype_index, IndexVec};
use rustc_hash::FxHashMap;
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
pub(crate) use symbol_table::{Dependency, SymbolId};
use symbol_table::{ScopeId, ScopeKind, SymbolFlags, SymbolTable, SymbolTableBuilder};
pub(crate) use types::{infer_definition_type, infer_symbol_public_type, Type, TypeStore};
mod definitions;
mod flow_graph;
mod symbol_table;
mod types;
#[tracing::instrument(level = "debug", skip(db))]
pub fn semantic_index(db: &dyn SemanticDb, file_id: FileId) -> QueryResult<Arc<SemanticIndex>> {
let jar: &SemanticJar = db.jar()?;
jar.semantic_indices.get(&file_id, |_| {
let parsed = parse(db.upcast(), file_id)?;
Ok(Arc::from(SemanticIndex::from_ast(parsed.syntax())))
})
}
#[tracing::instrument(level = "debug", skip(db))]
pub fn resolve_global_symbol(
db: &dyn SemanticDb,
module: Module,
name: &str,
) -> QueryResult<Option<GlobalSymbolId>> {
let file_id = module.path(db)?.file();
let symbol_table = &semantic_index(db, file_id)?.symbol_table;
let Some(symbol_id) = symbol_table.root_symbol_id_by_name(name) else {
return Ok(None);
};
Ok(Some(GlobalSymbolId { file_id, symbol_id }))
}
#[newtype_index]
pub struct ExpressionId;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct GlobalSymbolId {
pub(crate) file_id: FileId,
pub(crate) symbol_id: SymbolId,
}
#[derive(Debug)]
pub struct SemanticIndex {
symbol_table: SymbolTable,
flow_graph: FlowGraph,
expressions: FxHashMap<NodeKey, ExpressionId>,
expressions_by_id: IndexVec<ExpressionId, NodeKey>,
}
impl SemanticIndex {
pub fn from_ast(module: &ast::ModModule) -> Self {
let root_scope_id = SymbolTable::root_scope_id();
let mut indexer = SemanticIndexer {
symbol_table_builder: SymbolTableBuilder::new(),
flow_graph_builder: FlowGraphBuilder::new(),
scopes: vec![ScopeState {
scope_id: root_scope_id,
current_flow_node_id: FlowGraph::start(),
}],
expressions: FxHashMap::default(),
expressions_by_id: IndexVec::default(),
current_definition: None,
};
indexer.visit_body(&module.body);
indexer.finish()
}
fn resolve_expression_id<'a>(
&self,
ast: &'a ast::ModModule,
expression_id: ExpressionId,
) -> ast::AnyNodeRef<'a> {
let node_key = self.expressions_by_id[expression_id];
node_key
.resolve(ast.as_any_node_ref())
.expect("node to resolve")
}
/// Return an iterator over all definitions of `symbol_id` reachable from `use_expr`. The value
/// of `symbol_id` in `use_expr` must originate from one of the iterated definitions (or from
/// an external reassignment of the name outside of this scope).
pub fn reachable_definitions(
&self,
symbol_id: SymbolId,
use_expr: &ast::Expr,
) -> ReachableDefinitionsIterator {
let expression_id = self.expression_id(use_expr);
ReachableDefinitionsIterator::new(
&self.flow_graph,
symbol_id,
self.flow_graph.for_expr(expression_id),
)
}
pub fn expression_id(&self, expression: &ast::Expr) -> ExpressionId {
self.expressions[&NodeKey::from_node(expression.into())]
}
pub fn symbol_table(&self) -> &SymbolTable {
&self.symbol_table
}
}
#[derive(Debug)]
struct ScopeState {
scope_id: ScopeId,
current_flow_node_id: FlowNodeId,
}
#[derive(Debug)]
struct SemanticIndexer {
symbol_table_builder: SymbolTableBuilder,
flow_graph_builder: FlowGraphBuilder,
scopes: Vec<ScopeState>,
/// the definition whose target(s) we are currently walking
current_definition: Option<Definition>,
expressions: FxHashMap<NodeKey, ExpressionId>,
expressions_by_id: IndexVec<ExpressionId, NodeKey>,
}
impl SemanticIndexer {
pub(crate) fn finish(mut self) -> SemanticIndex {
let SemanticIndexer {
flow_graph_builder,
symbol_table_builder,
..
} = self;
self.expressions.shrink_to_fit();
self.expressions_by_id.shrink_to_fit();
SemanticIndex {
flow_graph: flow_graph_builder.finish(),
symbol_table: symbol_table_builder.finish(),
expressions: self.expressions,
expressions_by_id: self.expressions_by_id,
}
}
fn set_current_flow_node(&mut self, new_flow_node_id: FlowNodeId) {
let scope_state = self.scopes.last_mut().expect("scope stack is never empty");
scope_state.current_flow_node_id = new_flow_node_id;
}
fn current_flow_node(&self) -> FlowNodeId {
self.scopes
.last()
.expect("scope stack is never empty")
.current_flow_node_id
}
fn add_or_update_symbol(&mut self, identifier: &str, flags: SymbolFlags) -> SymbolId {
self.symbol_table_builder
.add_or_update_symbol(self.cur_scope(), identifier, flags)
}
fn add_or_update_symbol_with_def(
&mut self,
identifier: &str,
definition: Definition,
) -> SymbolId {
let symbol_id = self.add_or_update_symbol(identifier, SymbolFlags::IS_DEFINED);
self.symbol_table_builder
.add_definition(symbol_id, definition.clone());
let new_flow_node_id =
self.flow_graph_builder
.add_definition(symbol_id, definition, self.current_flow_node());
self.set_current_flow_node(new_flow_node_id);
symbol_id
}
fn push_scope(
&mut self,
name: &str,
kind: ScopeKind,
definition: Option<Definition>,
defining_symbol: Option<SymbolId>,
) -> ScopeId {
let scope_id = self.symbol_table_builder.add_child_scope(
self.cur_scope(),
name,
kind,
definition,
defining_symbol,
);
self.scopes.push(ScopeState {
scope_id,
current_flow_node_id: FlowGraph::start(),
});
scope_id
}
fn pop_scope(&mut self) -> ScopeId {
self.scopes
.pop()
.expect("Scope stack should never be empty")
.scope_id
}
fn cur_scope(&self) -> ScopeId {
self.scopes
.last()
.expect("Scope stack should never be empty")
.scope_id
}
fn record_scope_for_node(&mut self, node_key: NodeKey, scope_id: ScopeId) {
self.symbol_table_builder
.record_scope_for_node(node_key, scope_id);
}
fn insert_constraint(&mut self, expr: &ast::Expr) {
let node_key = NodeKey::from_node(expr.into());
let expression_id = self.expressions[&node_key];
let constraint = self
.flow_graph_builder
.add_constraint(self.current_flow_node(), expression_id);
self.set_current_flow_node(constraint);
}
fn with_type_params(
&mut self,
name: &str,
params: &Option<Box<ast::TypeParams>>,
definition: Option<Definition>,
defining_symbol: Option<SymbolId>,
nested: impl FnOnce(&mut Self) -> ScopeId,
) -> ScopeId {
if let Some(type_params) = params {
self.push_scope(name, ScopeKind::Annotation, definition, defining_symbol);
for type_param in &type_params.type_params {
let name = match type_param {
ast::TypeParam::TypeVar(ast::TypeParamTypeVar { name, .. }) => name,
ast::TypeParam::ParamSpec(ast::TypeParamParamSpec { name, .. }) => name,
ast::TypeParam::TypeVarTuple(ast::TypeParamTypeVarTuple { name, .. }) => name,
};
self.add_or_update_symbol(name, SymbolFlags::IS_DEFINED);
}
}
let scope_id = nested(self);
if params.is_some() {
self.pop_scope();
}
scope_id
}
}
impl SourceOrderVisitor<'_> for SemanticIndexer {
fn visit_expr(&mut self, expr: &ast::Expr) {
let node_key = NodeKey::from_node(expr.into());
let expression_id = self.expressions_by_id.push(node_key);
debug_assert_eq!(
expression_id,
self.flow_graph_builder
.record_expr(self.current_flow_node())
);
debug_assert_eq!(
expression_id,
self.symbol_table_builder
.record_expression(self.cur_scope())
);
self.expressions.insert(node_key, expression_id);
match expr {
ast::Expr::Name(ast::ExprName { id, ctx, .. }) => {
let flags = match ctx {
ast::ExprContext::Load => SymbolFlags::IS_USED,
ast::ExprContext::Store => SymbolFlags::IS_DEFINED,
ast::ExprContext::Del => SymbolFlags::IS_DEFINED,
ast::ExprContext::Invalid => SymbolFlags::empty(),
};
self.add_or_update_symbol(id, flags);
if flags.contains(SymbolFlags::IS_DEFINED) {
if let Some(curdef) = self.current_definition.clone() {
self.add_or_update_symbol_with_def(id, curdef);
}
}
ast::visitor::source_order::walk_expr(self, expr);
}
ast::Expr::Named(node) => {
debug_assert!(self.current_definition.is_none());
self.current_definition =
Some(Definition::NamedExpr(TypedNodeKey::from_node(node)));
// TODO walrus in comprehensions is implicitly nonlocal
self.visit_expr(&node.target);
self.current_definition = None;
self.visit_expr(&node.value);
}
ast::Expr::If(ast::ExprIf {
body, test, orelse, ..
}) => {
// TODO detect statically known truthy or falsy test (via type inference, not naive
// AST inspection, so we can't simplify here, need to record test expression in CFG
// for later checking)
self.visit_expr(test);
let if_branch = self.flow_graph_builder.add_branch(self.current_flow_node());
self.set_current_flow_node(if_branch);
self.insert_constraint(test);
self.visit_expr(body);
let post_body = self.current_flow_node();
self.set_current_flow_node(if_branch);
self.visit_expr(orelse);
let post_else = self
.flow_graph_builder
.add_phi(self.current_flow_node(), post_body);
self.set_current_flow_node(post_else);
}
_ => {
ast::visitor::source_order::walk_expr(self, expr);
}
}
}
fn visit_stmt(&mut self, stmt: &ast::Stmt) {
// TODO need to capture more definition statements here
match stmt {
ast::Stmt::ClassDef(node) => {
let node_key = TypedNodeKey::from_node(node);
let def = Definition::ClassDef(node_key.clone());
let symbol_id = self.add_or_update_symbol_with_def(&node.name, def.clone());
for decorator in &node.decorator_list {
self.visit_decorator(decorator);
}
let scope_id = self.with_type_params(
&node.name,
&node.type_params,
Some(def.clone()),
Some(symbol_id),
|indexer| {
if let Some(arguments) = &node.arguments {
indexer.visit_arguments(arguments);
}
let scope_id = indexer.push_scope(
&node.name,
ScopeKind::Class,
Some(def.clone()),
Some(symbol_id),
);
indexer.visit_body(&node.body);
indexer.pop_scope();
scope_id
},
);
self.record_scope_for_node(*node_key.erased(), scope_id);
}
ast::Stmt::FunctionDef(node) => {
let node_key = TypedNodeKey::from_node(node);
let def = Definition::FunctionDef(node_key.clone());
let symbol_id = self.add_or_update_symbol_with_def(&node.name, def.clone());
for decorator in &node.decorator_list {
self.visit_decorator(decorator);
}
let scope_id = self.with_type_params(
&node.name,
&node.type_params,
Some(def.clone()),
Some(symbol_id),
|indexer| {
indexer.visit_parameters(&node.parameters);
for expr in &node.returns {
indexer.visit_annotation(expr);
}
let scope_id = indexer.push_scope(
&node.name,
ScopeKind::Function,
Some(def.clone()),
Some(symbol_id),
);
indexer.visit_body(&node.body);
indexer.pop_scope();
scope_id
},
);
self.record_scope_for_node(*node_key.erased(), scope_id);
}
ast::Stmt::Import(ast::StmtImport { names, .. }) => {
for alias in names {
let symbol_name = if let Some(asname) = &alias.asname {
asname.id.as_str()
} else {
alias.name.id.split('.').next().unwrap()
};
let module = ModuleName::new(&alias.name.id);
let def = Definition::Import(ImportDefinition {
module: module.clone(),
});
self.add_or_update_symbol_with_def(symbol_name, def);
self.symbol_table_builder
.add_dependency(Dependency::Module(module));
}
}
ast::Stmt::ImportFrom(ast::StmtImportFrom {
module,
names,
level,
..
}) => {
let module = module.as_ref().map(|m| ModuleName::new(&m.id));
for alias in names {
let symbol_name = if let Some(asname) = &alias.asname {
asname.id.as_str()
} else {
alias.name.id.as_str()
};
let def = Definition::ImportFrom(ImportFromDefinition {
module: module.clone(),
name: Name::new(&alias.name.id),
level: *level,
});
self.add_or_update_symbol_with_def(symbol_name, def);
}
let dependency = if let Some(module) = module {
match NonZeroU32::new(*level) {
Some(level) => Dependency::Relative {
level,
module: Some(module),
},
None => Dependency::Module(module),
}
} else {
Dependency::Relative {
level: NonZeroU32::new(*level)
.expect("Import without a module to have a level > 0"),
module,
}
};
self.symbol_table_builder.add_dependency(dependency);
}
ast::Stmt::Assign(node) => {
debug_assert!(self.current_definition.is_none());
self.visit_expr(&node.value);
self.current_definition =
Some(Definition::Assignment(TypedNodeKey::from_node(node)));
for expr in &node.targets {
self.visit_expr(expr);
}
self.current_definition = None;
}
ast::Stmt::If(node) => {
// TODO detect statically known truthy or falsy test (via type inference, not naive
// AST inspection, so we can't simplify here, need to record test expression in CFG
// for later checking)
// we visit the if "test" condition first regardless
self.visit_expr(&node.test);
// create branch node: does the if test pass or not?
let if_branch = self.flow_graph_builder.add_branch(self.current_flow_node());
// visit the body of the `if` clause
self.set_current_flow_node(if_branch);
self.insert_constraint(&node.test);
self.visit_body(&node.body);
// Flow node for the last if/elif condition branch; represents the "no branch
// taken yet" possibility (where "taking a branch" means that the condition in an
// if or elif evaluated to true and control flow went into that clause).
let mut prior_branch = if_branch;
// Flow node for the state after the prior if/elif/else clause; represents "we have
// taken one of the branches up to this point." Initially set to the post-if-clause
// state, later will be set to the phi node joining that possible path with the
// possibility that we took a later if/elif/else clause instead.
let mut post_prior_clause = self.current_flow_node();
// Flag to mark if the final clause is an "else" -- if so, that means the "match no
// clauses" path is not possible, we have to go through one of the clauses.
let mut last_branch_is_else = false;
for clause in &node.elif_else_clauses {
if let Some(test) = &clause.test {
self.visit_expr(test);
// This is an elif clause. Create a new branch node. Its predecessor is the
// previous branch node, because we can only take one branch in an entire
// if/elif/else chain, so if we take this branch, it can only be because we
// didn't take the previous one.
prior_branch = self.flow_graph_builder.add_branch(prior_branch);
self.set_current_flow_node(prior_branch);
self.insert_constraint(test);
} else {
// This is an else clause. No need to create a branch node; there's no
// branch here, if we haven't taken any previous branch, we definitely go
// into the "else" clause.
self.set_current_flow_node(prior_branch);
last_branch_is_else = true;
}
self.visit_elif_else_clause(clause);
// Update `post_prior_clause` to a new phi node joining the possibility that we
// took any of the previous branches with the possibility that we took the one
// just visited.
post_prior_clause = self
.flow_graph_builder
.add_phi(self.current_flow_node(), post_prior_clause);
}
if !last_branch_is_else {
// Final branch was not an "else", which means it's possible we took zero
// branches in the entire if/elif chain, so we need one more phi node to join
// the "no branches taken" possibility.
post_prior_clause = self
.flow_graph_builder
.add_phi(post_prior_clause, prior_branch);
}
// Onward, with current flow node set to our final Phi node.
self.set_current_flow_node(post_prior_clause);
}
_ => {
ast::visitor::source_order::walk_stmt(self, stmt);
}
}
}
}
#[derive(Debug, Default)]
pub struct SemanticIndexStorage(KeyValueCache<FileId, Arc<SemanticIndex>>);
impl Deref for SemanticIndexStorage {
type Target = KeyValueCache<FileId, Arc<SemanticIndex>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for SemanticIndexStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
#[cfg(test)]
mod tests {
use crate::semantic::symbol_table::{Symbol, SymbolIterator};
use ruff_python_ast as ast;
use ruff_python_ast::ModModule;
use ruff_python_parser::{Mode, Parsed};
use super::{Definition, ScopeKind, SemanticIndex, SymbolId};
fn parse(code: &str) -> Parsed<ModModule> {
ruff_python_parser::parse_unchecked(code, Mode::Module)
.try_into_module()
.unwrap()
}
fn names<I>(it: SymbolIterator<I>) -> Vec<&str>
where
I: Iterator<Item = SymbolId>,
{
let mut symbols: Vec<_> = it.map(Symbol::name).collect();
symbols.sort_unstable();
symbols
}
#[test]
fn empty() {
let parsed = parse("");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()).len(), 0);
}
#[test]
fn simple() {
let parsed = parse("x");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["x"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("x").unwrap())
.len(),
0
);
}
#[test]
fn annotation_only() {
let parsed = parse("x: int");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["int", "x"]);
// TODO record definition
}
#[test]
fn import() {
let parsed = parse("import foo");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["foo"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("foo").unwrap())
.len(),
1
);
}
#[test]
fn import_sub() {
let parsed = parse("import foo.bar");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["foo"]);
}
#[test]
fn import_as() {
let parsed = parse("import foo.bar as baz");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["baz"]);
}
#[test]
fn import_from() {
let parsed = parse("from bar import foo");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["foo"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("foo").unwrap())
.len(),
1
);
assert!(
table.root_symbol_id_by_name("foo").is_some_and(|sid| {
let s = sid.symbol(&table);
s.is_defined() || !s.is_used()
}),
"symbols that are defined get the defined flag"
);
}
#[test]
fn assign() {
let parsed = parse("x = foo");
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["foo", "x"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("x").unwrap())
.len(),
1
);
assert!(
table.root_symbol_id_by_name("foo").is_some_and(|sid| {
let s = sid.symbol(&table);
!s.is_defined() && s.is_used()
}),
"a symbol used but not defined in a scope should have only the used flag"
);
}
#[test]
fn class_scope() {
let parsed = parse(
"
class C:
x = 1
y = 2
",
);
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["C", "y"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 1);
let c_scope = scopes[0].scope(&table);
assert_eq!(c_scope.kind(), ScopeKind::Class);
assert_eq!(c_scope.name(), "C");
assert_eq!(names(table.symbols_for_scope(scopes[0])), vec!["x"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("C").unwrap())
.len(),
1
);
}
#[test]
fn func_scope() {
let parsed = parse(
"
def func():
x = 1
y = 2
",
);
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["func", "y"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 1);
let func_scope = scopes[0].scope(&table);
assert_eq!(func_scope.kind(), ScopeKind::Function);
assert_eq!(func_scope.name(), "func");
assert_eq!(names(table.symbols_for_scope(scopes[0])), vec!["x"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("func").unwrap())
.len(),
1
);
}
#[test]
fn dupes() {
let parsed = parse(
"
def func():
x = 1
def func():
y = 2
",
);
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["func"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 2);
let func_scope_1 = scopes[0].scope(&table);
let func_scope_2 = scopes[1].scope(&table);
assert_eq!(func_scope_1.kind(), ScopeKind::Function);
assert_eq!(func_scope_1.name(), "func");
assert_eq!(func_scope_2.kind(), ScopeKind::Function);
assert_eq!(func_scope_2.name(), "func");
assert_eq!(names(table.symbols_for_scope(scopes[0])), vec!["x"]);
assert_eq!(names(table.symbols_for_scope(scopes[1])), vec!["y"]);
assert_eq!(
table
.definitions(table.root_symbol_id_by_name("func").unwrap())
.len(),
2
);
}
#[test]
fn generic_func() {
let parsed = parse(
"
def func[T]():
x = 1
",
);
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["func"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 1);
let ann_scope_id = scopes[0];
let ann_scope = ann_scope_id.scope(&table);
assert_eq!(ann_scope.kind(), ScopeKind::Annotation);
assert_eq!(ann_scope.name(), "func");
assert_eq!(names(table.symbols_for_scope(ann_scope_id)), vec!["T"]);
let scopes = table.child_scope_ids_of(ann_scope_id);
assert_eq!(scopes.len(), 1);
let func_scope_id = scopes[0];
let func_scope = func_scope_id.scope(&table);
assert_eq!(func_scope.kind(), ScopeKind::Function);
assert_eq!(func_scope.name(), "func");
assert_eq!(names(table.symbols_for_scope(func_scope_id)), vec!["x"]);
}
#[test]
fn generic_class() {
let parsed = parse(
"
class C[T]:
x = 1
",
);
let table = SemanticIndex::from_ast(parsed.syntax()).symbol_table;
assert_eq!(names(table.root_symbols()), vec!["C"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 1);
let ann_scope_id = scopes[0];
let ann_scope = ann_scope_id.scope(&table);
assert_eq!(ann_scope.kind(), ScopeKind::Annotation);
assert_eq!(ann_scope.name(), "C");
assert_eq!(names(table.symbols_for_scope(ann_scope_id)), vec!["T"]);
assert!(
table
.symbol_by_name(ann_scope_id, "T")
.is_some_and(|s| s.is_defined() && !s.is_used()),
"type parameters are defined by the scope that introduces them"
);
let scopes = table.child_scope_ids_of(ann_scope_id);
assert_eq!(scopes.len(), 1);
let func_scope_id = scopes[0];
let func_scope = func_scope_id.scope(&table);
assert_eq!(func_scope.kind(), ScopeKind::Class);
assert_eq!(func_scope.name(), "C");
assert_eq!(names(table.symbols_for_scope(func_scope_id)), vec!["x"]);
}
#[test]
fn reachability_trivial() {
let parsed = parse("x = 1; x");
let ast = parsed.syntax();
let index = SemanticIndex::from_ast(ast);
let table = &index.symbol_table;
let x_sym = table
.root_symbol_id_by_name("x")
.expect("x symbol should exist");
let ast::Stmt::Expr(ast::StmtExpr { value: x_use, .. }) = &ast.body[1] else {
panic!("should be an expr")
};
let x_defs: Vec<_> = index
.reachable_definitions(x_sym, x_use)
.map(|constrained_definition| constrained_definition.definition)
.collect();
assert_eq!(x_defs.len(), 1);
let Definition::Assignment(node_key) = &x_defs[0] else {
panic!("def should be an assignment")
};
let Some(def_node) = node_key.resolve(ast.into()) else {
panic!("node key should resolve")
};
let ast::Expr::NumberLiteral(ast::ExprNumberLiteral {
value: ast::Number::Int(num),
..
}) = &*def_node.value
else {
panic!("should be a number literal")
};
assert_eq!(*num, 1);
}
#[test]
fn expression_scope() {
let parsed = parse("x = 1;\ndef test():\n y = 4");
let ast = parsed.syntax();
let index = SemanticIndex::from_ast(ast);
let table = &index.symbol_table;
let x_sym = table
.root_symbol_by_name("x")
.expect("x symbol should exist");
let x_stmt = ast.body[0].as_assign_stmt().unwrap();
let x_id = index.expression_id(&x_stmt.targets[0]);
assert_eq!(table.scope_of_expression(x_id).kind(), ScopeKind::Module);
assert_eq!(table.scope_id_of_expression(x_id), x_sym.scope_id());
let def = ast.body[1].as_function_def_stmt().unwrap();
let y_stmt = def.body[0].as_assign_stmt().unwrap();
let y_id = index.expression_id(&y_stmt.targets[0]);
assert_eq!(table.scope_of_expression(y_id).kind(), ScopeKind::Function);
}
}

View File

@@ -1,52 +0,0 @@
use crate::ast_ids::TypedNodeKey;
use crate::semantic::ModuleName;
use crate::Name;
use ruff_python_ast as ast;
// TODO storing TypedNodeKey for definitions means we have to search to find them again in the AST;
// this is at best O(log n). If looking up definitions is a bottleneck we should look for
// alternatives here.
// TODO intern Definitions in SymbolTable and reference using IDs?
#[derive(Clone, Debug)]
pub enum Definition {
// For the import cases, we don't need reference to any arbitrary AST subtrees (annotations,
// RHS), and referencing just the import statement node is imprecise (a single import statement
// can assign many symbols, we'd have to re-search for the one we care about), so we just copy
// the small amount of information we need from the AST.
Import(ImportDefinition),
ImportFrom(ImportFromDefinition),
ClassDef(TypedNodeKey<ast::StmtClassDef>),
FunctionDef(TypedNodeKey<ast::StmtFunctionDef>),
Assignment(TypedNodeKey<ast::StmtAssign>),
AnnotatedAssignment(TypedNodeKey<ast::StmtAnnAssign>),
NamedExpr(TypedNodeKey<ast::ExprNamed>),
/// represents the implicit initial definition of every name as "unbound"
Unbound,
// TODO with statements, except handlers, function args...
}
#[derive(Clone, Debug)]
pub struct ImportDefinition {
pub module: ModuleName,
}
#[derive(Clone, Debug)]
pub struct ImportFromDefinition {
pub module: Option<ModuleName>,
pub name: Name,
pub level: u32,
}
impl ImportFromDefinition {
pub fn module(&self) -> Option<&ModuleName> {
self.module.as_ref()
}
pub fn name(&self) -> &Name {
&self.name
}
pub fn level(&self) -> u32 {
self.level
}
}

View File

@@ -1,270 +0,0 @@
use super::symbol_table::SymbolId;
use crate::semantic::{Definition, ExpressionId};
use ruff_index::{newtype_index, IndexVec};
use std::iter::FusedIterator;
use std::ops::Range;
#[newtype_index]
pub struct FlowNodeId;
#[derive(Debug)]
pub(crate) enum FlowNode {
Start,
Definition(DefinitionFlowNode),
Branch(BranchFlowNode),
Phi(PhiFlowNode),
Constraint(ConstraintFlowNode),
}
/// A point in control flow where a symbol is defined
#[derive(Debug)]
pub(crate) struct DefinitionFlowNode {
symbol_id: SymbolId,
definition: Definition,
predecessor: FlowNodeId,
}
/// A branch in control flow
#[derive(Debug)]
pub(crate) struct BranchFlowNode {
predecessor: FlowNodeId,
}
/// A join point where control flow paths come together
#[derive(Debug)]
pub(crate) struct PhiFlowNode {
first_predecessor: FlowNodeId,
second_predecessor: FlowNodeId,
}
/// A branch test which may apply constraints to a symbol's type
#[derive(Debug)]
pub(crate) struct ConstraintFlowNode {
predecessor: FlowNodeId,
test_expression: ExpressionId,
}
#[derive(Debug)]
pub struct FlowGraph {
flow_nodes_by_id: IndexVec<FlowNodeId, FlowNode>,
expression_map: IndexVec<ExpressionId, FlowNodeId>,
}
impl FlowGraph {
pub fn start() -> FlowNodeId {
FlowNodeId::from_usize(0)
}
pub fn for_expr(&self, expr: ExpressionId) -> FlowNodeId {
self.expression_map[expr]
}
}
#[derive(Debug)]
pub(crate) struct FlowGraphBuilder {
flow_graph: FlowGraph,
}
impl FlowGraphBuilder {
pub(crate) fn new() -> Self {
let mut graph = FlowGraph {
flow_nodes_by_id: IndexVec::default(),
expression_map: IndexVec::default(),
};
graph.flow_nodes_by_id.push(FlowNode::Start);
Self { flow_graph: graph }
}
pub(crate) fn add(&mut self, node: FlowNode) -> FlowNodeId {
self.flow_graph.flow_nodes_by_id.push(node)
}
pub(crate) fn add_definition(
&mut self,
symbol_id: SymbolId,
definition: Definition,
predecessor: FlowNodeId,
) -> FlowNodeId {
self.add(FlowNode::Definition(DefinitionFlowNode {
symbol_id,
definition,
predecessor,
}))
}
pub(crate) fn add_branch(&mut self, predecessor: FlowNodeId) -> FlowNodeId {
self.add(FlowNode::Branch(BranchFlowNode { predecessor }))
}
pub(crate) fn add_phi(
&mut self,
first_predecessor: FlowNodeId,
second_predecessor: FlowNodeId,
) -> FlowNodeId {
self.add(FlowNode::Phi(PhiFlowNode {
first_predecessor,
second_predecessor,
}))
}
pub(crate) fn add_constraint(
&mut self,
predecessor: FlowNodeId,
test_expression: ExpressionId,
) -> FlowNodeId {
self.add(FlowNode::Constraint(ConstraintFlowNode {
predecessor,
test_expression,
}))
}
pub(super) fn record_expr(&mut self, node_id: FlowNodeId) -> ExpressionId {
self.flow_graph.expression_map.push(node_id)
}
pub(super) fn finish(mut self) -> FlowGraph {
self.flow_graph.flow_nodes_by_id.shrink_to_fit();
self.flow_graph.expression_map.shrink_to_fit();
self.flow_graph
}
}
/// A definition, and the set of constraints between a use and the definition
#[derive(Debug, Clone)]
pub struct ConstrainedDefinition {
pub definition: Definition,
pub constraints: Vec<ExpressionId>,
}
/// A flow node and the constraints we passed through to reach it
#[derive(Debug)]
struct FlowState {
node_id: FlowNodeId,
constraints_range: Range<usize>,
}
#[derive(Debug)]
pub struct ReachableDefinitionsIterator<'a> {
flow_graph: &'a FlowGraph,
symbol_id: SymbolId,
pending: Vec<FlowState>,
constraints: Vec<ExpressionId>,
}
impl<'a> ReachableDefinitionsIterator<'a> {
pub fn new(flow_graph: &'a FlowGraph, symbol_id: SymbolId, start_node_id: FlowNodeId) -> Self {
Self {
flow_graph,
symbol_id,
pending: vec![FlowState {
node_id: start_node_id,
constraints_range: 0..0,
}],
constraints: vec![],
}
}
}
impl<'a> Iterator for ReachableDefinitionsIterator<'a> {
type Item = ConstrainedDefinition;
fn next(&mut self) -> Option<Self::Item> {
let FlowState {
mut node_id,
mut constraints_range,
} = self.pending.pop()?;
self.constraints.truncate(constraints_range.end + 1);
loop {
match &self.flow_graph.flow_nodes_by_id[node_id] {
FlowNode::Start => {
// constraints on unbound are irrelevant
return Some(ConstrainedDefinition {
definition: Definition::Unbound,
constraints: vec![],
});
}
FlowNode::Definition(def_node) => {
if def_node.symbol_id == self.symbol_id {
return Some(ConstrainedDefinition {
definition: def_node.definition.clone(),
constraints: self.constraints[constraints_range].to_vec(),
});
}
node_id = def_node.predecessor;
}
FlowNode::Branch(branch_node) => {
node_id = branch_node.predecessor;
}
FlowNode::Phi(phi_node) => {
self.pending.push(FlowState {
node_id: phi_node.first_predecessor,
constraints_range: constraints_range.clone(),
});
node_id = phi_node.second_predecessor;
}
FlowNode::Constraint(constraint_node) => {
node_id = constraint_node.predecessor;
self.constraints.push(constraint_node.test_expression);
constraints_range.end += 1;
}
}
}
}
}
impl<'a> FusedIterator for ReachableDefinitionsIterator<'a> {}
impl std::fmt::Display for FlowGraph {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
writeln!(f, "flowchart TD")?;
for (id, node) in self.flow_nodes_by_id.iter_enumerated() {
write!(f, " id{}", id.as_u32())?;
match node {
FlowNode::Start => writeln!(f, r"[\Start/]")?,
FlowNode::Definition(def_node) => {
writeln!(f, r"(Define symbol {})", def_node.symbol_id.as_u32())?;
writeln!(
f,
r" id{}-->id{}",
def_node.predecessor.as_u32(),
id.as_u32()
)?;
}
FlowNode::Branch(branch_node) => {
writeln!(f, r"{{Branch}}")?;
writeln!(
f,
r" id{}-->id{}",
branch_node.predecessor.as_u32(),
id.as_u32()
)?;
}
FlowNode::Phi(phi_node) => {
writeln!(f, r"((Phi))")?;
writeln!(
f,
r" id{}-->id{}",
phi_node.second_predecessor.as_u32(),
id.as_u32()
)?;
writeln!(
f,
r" id{}-->id{}",
phi_node.first_predecessor.as_u32(),
id.as_u32()
)?;
}
FlowNode::Constraint(constraint_node) => {
writeln!(f, r"((Constraint))")?;
writeln!(
f,
r" id{}-->id{}",
constraint_node.predecessor.as_u32(),
id.as_u32()
)?;
}
}
}
Ok(())
}
}

View File

@@ -1,560 +0,0 @@
#![allow(dead_code)]
use std::hash::{Hash, Hasher};
use std::iter::{Copied, DoubleEndedIterator, FusedIterator};
use std::num::NonZeroU32;
use bitflags::bitflags;
use hashbrown::hash_map::{Keys, RawEntryMut};
use rustc_hash::{FxHashMap, FxHasher};
use ruff_index::{newtype_index, IndexVec};
use crate::ast_ids::NodeKey;
use crate::module::ModuleName;
use crate::semantic::{Definition, ExpressionId};
use crate::Name;
type Map<K, V> = hashbrown::HashMap<K, V, ()>;
#[newtype_index]
pub struct ScopeId;
impl ScopeId {
pub fn scope(self, table: &SymbolTable) -> &Scope {
&table.scopes_by_id[self]
}
}
#[newtype_index]
pub struct SymbolId;
impl SymbolId {
pub fn symbol(self, table: &SymbolTable) -> &Symbol {
&table.symbols_by_id[self]
}
}
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum ScopeKind {
Module,
Annotation,
Class,
Function,
}
#[derive(Debug)]
pub struct Scope {
name: Name,
kind: ScopeKind,
parent: Option<ScopeId>,
children: Vec<ScopeId>,
/// the definition (e.g. class or function) that created this scope
definition: Option<Definition>,
/// the symbol (e.g. class or function) that owns this scope
defining_symbol: Option<SymbolId>,
/// symbol IDs, hashed by symbol name
symbols_by_name: Map<SymbolId, ()>,
}
impl Scope {
pub fn name(&self) -> &str {
self.name.as_str()
}
pub fn kind(&self) -> ScopeKind {
self.kind
}
pub fn definition(&self) -> Option<Definition> {
self.definition.clone()
}
pub fn defining_symbol(&self) -> Option<SymbolId> {
self.defining_symbol
}
}
#[derive(Debug)]
pub(crate) enum Kind {
FreeVar,
CellVar,
CellVarAssigned,
ExplicitGlobal,
ImplicitGlobal,
}
bitflags! {
#[derive(Copy,Clone,Debug)]
pub struct SymbolFlags: u8 {
const IS_USED = 1 << 0;
const IS_DEFINED = 1 << 1;
/// TODO: This flag is not yet set by anything
const MARKED_GLOBAL = 1 << 2;
/// TODO: This flag is not yet set by anything
const MARKED_NONLOCAL = 1 << 3;
}
}
#[derive(Debug)]
pub struct Symbol {
name: Name,
flags: SymbolFlags,
scope_id: ScopeId,
// kind: Kind,
}
impl Symbol {
pub fn name(&self) -> &str {
self.name.as_str()
}
pub fn scope_id(&self) -> ScopeId {
self.scope_id
}
/// Is the symbol used in its containing scope?
pub fn is_used(&self) -> bool {
self.flags.contains(SymbolFlags::IS_USED)
}
/// Is the symbol defined in its containing scope?
pub fn is_defined(&self) -> bool {
self.flags.contains(SymbolFlags::IS_DEFINED)
}
// TODO: implement Symbol.kind 2-pass analysis to categorize as: free-var, cell-var,
// explicit-global, implicit-global and implement Symbol.kind by modifying the preorder
// traversal code
}
#[derive(Debug, Clone)]
pub enum Dependency {
Module(ModuleName),
Relative {
level: NonZeroU32,
module: Option<ModuleName>,
},
}
/// Table of all symbols in all scopes for a module.
#[derive(Debug)]
pub struct SymbolTable {
scopes_by_id: IndexVec<ScopeId, Scope>,
symbols_by_id: IndexVec<SymbolId, Symbol>,
/// the definitions for each symbol
defs: FxHashMap<SymbolId, Vec<Definition>>,
/// map of AST node (e.g. class/function def) to sub-scope it creates
scopes_by_node: FxHashMap<NodeKey, ScopeId>,
/// Maps expressions to their enclosing scope.
expression_scopes: IndexVec<ExpressionId, ScopeId>,
/// dependencies of this module
dependencies: Vec<Dependency>,
}
impl SymbolTable {
pub fn dependencies(&self) -> &[Dependency] {
&self.dependencies
}
pub const fn root_scope_id() -> ScopeId {
ScopeId::from_usize(0)
}
pub fn root_scope(&self) -> &Scope {
&self.scopes_by_id[SymbolTable::root_scope_id()]
}
pub fn symbol_ids_for_scope(&self, scope_id: ScopeId) -> Copied<Keys<SymbolId, ()>> {
self.scopes_by_id[scope_id].symbols_by_name.keys().copied()
}
pub fn symbols_for_scope(
&self,
scope_id: ScopeId,
) -> SymbolIterator<Copied<Keys<SymbolId, ()>>> {
SymbolIterator {
table: self,
ids: self.symbol_ids_for_scope(scope_id),
}
}
pub fn root_symbol_ids(&self) -> Copied<Keys<SymbolId, ()>> {
self.symbol_ids_for_scope(SymbolTable::root_scope_id())
}
pub fn root_symbols(&self) -> SymbolIterator<Copied<Keys<SymbolId, ()>>> {
self.symbols_for_scope(SymbolTable::root_scope_id())
}
pub fn child_scope_ids_of(&self, scope_id: ScopeId) -> &[ScopeId] {
&self.scopes_by_id[scope_id].children
}
pub fn child_scopes_of(&self, scope_id: ScopeId) -> ScopeIterator<&[ScopeId]> {
ScopeIterator {
table: self,
ids: self.child_scope_ids_of(scope_id),
}
}
pub fn root_child_scope_ids(&self) -> &[ScopeId] {
self.child_scope_ids_of(SymbolTable::root_scope_id())
}
pub fn root_child_scopes(&self) -> ScopeIterator<&[ScopeId]> {
self.child_scopes_of(SymbolTable::root_scope_id())
}
pub fn symbol_id_by_name(&self, scope_id: ScopeId, name: &str) -> Option<SymbolId> {
let scope = &self.scopes_by_id[scope_id];
let hash = SymbolTable::hash_name(name);
let name = Name::new(name);
Some(
*scope
.symbols_by_name
.raw_entry()
.from_hash(hash, |symid| self.symbols_by_id[*symid].name == name)?
.0,
)
}
pub fn symbol_by_name(&self, scope_id: ScopeId, name: &str) -> Option<&Symbol> {
Some(&self.symbols_by_id[self.symbol_id_by_name(scope_id, name)?])
}
pub fn root_symbol_id_by_name(&self, name: &str) -> Option<SymbolId> {
self.symbol_id_by_name(SymbolTable::root_scope_id(), name)
}
pub fn root_symbol_by_name(&self, name: &str) -> Option<&Symbol> {
self.symbol_by_name(SymbolTable::root_scope_id(), name)
}
pub fn scope_id_of_symbol(&self, symbol_id: SymbolId) -> ScopeId {
self.symbols_by_id[symbol_id].scope_id
}
pub fn scope_of_symbol(&self, symbol_id: SymbolId) -> &Scope {
&self.scopes_by_id[self.scope_id_of_symbol(symbol_id)]
}
pub fn scope_id_of_expression(&self, expression: ExpressionId) -> ScopeId {
self.expression_scopes[expression]
}
pub fn scope_of_expression(&self, expr_id: ExpressionId) -> &Scope {
&self.scopes_by_id[self.scope_id_of_expression(expr_id)]
}
pub fn parent_scopes(
&self,
scope_id: ScopeId,
) -> ScopeIterator<impl Iterator<Item = ScopeId> + '_> {
ScopeIterator {
table: self,
ids: std::iter::successors(Some(scope_id), |scope| self.scopes_by_id[*scope].parent),
}
}
pub fn parent_scope(&self, scope_id: ScopeId) -> Option<ScopeId> {
self.scopes_by_id[scope_id].parent
}
pub fn scope_id_for_node(&self, node_key: &NodeKey) -> ScopeId {
self.scopes_by_node[node_key]
}
pub fn definitions(&self, symbol_id: SymbolId) -> &[Definition] {
self.defs
.get(&symbol_id)
.map(std::vec::Vec::as_slice)
.unwrap_or_default()
}
pub fn all_definitions(&self) -> impl Iterator<Item = (SymbolId, &Definition)> + '_ {
self.defs
.iter()
.flat_map(|(sym_id, defs)| defs.iter().map(move |def| (*sym_id, def)))
}
fn hash_name(name: &str) -> u64 {
let mut hasher = FxHasher::default();
name.hash(&mut hasher);
hasher.finish()
}
}
pub struct SymbolIterator<'a, I> {
table: &'a SymbolTable,
ids: I,
}
impl<'a, I> Iterator for SymbolIterator<'a, I>
where
I: Iterator<Item = SymbolId>,
{
type Item = &'a Symbol;
fn next(&mut self) -> Option<Self::Item> {
let id = self.ids.next()?;
Some(&self.table.symbols_by_id[id])
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.ids.size_hint()
}
}
impl<'a, I> FusedIterator for SymbolIterator<'a, I> where
I: Iterator<Item = SymbolId> + FusedIterator
{
}
impl<'a, I> DoubleEndedIterator for SymbolIterator<'a, I>
where
I: Iterator<Item = SymbolId> + DoubleEndedIterator,
{
fn next_back(&mut self) -> Option<Self::Item> {
let id = self.ids.next_back()?;
Some(&self.table.symbols_by_id[id])
}
}
// TODO maybe get rid of this and just do all data access via methods on ScopeId?
pub struct ScopeIterator<'a, I> {
table: &'a SymbolTable,
ids: I,
}
/// iterate (`ScopeId`, `Scope`) pairs for given `ScopeId` iterator
impl<'a, I> Iterator for ScopeIterator<'a, I>
where
I: Iterator<Item = ScopeId>,
{
type Item = (ScopeId, &'a Scope);
fn next(&mut self) -> Option<Self::Item> {
let id = self.ids.next()?;
Some((id, &self.table.scopes_by_id[id]))
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.ids.size_hint()
}
}
impl<'a, I> FusedIterator for ScopeIterator<'a, I> where I: Iterator<Item = ScopeId> + FusedIterator {}
impl<'a, I> DoubleEndedIterator for ScopeIterator<'a, I>
where
I: Iterator<Item = ScopeId> + DoubleEndedIterator,
{
fn next_back(&mut self) -> Option<Self::Item> {
let id = self.ids.next_back()?;
Some((id, &self.table.scopes_by_id[id]))
}
}
#[derive(Debug)]
pub(super) struct SymbolTableBuilder {
symbol_table: SymbolTable,
}
impl SymbolTableBuilder {
pub(super) fn new() -> Self {
let mut table = SymbolTable {
scopes_by_id: IndexVec::new(),
symbols_by_id: IndexVec::new(),
defs: FxHashMap::default(),
scopes_by_node: FxHashMap::default(),
expression_scopes: IndexVec::new(),
dependencies: Vec::new(),
};
table.scopes_by_id.push(Scope {
name: Name::new("<module>"),
kind: ScopeKind::Module,
parent: None,
children: Vec::new(),
definition: None,
defining_symbol: None,
symbols_by_name: Map::default(),
});
Self {
symbol_table: table,
}
}
pub(super) fn finish(self) -> SymbolTable {
let mut symbol_table = self.symbol_table;
symbol_table.scopes_by_id.shrink_to_fit();
symbol_table.symbols_by_id.shrink_to_fit();
symbol_table.defs.shrink_to_fit();
symbol_table.scopes_by_node.shrink_to_fit();
symbol_table.expression_scopes.shrink_to_fit();
symbol_table.dependencies.shrink_to_fit();
symbol_table
}
pub(super) fn add_or_update_symbol(
&mut self,
scope_id: ScopeId,
name: &str,
flags: SymbolFlags,
) -> SymbolId {
let hash = SymbolTable::hash_name(name);
let scope = &mut self.symbol_table.scopes_by_id[scope_id];
let name = Name::new(name);
let entry = scope
.symbols_by_name
.raw_entry_mut()
.from_hash(hash, |existing| {
self.symbol_table.symbols_by_id[*existing].name == name
});
match entry {
RawEntryMut::Occupied(entry) => {
if let Some(symbol) = self.symbol_table.symbols_by_id.get_mut(*entry.key()) {
symbol.flags.insert(flags);
};
*entry.key()
}
RawEntryMut::Vacant(entry) => {
let id = self.symbol_table.symbols_by_id.push(Symbol {
name,
flags,
scope_id,
});
entry.insert_with_hasher(hash, id, (), |symid| {
SymbolTable::hash_name(&self.symbol_table.symbols_by_id[*symid].name)
});
id
}
}
}
pub(super) fn add_definition(&mut self, symbol_id: SymbolId, definition: Definition) {
self.symbol_table
.defs
.entry(symbol_id)
.or_default()
.push(definition);
}
pub(super) fn add_child_scope(
&mut self,
parent_scope_id: ScopeId,
name: &str,
kind: ScopeKind,
definition: Option<Definition>,
defining_symbol: Option<SymbolId>,
) -> ScopeId {
let new_scope_id = self.symbol_table.scopes_by_id.push(Scope {
name: Name::new(name),
kind,
parent: Some(parent_scope_id),
children: Vec::new(),
definition,
defining_symbol,
symbols_by_name: Map::default(),
});
let parent_scope = &mut self.symbol_table.scopes_by_id[parent_scope_id];
parent_scope.children.push(new_scope_id);
new_scope_id
}
pub(super) fn record_scope_for_node(&mut self, node_key: NodeKey, scope_id: ScopeId) {
self.symbol_table.scopes_by_node.insert(node_key, scope_id);
}
pub(super) fn add_dependency(&mut self, dependency: Dependency) {
self.symbol_table.dependencies.push(dependency);
}
/// Records the scope for the current expression
pub(super) fn record_expression(&mut self, scope: ScopeId) -> ExpressionId {
self.symbol_table.expression_scopes.push(scope)
}
}
#[cfg(test)]
mod tests {
use super::{ScopeKind, SymbolFlags, SymbolTable, SymbolTableBuilder};
#[test]
fn insert_same_name_symbol_twice() {
let mut builder = SymbolTableBuilder::new();
let root_scope_id = SymbolTable::root_scope_id();
let symbol_id_1 =
builder.add_or_update_symbol(root_scope_id, "foo", SymbolFlags::IS_DEFINED);
let symbol_id_2 = builder.add_or_update_symbol(root_scope_id, "foo", SymbolFlags::IS_USED);
let table = builder.finish();
assert_eq!(symbol_id_1, symbol_id_2);
assert!(symbol_id_1.symbol(&table).is_used(), "flags must merge");
assert!(symbol_id_1.symbol(&table).is_defined(), "flags must merge");
}
#[test]
fn insert_different_named_symbols() {
let mut builder = SymbolTableBuilder::new();
let root_scope_id = SymbolTable::root_scope_id();
let symbol_id_1 = builder.add_or_update_symbol(root_scope_id, "foo", SymbolFlags::empty());
let symbol_id_2 = builder.add_or_update_symbol(root_scope_id, "bar", SymbolFlags::empty());
assert_ne!(symbol_id_1, symbol_id_2);
}
#[test]
fn add_child_scope_with_symbol() {
let mut builder = SymbolTableBuilder::new();
let root_scope_id = SymbolTable::root_scope_id();
let foo_symbol_top =
builder.add_or_update_symbol(root_scope_id, "foo", SymbolFlags::empty());
let c_scope = builder.add_child_scope(root_scope_id, "C", ScopeKind::Class, None, None);
let foo_symbol_inner = builder.add_or_update_symbol(c_scope, "foo", SymbolFlags::empty());
assert_ne!(foo_symbol_top, foo_symbol_inner);
}
#[test]
fn scope_from_id() {
let table = SymbolTableBuilder::new().finish();
let root_scope_id = SymbolTable::root_scope_id();
let scope = root_scope_id.scope(&table);
assert_eq!(scope.name.as_str(), "<module>");
assert_eq!(scope.kind, ScopeKind::Module);
}
#[test]
fn symbol_from_id() {
let mut builder = SymbolTableBuilder::new();
let root_scope_id = SymbolTable::root_scope_id();
let foo_symbol_id =
builder.add_or_update_symbol(root_scope_id, "foo", SymbolFlags::empty());
let table = builder.finish();
let symbol = foo_symbol_id.symbol(&table);
assert_eq!(symbol.name(), "foo");
}
#[test]
fn bigger_symbol_table() {
let mut builder = SymbolTableBuilder::new();
let root_scope_id = SymbolTable::root_scope_id();
let foo_symbol_id =
builder.add_or_update_symbol(root_scope_id, "foo", SymbolFlags::empty());
builder.add_or_update_symbol(root_scope_id, "bar", SymbolFlags::empty());
builder.add_or_update_symbol(root_scope_id, "baz", SymbolFlags::empty());
builder.add_or_update_symbol(root_scope_id, "qux", SymbolFlags::empty());
let table = builder.finish();
let foo_symbol_id_2 = table
.root_symbol_id_by_name("foo")
.expect("foo symbol to be found");
assert_eq!(foo_symbol_id_2, foo_symbol_id);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,762 +0,0 @@
#![allow(dead_code)]
use ruff_python_ast as ast;
use ruff_python_ast::AstNode;
use std::fmt::Debug;
use crate::db::{QueryResult, SemanticDb, SemanticJar};
use crate::module::{resolve_module, ModuleName};
use crate::parse::parse;
use crate::semantic::types::{ModuleTypeId, Type};
use crate::semantic::{
resolve_global_symbol, semantic_index, ConstrainedDefinition, Definition, GlobalSymbolId,
ImportDefinition, ImportFromDefinition,
};
use crate::{FileId, Name};
// FIXME: Figure out proper dead-lock free synchronisation now that this takes `&db` instead of `&mut db`.
/// Resolve the public-facing type for a symbol (the type seen by other scopes: other modules, or
/// nested functions). Because calls to nested functions and imports can occur anywhere in control
/// flow, this type must be conservative and consider all definitions of the symbol that could
/// possibly be seen by another scope. Currently we take the most conservative approach, which is
/// the union of all definitions. We may be able to narrow this in future to eliminate definitions
/// which can't possibly (or at least likely) be seen by any other scope, so that e.g. we could
/// infer `Literal["1"]` instead of `Literal[1] | Literal["1"]` for `x` in `x = x; x = str(x);`.
#[tracing::instrument(level = "trace", skip(db))]
pub fn infer_symbol_public_type(db: &dyn SemanticDb, symbol: GlobalSymbolId) -> QueryResult<Type> {
let index = semantic_index(db, symbol.file_id)?;
let defs = index.symbol_table().definitions(symbol.symbol_id).to_vec();
let jar: &SemanticJar = db.jar()?;
if let Some(ty) = jar.type_store.get_cached_symbol_public_type(symbol) {
return Ok(ty);
}
let ty = infer_type_from_definitions(db, symbol, defs.iter().cloned())?;
jar.type_store.cache_symbol_public_type(symbol, ty);
// TODO record dependencies
Ok(ty)
}
/// Infer type of a symbol as union of the given `Definitions`.
fn infer_type_from_definitions<T>(
db: &dyn SemanticDb,
symbol: GlobalSymbolId,
definitions: T,
) -> QueryResult<Type>
where
T: Debug + IntoIterator<Item = Definition>,
{
infer_type_from_constrained_definitions(
db,
symbol,
definitions
.into_iter()
.map(|definition| ConstrainedDefinition {
definition,
constraints: vec![],
}),
)
}
/// Infer type of a symbol as union of the given `ConstrainedDefinitions`.
fn infer_type_from_constrained_definitions<T>(
db: &dyn SemanticDb,
symbol: GlobalSymbolId,
constrained_definitions: T,
) -> QueryResult<Type>
where
T: IntoIterator<Item = ConstrainedDefinition>,
{
let jar: &SemanticJar = db.jar()?;
let mut tys = constrained_definitions
.into_iter()
.map(|def| infer_constrained_definition_type(db, symbol, def.clone()))
.peekable();
if let Some(first) = tys.next() {
if tys.peek().is_some() {
Ok(jar.type_store.add_union(
symbol.file_id,
&Iterator::chain(std::iter::once(first), tys).collect::<QueryResult<Vec<_>>>()?,
))
} else {
first
}
} else {
Ok(Type::Unknown)
}
}
/// Infer type for a ConstrainedDefinition (intersection of the definition type and the
/// constraints)
#[tracing::instrument(level = "trace", skip(db))]
pub fn infer_constrained_definition_type(
db: &dyn SemanticDb,
symbol: GlobalSymbolId,
constrained_definition: ConstrainedDefinition,
) -> QueryResult<Type> {
let ConstrainedDefinition {
definition,
constraints,
} = constrained_definition;
let index = semantic_index(db, symbol.file_id)?;
let parsed = parse(db.upcast(), symbol.file_id)?;
let mut intersected_types = vec![infer_definition_type(db, symbol, definition)?];
for constraint in constraints {
if let Some(constraint_type) = infer_constraint_type(
db,
symbol,
index.resolve_expression_id(parsed.syntax(), constraint),
)? {
intersected_types.push(constraint_type);
}
}
let jar: &SemanticJar = db.jar()?;
Ok(jar
.type_store
.add_intersection(symbol.file_id, &intersected_types, &[]))
}
/// Infer a type for a Definition
#[tracing::instrument(level = "trace", skip(db))]
pub fn infer_definition_type(
db: &dyn SemanticDb,
symbol: GlobalSymbolId,
definition: Definition,
) -> QueryResult<Type> {
let jar: &SemanticJar = db.jar()?;
let type_store = &jar.type_store;
let file_id = symbol.file_id;
match definition {
Definition::Unbound => Ok(Type::Unbound),
Definition::Import(ImportDefinition {
module: module_name,
}) => {
if let Some(module) = resolve_module(db, module_name.clone())? {
Ok(Type::Module(ModuleTypeId { module, file_id }))
} else {
Ok(Type::Unknown)
}
}
Definition::ImportFrom(ImportFromDefinition {
module,
name,
level,
}) => {
// TODO relative imports
assert!(matches!(level, 0));
let module_name = ModuleName::new(module.as_ref().expect("TODO relative imports"));
let Some(module) = resolve_module(db, module_name.clone())? else {
return Ok(Type::Unknown);
};
if let Some(remote_symbol) = resolve_global_symbol(db, module, &name)? {
infer_symbol_public_type(db, remote_symbol)
} else {
Ok(Type::Unknown)
}
}
Definition::ClassDef(node_key) => {
if let Some(ty) = type_store.get_cached_node_type(file_id, node_key.erased()) {
Ok(ty)
} else {
let parsed = parse(db.upcast(), file_id)?;
let ast = parsed.syntax();
let index = semantic_index(db, file_id)?;
let node = node_key.resolve_unwrap(ast.as_any_node_ref());
let mut bases = Vec::with_capacity(node.bases().len());
for base in node.bases() {
bases.push(infer_expr_type(db, file_id, base)?);
}
let scope_id = index.symbol_table().scope_id_for_node(node_key.erased());
let ty = type_store.add_class(file_id, &node.name.id, scope_id, bases);
type_store.cache_node_type(file_id, *node_key.erased(), ty);
Ok(ty)
}
}
Definition::FunctionDef(node_key) => {
if let Some(ty) = type_store.get_cached_node_type(file_id, node_key.erased()) {
Ok(ty)
} else {
let parsed = parse(db.upcast(), file_id)?;
let ast = parsed.syntax();
let index = semantic_index(db, file_id)?;
let node = node_key
.resolve(ast.as_any_node_ref())
.expect("node key should resolve");
let decorator_tys = node
.decorator_list
.iter()
.map(|decorator| infer_expr_type(db, file_id, &decorator.expression))
.collect::<QueryResult<_>>()?;
let scope_id = index.symbol_table().scope_id_for_node(node_key.erased());
let ty = type_store.add_function(
file_id,
&node.name.id,
symbol.symbol_id,
scope_id,
decorator_tys,
);
type_store.cache_node_type(file_id, *node_key.erased(), ty);
Ok(ty)
}
}
Definition::Assignment(node_key) => {
let parsed = parse(db.upcast(), file_id)?;
let ast = parsed.syntax();
let node = node_key.resolve_unwrap(ast.as_any_node_ref());
// TODO handle unpacking assignment
infer_expr_type(db, file_id, &node.value)
}
Definition::AnnotatedAssignment(node_key) => {
let parsed = parse(db.upcast(), file_id)?;
let ast = parsed.syntax();
let node = node_key.resolve_unwrap(ast.as_any_node_ref());
// TODO actually look at the annotation
let Some(value) = &node.value else {
return Ok(Type::Unknown);
};
// TODO handle unpacking assignment
infer_expr_type(db, file_id, value)
}
Definition::NamedExpr(node_key) => {
let parsed = parse(db.upcast(), file_id)?;
let ast = parsed.syntax();
let node = node_key.resolve_unwrap(ast.as_any_node_ref());
infer_expr_type(db, file_id, &node.value)
}
}
}
/// Return the type that the given constraint (an expression from a control-flow test) requires the
/// given symbol to have. For example, returns the Type "~None" as the constraint type if given the
/// symbol ID for x and the expression ID for `x is not None`. Returns (Rust) None if the given
/// expression applies no constraints on the given symbol.
#[tracing::instrument(level = "trace", skip(db))]
fn infer_constraint_type(
db: &dyn SemanticDb,
symbol_id: GlobalSymbolId,
// TODO this should preferably take an &ast::Expr instead of AnyNodeRef
expression: ast::AnyNodeRef,
) -> QueryResult<Option<Type>> {
let file_id = symbol_id.file_id;
let index = semantic_index(db, file_id)?;
let jar: &SemanticJar = db.jar()?;
let symbol_name = symbol_id.symbol_id.symbol(&index.symbol_table).name();
// TODO narrowing attributes
// TODO narrowing dict keys
// TODO isinstance, ==/!=, type(...), literals, bools...
match expression {
ast::AnyNodeRef::ExprCompare(ast::ExprCompare {
left,
ops,
comparators,
..
}) => {
// TODO chained comparisons
match left.as_ref() {
ast::Expr::Name(ast::ExprName { id, .. }) if id == symbol_name => match ops[0] {
ast::CmpOp::Is | ast::CmpOp::IsNot => {
Ok(match infer_expr_type(db, file_id, &comparators[0])? {
Type::None => Some(Type::None),
_ => None,
}
.map(|ty| {
if matches!(ops[0], ast::CmpOp::IsNot) {
jar.type_store.add_intersection(file_id, &[], &[ty])
} else {
ty
}
}))
}
_ => Ok(None),
},
_ => Ok(None),
}
}
_ => Ok(None),
}
}
/// Infer type of the given expression.
fn infer_expr_type(db: &dyn SemanticDb, file_id: FileId, expr: &ast::Expr) -> QueryResult<Type> {
// TODO cache the resolution of the type on the node
let index = semantic_index(db, file_id)?;
match expr {
ast::Expr::NoneLiteral(_) => Ok(Type::None),
ast::Expr::NumberLiteral(ast::ExprNumberLiteral { value, .. }) => {
match value {
ast::Number::Int(n) => {
// TODO support big int literals
Ok(n.as_i64().map(Type::IntLiteral).unwrap_or(Type::Unknown))
}
// TODO builtins.float or builtins.complex
_ => Ok(Type::Unknown),
}
}
ast::Expr::Name(name) => {
// TODO look up in the correct scope, don't assume global
if let Some(symbol_id) = index.symbol_table().root_symbol_id_by_name(&name.id) {
infer_type_from_constrained_definitions(
db,
GlobalSymbolId { file_id, symbol_id },
index.reachable_definitions(symbol_id, expr),
)
} else {
Ok(Type::Unknown)
}
}
ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) => {
let value_type = infer_expr_type(db, file_id, value)?;
let attr_name = &Name::new(&attr.id);
value_type
.get_member(db, attr_name)
.map(|ty| ty.unwrap_or(Type::Unknown))
}
ast::Expr::BinOp(ast::ExprBinOp {
left, op, right, ..
}) => {
let left_ty = infer_expr_type(db, file_id, left)?;
let right_ty = infer_expr_type(db, file_id, right)?;
// TODO add reverse bin op support if right <: left
left_ty.resolve_bin_op(db, *op, right_ty)
}
ast::Expr::Named(ast::ExprNamed { value, .. }) => infer_expr_type(db, file_id, value),
ast::Expr::If(ast::ExprIf { body, orelse, .. }) => {
// TODO detect statically known truthy or falsy test
let body_ty = infer_expr_type(db, file_id, body)?;
let else_ty = infer_expr_type(db, file_id, orelse)?;
let jar: &SemanticJar = db.jar()?;
Ok(jar.type_store.add_union(file_id, &[body_ty, else_ty]))
}
_ => todo!("expression type resolution for {:?}", expr),
}
}
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use crate::db::tests::TestDb;
use crate::db::{HasJar, SemanticJar};
use crate::module::{
resolve_module, set_module_search_paths, ModuleName, ModuleResolutionInputs,
};
use crate::semantic::{infer_symbol_public_type, resolve_global_symbol, Type};
use crate::Name;
// TODO with virtual filesystem we shouldn't have to write files to disk for these
// tests
struct TestCase {
temp_dir: tempfile::TempDir,
db: TestDb,
src: PathBuf,
}
fn create_test() -> std::io::Result<TestCase> {
let temp_dir = tempfile::tempdir()?;
let src = temp_dir.path().join("src");
std::fs::create_dir(&src)?;
let src = src.canonicalize()?;
let search_paths = ModuleResolutionInputs {
extra_paths: vec![],
workspace_root: src.clone(),
site_packages: None,
custom_typeshed: None,
};
let mut db = TestDb::default();
set_module_search_paths(&mut db, search_paths);
Ok(TestCase { temp_dir, db, src })
}
fn write_to_path(case: &TestCase, relative_path: &str, contents: &str) -> anyhow::Result<()> {
let path = case.src.join(relative_path);
std::fs::write(path, contents)?;
Ok(())
}
fn get_public_type(
case: &TestCase,
module_name: &str,
variable_name: &str,
) -> anyhow::Result<Type> {
let db = &case.db;
let module = resolve_module(db, ModuleName::new(module_name))?.expect("Module to exist");
let symbol = resolve_global_symbol(db, module, variable_name)?.expect("symbol to exist");
Ok(infer_symbol_public_type(db, symbol)?)
}
fn assert_public_type(
case: &TestCase,
module_name: &str,
variable_name: &str,
type_name: &str,
) -> anyhow::Result<()> {
let ty = get_public_type(case, module_name, variable_name)?;
let jar = HasJar::<SemanticJar>::jar(&case.db)?;
assert_eq!(format!("{}", ty.display(&jar.type_store)), type_name);
Ok(())
}
#[test]
fn follow_import_to_class() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(&case, "a.py", "from b import C as D; E = D")?;
write_to_path(&case, "b.py", "class C: pass")?;
assert_public_type(&case, "a", "E", "Literal[C]")
}
#[test]
fn resolve_base_class_by_name() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"mod.py",
"
class Base: pass
class Sub(Base): pass
",
)?;
let ty = get_public_type(&case, "mod", "Sub")?;
let Type::Class(class_id) = ty else {
panic!("Sub is not a Class")
};
let jar = HasJar::<SemanticJar>::jar(&case.db)?;
let base_names: Vec<_> = jar
.type_store
.get_class(class_id)
.bases()
.iter()
.map(|base_ty| format!("{}", base_ty.display(&jar.type_store)))
.collect();
assert_eq!(base_names, vec!["Literal[Base]"]);
Ok(())
}
#[test]
fn resolve_method() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"mod.py",
"
class C:
def f(self): pass
",
)?;
let ty = get_public_type(&case, "mod", "C")?;
let Type::Class(class_id) = ty else {
panic!("C is not a Class");
};
let member_ty = class_id
.get_own_class_member(&case.db, &Name::new("f"))
.expect("C.f to resolve");
let Some(Type::Function(func_id)) = member_ty else {
panic!("C.f is not a Function");
};
let jar = HasJar::<SemanticJar>::jar(&case.db)?;
let function = jar.type_store.get_function(func_id);
assert_eq!(function.name(), "f");
Ok(())
}
#[test]
fn resolve_module_member() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(&case, "a.py", "import b; D = b.C")?;
write_to_path(&case, "b.py", "class C: pass")?;
assert_public_type(&case, "a", "D", "Literal[C]")
}
#[test]
fn resolve_literal() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(&case, "a.py", "x = 1")?;
assert_public_type(&case, "a", "x", "Literal[1]")
}
#[test]
fn resolve_union() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
if flag:
x = 1
else:
x = 2
",
)?;
assert_public_type(&case, "a", "x", "Literal[1, 2]")
}
#[test]
fn resolve_visible_def() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(&case, "a.py", "y = 1; y = 2; x = y")?;
assert_public_type(&case, "a", "x", "Literal[2]")
}
#[test]
fn join_paths() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
y = 1
y = 2
if flag:
y = 3
x = y
",
)?;
assert_public_type(&case, "a", "x", "Literal[2, 3]")
}
#[test]
fn maybe_unbound() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
if flag:
y = 1
x = y
",
)?;
assert_public_type(&case, "a", "x", "Literal[1] | Unbound")
}
#[test]
fn if_elif_else() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
y = 1
y = 2
if flag:
y = 3
elif flag2:
y = 4
else:
r = y
y = 5
s = y
x = y
",
)?;
assert_public_type(&case, "a", "x", "Literal[3, 4, 5]")?;
assert_public_type(&case, "a", "r", "Literal[2]")?;
assert_public_type(&case, "a", "s", "Literal[5]")
}
#[test]
fn if_elif() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
y = 1
y = 2
if flag:
y = 3
elif flag2:
y = 4
x = y
",
)?;
assert_public_type(&case, "a", "x", "Literal[2, 3, 4]")
}
#[test]
fn literal_int_arithmetic() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
a = 2 + 1
b = a - 4
c = a * b
d = c / 3
e = 5 % 3
",
)?;
assert_public_type(&case, "a", "a", "Literal[3]")?;
assert_public_type(&case, "a", "b", "Literal[-1]")?;
assert_public_type(&case, "a", "c", "Literal[-3]")?;
assert_public_type(&case, "a", "d", "Literal[-1]")?;
assert_public_type(&case, "a", "e", "Literal[2]")
}
#[test]
fn walrus() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
x = (y := 1) + 1
",
)?;
assert_public_type(&case, "a", "x", "Literal[2]")?;
assert_public_type(&case, "a", "y", "Literal[1]")
}
#[test]
fn ifexpr() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
x = 1 if flag else 2
",
)?;
assert_public_type(&case, "a", "x", "Literal[1, 2]")
}
#[test]
fn ifexpr_walrus() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
y = z = 0
x = (y := 1) if flag else (z := 2)
a = y
b = z
",
)?;
assert_public_type(&case, "a", "x", "Literal[1, 2]")?;
assert_public_type(&case, "a", "a", "Literal[0, 1]")?;
assert_public_type(&case, "a", "b", "Literal[0, 2]")
}
#[test]
fn ifexpr_walrus_2() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
y = 0
(y := 1) if flag else (y := 2)
a = y
",
)?;
assert_public_type(&case, "a", "a", "Literal[1, 2]")
}
#[test]
fn ifexpr_nested() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
x = 1 if flag else 2 if flag2 else 3
",
)?;
assert_public_type(&case, "a", "x", "Literal[1, 2, 3]")
}
#[test]
fn none() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
x = 1 if flag else None
",
)?;
assert_public_type(&case, "a", "x", "Literal[1] | None")
}
#[test]
fn narrow_none() -> anyhow::Result<()> {
let case = create_test()?;
write_to_path(
&case,
"a.py",
"
x = 1 if flag else None
y = 0
if x is not None:
y = x
z = y
",
)?;
// TODO normalization of unions and intersections: this type is technically correct but
// begging for normalization
assert_public_type(&case, "a", "z", "Literal[0] | Literal[1] | None & ~None")
}
}

View File

@@ -1,105 +0,0 @@
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use ruff_notebook::Notebook;
use ruff_python_ast::PySourceType;
use crate::cache::KeyValueCache;
use crate::db::{QueryResult, SourceDb};
use crate::files::FileId;
#[tracing::instrument(level = "debug", skip(db))]
pub(crate) fn source_text(db: &dyn SourceDb, file_id: FileId) -> QueryResult<Source> {
let jar = db.jar()?;
let sources = &jar.sources;
sources.get(&file_id, |file_id| {
let path = db.file_path(*file_id);
let source_text = std::fs::read_to_string(&path).unwrap_or_else(|err| {
tracing::error!("Failed to read file '{path:?}: {err}'. Falling back to empty text");
String::new()
});
let python_ty = PySourceType::from(&path);
let kind = match python_ty {
PySourceType::Python => {
SourceKind::Python(Arc::from(source_text))
}
PySourceType::Stub => SourceKind::Stub(Arc::from(source_text)),
PySourceType::Ipynb => {
let notebook = Notebook::from_source_code(&source_text).unwrap_or_else(|err| {
// TODO should this be changed to never fail?
// or should we instead add a diagnostic somewhere? But what would we return in this case?
tracing::error!(
"Failed to parse notebook '{path:?}: {err}'. Falling back to an empty notebook"
);
Notebook::from_source_code("").unwrap()
});
SourceKind::IpyNotebook(Arc::new(notebook))
}
};
Ok(Source { kind })
})
}
#[derive(Debug, Clone, PartialEq)]
pub enum SourceKind {
Python(Arc<str>),
Stub(Arc<str>),
IpyNotebook(Arc<Notebook>),
}
impl<'a> From<&'a SourceKind> for PySourceType {
fn from(value: &'a SourceKind) -> Self {
match value {
SourceKind::Python(_) => PySourceType::Python,
SourceKind::Stub(_) => PySourceType::Stub,
SourceKind::IpyNotebook(_) => PySourceType::Ipynb,
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct Source {
kind: SourceKind,
}
impl Source {
pub fn python<T: Into<Arc<str>>>(source: T) -> Self {
Self {
kind: SourceKind::Python(source.into()),
}
}
pub fn kind(&self) -> &SourceKind {
&self.kind
}
pub fn text(&self) -> &str {
match &self.kind {
SourceKind::Python(text) => text,
SourceKind::Stub(text) => text,
SourceKind::IpyNotebook(notebook) => notebook.source_code(),
}
}
}
#[derive(Debug, Default)]
pub struct SourceStorage(pub(crate) KeyValueCache<FileId, Source>);
impl Deref for SourceStorage {
type Target = KeyValueCache<FileId, Source>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for SourceStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

View File

@@ -0,0 +1,48 @@
/// Enumeration of all supported Python versions
///
/// TODO: unify with the `PythonVersion` enum in the linter/formatter crates?
#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq, PartialOrd, Ord, Default, clap::ValueEnum)]
pub enum TargetVersion {
Py37,
#[default]
Py38,
Py39,
Py310,
Py311,
Py312,
Py313,
}
impl TargetVersion {
const fn as_str(self) -> &'static str {
match self {
Self::Py37 => "py37",
Self::Py38 => "py38",
Self::Py39 => "py39",
Self::Py310 => "py310",
Self::Py311 => "py311",
Self::Py312 => "py312",
Self::Py313 => "py313",
}
}
}
impl std::fmt::Display for TargetVersion {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}
impl From<TargetVersion> for red_knot_python_semantic::PythonVersion {
fn from(value: TargetVersion) -> Self {
match value {
TargetVersion::Py37 => Self::PY37,
TargetVersion::Py38 => Self::PY38,
TargetVersion::Py39 => Self::PY39,
TargetVersion::Py310 => Self::PY310,
TargetVersion::Py311 => Self::PY311,
TargetVersion::Py312 => Self::PY312,
TargetVersion::Py313 => Self::PY313,
}
}
}

View File

@@ -1,526 +0,0 @@
use std::collections::BTreeMap;
use std::fmt;
use std::num::{NonZeroU16, NonZeroUsize};
use std::ops::{RangeFrom, RangeInclusive};
use std::str::FromStr;
use rustc_hash::FxHashMap;
use smol_str::SmolStr;
use ruff_python_stdlib::identifiers::is_identifier;
#[derive(Debug, PartialEq, Eq)]
pub struct TypeshedVersionsParseError {
line_number: NonZeroU16,
reason: TypeshedVersionsParseErrorKind,
}
impl fmt::Display for TypeshedVersionsParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let TypeshedVersionsParseError {
line_number,
reason,
} = self;
write!(
f,
"Error while parsing line {line_number} of typeshed's VERSIONS file: {reason}"
)
}
}
impl std::error::Error for TypeshedVersionsParseError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
if let TypeshedVersionsParseErrorKind::IntegerParsingFailure { err, .. } = &self.reason {
Some(err)
} else {
None
}
}
}
#[derive(Debug, PartialEq, Eq)]
pub enum TypeshedVersionsParseErrorKind {
TooManyLines(NonZeroUsize),
UnexpectedNumberOfColons,
InvalidModuleName(String),
UnexpectedNumberOfHyphens,
UnexpectedNumberOfPeriods(String),
IntegerParsingFailure {
version: String,
err: std::num::ParseIntError,
},
}
impl fmt::Display for TypeshedVersionsParseErrorKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::TooManyLines(num_lines) => write!(
f,
"File has too many lines ({num_lines}); maximum allowed is {}",
NonZeroU16::MAX
),
Self::UnexpectedNumberOfColons => {
f.write_str("Expected every non-comment line to have exactly one colon")
}
Self::InvalidModuleName(name) => write!(
f,
"Expected all components of '{name}' to be valid Python identifiers"
),
Self::UnexpectedNumberOfHyphens => {
f.write_str("Expected every non-comment line to have exactly one '-' character")
}
Self::UnexpectedNumberOfPeriods(format) => write!(
f,
"Expected all versions to be in the form {{MAJOR}}.{{MINOR}}; got '{format}'"
),
Self::IntegerParsingFailure { version, err } => write!(
f,
"Failed to convert '{version}' to a pair of integers due to {err}",
),
}
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct TypeshedVersions(FxHashMap<SmolStr, PyVersionRange>);
impl TypeshedVersions {
pub fn len(&self) -> usize {
self.0.len()
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn contains_module(&self, module_name: impl Into<SmolStr>) -> bool {
self.0.contains_key(&module_name.into())
}
pub fn module_exists_on_version(
&self,
module: impl Into<SmolStr>,
version: impl Into<PyVersion>,
) -> bool {
let version = version.into();
let mut module: Option<SmolStr> = Some(module.into());
while let Some(module_to_try) = module {
if let Some(range) = self.0.get(&module_to_try) {
return range.contains(version);
}
module = module_to_try
.rsplit_once('.')
.map(|(parent, _)| SmolStr::new(parent));
}
false
}
}
impl FromStr for TypeshedVersions {
type Err = TypeshedVersionsParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut map = FxHashMap::default();
for (line_index, line) in s.lines().enumerate() {
// humans expect line numbers to be 1-indexed
let line_number = NonZeroUsize::new(line_index.saturating_add(1)).unwrap();
let Ok(line_number) = NonZeroU16::try_from(line_number) else {
return Err(TypeshedVersionsParseError {
line_number: NonZeroU16::MAX,
reason: TypeshedVersionsParseErrorKind::TooManyLines(line_number),
});
};
let Some(content) = line.split('#').map(str::trim).next() else {
continue;
};
if content.is_empty() {
continue;
}
let mut parts = content.split(':').map(str::trim);
let (Some(module_name), Some(rest), None) = (parts.next(), parts.next(), parts.next())
else {
return Err(TypeshedVersionsParseError {
line_number,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfColons,
});
};
let module_name = SmolStr::new(module_name);
if !module_name.split('.').all(is_identifier) {
return Err(TypeshedVersionsParseError {
line_number,
reason: TypeshedVersionsParseErrorKind::InvalidModuleName(
module_name.to_string(),
),
});
}
match PyVersionRange::from_str(rest) {
Ok(version) => map.insert(module_name, version),
Err(reason) => {
return Err(TypeshedVersionsParseError {
line_number,
reason,
})
}
};
}
Ok(Self(map))
}
}
impl fmt::Display for TypeshedVersions {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let sorted_items: BTreeMap<&SmolStr, &PyVersionRange> = self.0.iter().collect();
for (module_name, range) in sorted_items {
writeln!(f, "{module_name}: {range}")?;
}
Ok(())
}
}
#[derive(Debug, Clone, Eq, PartialEq)]
enum PyVersionRange {
AvailableFrom(RangeFrom<PyVersion>),
AvailableWithin(RangeInclusive<PyVersion>),
}
impl PyVersionRange {
fn contains(&self, version: PyVersion) -> bool {
match self {
Self::AvailableFrom(inner) => inner.contains(&version),
Self::AvailableWithin(inner) => inner.contains(&version),
}
}
}
impl FromStr for PyVersionRange {
type Err = TypeshedVersionsParseErrorKind;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut parts = s.split('-').map(str::trim);
match (parts.next(), parts.next(), parts.next()) {
(Some(lower), Some(""), None) => Ok(Self::AvailableFrom((lower.parse()?)..)),
(Some(lower), Some(upper), None) => {
Ok(Self::AvailableWithin((lower.parse()?)..=(upper.parse()?)))
}
_ => Err(TypeshedVersionsParseErrorKind::UnexpectedNumberOfHyphens),
}
}
}
impl fmt::Display for PyVersionRange {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::AvailableFrom(range_from) => write!(f, "{}-", range_from.start),
Self::AvailableWithin(range_inclusive) => {
write!(f, "{}-{}", range_inclusive.start(), range_inclusive.end())
}
}
}
}
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct PyVersion {
major: u8,
minor: u8,
}
impl FromStr for PyVersion {
type Err = TypeshedVersionsParseErrorKind;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut parts = s.split('.').map(str::trim);
let (Some(major), Some(minor), None) = (parts.next(), parts.next(), parts.next()) else {
return Err(TypeshedVersionsParseErrorKind::UnexpectedNumberOfPeriods(
s.to_string(),
));
};
let major = match u8::from_str(major) {
Ok(major) => major,
Err(err) => {
return Err(TypeshedVersionsParseErrorKind::IntegerParsingFailure {
version: s.to_string(),
err,
})
}
};
let minor = match u8::from_str(minor) {
Ok(minor) => minor,
Err(err) => {
return Err(TypeshedVersionsParseErrorKind::IntegerParsingFailure {
version: s.to_string(),
err,
})
}
};
Ok(Self { major, minor })
}
}
impl fmt::Display for PyVersion {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let PyVersion { major, minor } = self;
write!(f, "{major}.{minor}")
}
}
// TODO: unify with the PythonVersion enum in the linter/formatter crates?
#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum SupportedPyVersion {
Py37,
#[default]
Py38,
Py39,
Py310,
Py311,
Py312,
Py313,
}
impl From<SupportedPyVersion> for PyVersion {
fn from(value: SupportedPyVersion) -> Self {
match value {
SupportedPyVersion::Py37 => PyVersion { major: 3, minor: 7 },
SupportedPyVersion::Py38 => PyVersion { major: 3, minor: 8 },
SupportedPyVersion::Py39 => PyVersion { major: 3, minor: 9 },
SupportedPyVersion::Py310 => PyVersion {
major: 3,
minor: 10,
},
SupportedPyVersion::Py311 => PyVersion {
major: 3,
minor: 11,
},
SupportedPyVersion::Py312 => PyVersion {
major: 3,
minor: 12,
},
SupportedPyVersion::Py313 => PyVersion {
major: 3,
minor: 13,
},
}
}
}
#[cfg(test)]
mod tests {
use std::num::{IntErrorKind, NonZeroU16};
use super::*;
use insta::assert_snapshot;
#[allow(unsafe_code)]
const ONE: NonZeroU16 = unsafe { NonZeroU16::new_unchecked(1) };
#[test]
fn can_parse_vendored_versions_file() {
let versions_data = include_str!(concat!(
env!("CARGO_MANIFEST_DIR"),
"/vendor/typeshed/stdlib/VERSIONS"
));
let versions = TypeshedVersions::from_str(versions_data).unwrap();
assert!(versions.len() > 100);
assert!(versions.len() < 1000);
assert!(versions.contains_module("asyncio"));
assert!(versions.module_exists_on_version("asyncio", SupportedPyVersion::Py310));
assert!(versions.contains_module("asyncio.staggered"));
assert!(versions.module_exists_on_version("asyncio.staggered", SupportedPyVersion::Py38));
assert!(!versions.module_exists_on_version("asyncio.staggered", SupportedPyVersion::Py37));
assert!(versions.contains_module("audioop"));
assert!(versions.module_exists_on_version("audioop", SupportedPyVersion::Py312));
assert!(!versions.module_exists_on_version("audioop", SupportedPyVersion::Py313));
}
#[test]
fn can_parse_mock_versions_file() {
const VERSIONS: &str = "\
# a comment
# some more comment
# yet more comment
# and some more comment
bar: 2.7-3.10
# more comment
bar.baz: 3.1-3.9
foo: 3.8- # trailing comment
";
let parsed_versions = TypeshedVersions::from_str(VERSIONS).unwrap();
assert_eq!(parsed_versions.len(), 3);
assert_snapshot!(parsed_versions.to_string(), @r###"
bar: 2.7-3.10
bar.baz: 3.1-3.9
foo: 3.8-
"###
);
assert!(parsed_versions.contains_module("foo"));
assert!(!parsed_versions.module_exists_on_version("foo", SupportedPyVersion::Py37));
assert!(parsed_versions.module_exists_on_version("foo", SupportedPyVersion::Py38));
assert!(parsed_versions.module_exists_on_version("foo", SupportedPyVersion::Py311));
assert!(parsed_versions.contains_module("bar"));
assert!(parsed_versions.module_exists_on_version("bar", SupportedPyVersion::Py37));
assert!(parsed_versions.module_exists_on_version("bar", SupportedPyVersion::Py310));
assert!(!parsed_versions.module_exists_on_version("bar", SupportedPyVersion::Py311));
assert!(parsed_versions.contains_module("bar.baz"));
assert!(parsed_versions.module_exists_on_version("bar.baz", SupportedPyVersion::Py37));
assert!(parsed_versions.module_exists_on_version("bar.baz", SupportedPyVersion::Py39));
assert!(!parsed_versions.module_exists_on_version("bar.baz", SupportedPyVersion::Py310));
assert!(!parsed_versions.contains_module("spam"));
assert!(!parsed_versions.module_exists_on_version("spam", SupportedPyVersion::Py37));
assert!(!parsed_versions.module_exists_on_version("spam", SupportedPyVersion::Py313));
}
#[test]
fn invalid_huge_versions_file() {
let offset = 100;
let too_many = u16::MAX as usize + offset;
let mut massive_versions_file = String::new();
for i in 0..too_many {
massive_versions_file.push_str(&format!("x{i}: 3.8-\n"));
}
assert_eq!(
TypeshedVersions::from_str(&massive_versions_file),
Err(TypeshedVersionsParseError {
line_number: NonZeroU16::MAX,
reason: TypeshedVersionsParseErrorKind::TooManyLines(
NonZeroUsize::new(too_many + 1 - offset).unwrap()
)
})
);
}
#[test]
fn invalid_typeshed_versions_bad_colon_number() {
assert_eq!(
TypeshedVersions::from_str("foo 3.7"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfColons
})
);
assert_eq!(
TypeshedVersions::from_str("foo:: 3.7"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfColons
})
);
}
#[test]
fn invalid_typeshed_versions_non_identifier_modules() {
assert_eq!(
TypeshedVersions::from_str("not!an!identifier!: 3.7"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::InvalidModuleName(
"not!an!identifier!".to_string()
)
})
);
assert_eq!(
TypeshedVersions::from_str("(also_not).(an_identifier): 3.7"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::InvalidModuleName(
"(also_not).(an_identifier)".to_string()
)
})
);
}
#[test]
fn invalid_typeshed_versions_bad_hyphen_number() {
assert_eq!(
TypeshedVersions::from_str("foo: 3.8"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfHyphens
})
);
assert_eq!(
TypeshedVersions::from_str("foo: 3.8--"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfHyphens
})
);
assert_eq!(
TypeshedVersions::from_str("foo: 3.8--3.9"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfHyphens
})
);
}
#[test]
fn invalid_typeshed_versions_bad_period_number() {
assert_eq!(
TypeshedVersions::from_str("foo: 38-"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfPeriods("38".to_string())
})
);
assert_eq!(
TypeshedVersions::from_str("foo: 3..8-"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfPeriods(
"3..8".to_string()
)
})
);
assert_eq!(
TypeshedVersions::from_str("foo: 3.8-3..11"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfPeriods(
"3..11".to_string()
)
})
);
}
#[test]
fn invalid_typeshed_versions_non_digits() {
let err = TypeshedVersions::from_str("foo: 1.two-").unwrap_err();
assert_eq!(err.line_number, ONE);
let TypeshedVersionsParseErrorKind::IntegerParsingFailure { version, err } = err.reason
else {
panic!()
};
assert_eq!(version, "1.two".to_string());
assert_eq!(*err.kind(), IntErrorKind::InvalidDigit);
let err = TypeshedVersions::from_str("foo: 3.8-four.9").unwrap_err();
assert_eq!(err.line_number, ONE);
let TypeshedVersionsParseErrorKind::IntegerParsingFailure { version, err } = err.reason
else {
panic!()
};
assert_eq!(version, "four.9".to_string());
assert_eq!(*err.kind(), IntErrorKind::InvalidDigit);
}
}

View File

@@ -0,0 +1 @@

View File

@@ -1,77 +0,0 @@
use std::path::Path;
use anyhow::Context;
use notify::event::{CreateKind, RemoveKind};
use notify::{recommended_watcher, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
use crate::program::{FileChangeKind, FileWatcherChange};
pub struct FileWatcher {
watcher: RecommendedWatcher,
}
pub trait EventHandler: Send + 'static {
fn handle(&self, changes: Vec<FileWatcherChange>);
}
impl<F> EventHandler for F
where
F: Fn(Vec<FileWatcherChange>) + Send + 'static,
{
fn handle(&self, changes: Vec<FileWatcherChange>) {
let f = self;
f(changes);
}
}
impl FileWatcher {
pub fn new<E>(handler: E) -> anyhow::Result<Self>
where
E: EventHandler,
{
Self::from_handler(Box::new(handler))
}
fn from_handler(handler: Box<dyn EventHandler>) -> anyhow::Result<Self> {
let watcher = recommended_watcher(move |changes: notify::Result<Event>| {
match changes {
Ok(event) => {
// TODO verify that this handles all events correctly
let change_kind = match event.kind {
EventKind::Create(CreateKind::File) => FileChangeKind::Created,
EventKind::Modify(_) => FileChangeKind::Modified,
EventKind::Remove(RemoveKind::File) => FileChangeKind::Deleted,
_ => {
return;
}
};
let mut changes = Vec::new();
for path in event.paths {
if path.is_file() {
changes.push(FileWatcherChange::new(path, change_kind));
}
}
if !changes.is_empty() {
handler.handle(changes);
}
}
// TODO proper error handling
Err(err) => {
panic!("Error: {err}");
}
}
})
.context("Failed to create file watcher.")?;
Ok(Self { watcher })
}
pub fn watch_folder(&mut self, path: &Path) -> anyhow::Result<()> {
self.watcher.watch(path, RecursiveMode::Recursive)?;
Ok(())
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1 +0,0 @@
114409d49b43ba62a179ebb856fa70a5161f751e

View File

@@ -1,117 +0,0 @@
import sys
from typing import Literal
SF_APPEND: Literal[0x00040000]
SF_ARCHIVED: Literal[0x00010000]
SF_IMMUTABLE: Literal[0x00020000]
SF_NOUNLINK: Literal[0x00100000]
SF_SNAPSHOT: Literal[0x00200000]
ST_MODE: Literal[0]
ST_INO: Literal[1]
ST_DEV: Literal[2]
ST_NLINK: Literal[3]
ST_UID: Literal[4]
ST_GID: Literal[5]
ST_SIZE: Literal[6]
ST_ATIME: Literal[7]
ST_MTIME: Literal[8]
ST_CTIME: Literal[9]
S_IFIFO: Literal[0o010000]
S_IFLNK: Literal[0o120000]
S_IFREG: Literal[0o100000]
S_IFSOCK: Literal[0o140000]
S_IFBLK: Literal[0o060000]
S_IFCHR: Literal[0o020000]
S_IFDIR: Literal[0o040000]
# These are 0 on systems that don't support the specific kind of file.
# Example: Linux doesn't support door files, so S_IFDOOR is 0 on linux.
S_IFDOOR: int
S_IFPORT: int
S_IFWHT: int
S_ISUID: Literal[0o4000]
S_ISGID: Literal[0o2000]
S_ISVTX: Literal[0o1000]
S_IRWXU: Literal[0o0700]
S_IRUSR: Literal[0o0400]
S_IWUSR: Literal[0o0200]
S_IXUSR: Literal[0o0100]
S_IRWXG: Literal[0o0070]
S_IRGRP: Literal[0o0040]
S_IWGRP: Literal[0o0020]
S_IXGRP: Literal[0o0010]
S_IRWXO: Literal[0o0007]
S_IROTH: Literal[0o0004]
S_IWOTH: Literal[0o0002]
S_IXOTH: Literal[0o0001]
S_ENFMT: Literal[0o2000]
S_IREAD: Literal[0o0400]
S_IWRITE: Literal[0o0200]
S_IEXEC: Literal[0o0100]
UF_APPEND: Literal[0x00000004]
UF_COMPRESSED: Literal[0x00000020] # OS X 10.6+ only
UF_HIDDEN: Literal[0x00008000] # OX X 10.5+ only
UF_IMMUTABLE: Literal[0x00000002]
UF_NODUMP: Literal[0x00000001]
UF_NOUNLINK: Literal[0x00000010]
UF_OPAQUE: Literal[0x00000008]
def S_IMODE(mode: int, /) -> int: ...
def S_IFMT(mode: int, /) -> int: ...
def S_ISBLK(mode: int, /) -> bool: ...
def S_ISCHR(mode: int, /) -> bool: ...
def S_ISDIR(mode: int, /) -> bool: ...
def S_ISDOOR(mode: int, /) -> bool: ...
def S_ISFIFO(mode: int, /) -> bool: ...
def S_ISLNK(mode: int, /) -> bool: ...
def S_ISPORT(mode: int, /) -> bool: ...
def S_ISREG(mode: int, /) -> bool: ...
def S_ISSOCK(mode: int, /) -> bool: ...
def S_ISWHT(mode: int, /) -> bool: ...
def filemode(mode: int, /) -> str: ...
if sys.platform == "win32":
IO_REPARSE_TAG_SYMLINK: int
IO_REPARSE_TAG_MOUNT_POINT: int
IO_REPARSE_TAG_APPEXECLINK: int
if sys.platform == "win32":
FILE_ATTRIBUTE_ARCHIVE: Literal[32]
FILE_ATTRIBUTE_COMPRESSED: Literal[2048]
FILE_ATTRIBUTE_DEVICE: Literal[64]
FILE_ATTRIBUTE_DIRECTORY: Literal[16]
FILE_ATTRIBUTE_ENCRYPTED: Literal[16384]
FILE_ATTRIBUTE_HIDDEN: Literal[2]
FILE_ATTRIBUTE_INTEGRITY_STREAM: Literal[32768]
FILE_ATTRIBUTE_NORMAL: Literal[128]
FILE_ATTRIBUTE_NOT_CONTENT_INDEXED: Literal[8192]
FILE_ATTRIBUTE_NO_SCRUB_DATA: Literal[131072]
FILE_ATTRIBUTE_OFFLINE: Literal[4096]
FILE_ATTRIBUTE_READONLY: Literal[1]
FILE_ATTRIBUTE_REPARSE_POINT: Literal[1024]
FILE_ATTRIBUTE_SPARSE_FILE: Literal[512]
FILE_ATTRIBUTE_SYSTEM: Literal[4]
FILE_ATTRIBUTE_TEMPORARY: Literal[256]
FILE_ATTRIBUTE_VIRTUAL: Literal[65536]
if sys.version_info >= (3, 13):
SF_SETTABLE: Literal[0x3FFF0000]
# https://github.com/python/cpython/issues/114081#issuecomment-2119017790
# SF_RESTRICTED: Literal[0x00080000]
SF_FIRMLINK: Literal[0x00800000]
SF_DATALESS: Literal[0x40000000]
SF_SUPPORTED: Literal[0x9F0000]
SF_SYNTHETIC: Literal[0xC0000000]
UF_TRACKED: Literal[0x00000040]
UF_DATAVAULT: Literal[0x00000080]
UF_SETTABLE: Literal[0x0000FFFF]

View File

@@ -1,20 +0,0 @@
import enum
import sys
from typing import Literal
LOG_THRESHOLD_FOR_CONNLOST_WRITES: Literal[5]
ACCEPT_RETRY_DELAY: Literal[1]
DEBUG_STACK_DEPTH: Literal[10]
SSL_HANDSHAKE_TIMEOUT: float
SENDFILE_FALLBACK_READBUFFER_SIZE: Literal[262144]
if sys.version_info >= (3, 11):
SSL_SHUTDOWN_TIMEOUT: float
FLOW_CONTROL_HIGH_WATER_SSL_READ: Literal[256]
FLOW_CONTROL_HIGH_WATER_SSL_WRITE: Literal[512]
if sys.version_info >= (3, 12):
THREAD_JOIN_TIMEOUT: Literal[300]
class _SendfileMode(enum.Enum):
UNSUPPORTED = 1
TRY_NATIVE = 2
FALLBACK = 3

View File

@@ -1,20 +0,0 @@
import functools
import traceback
from collections.abc import Iterable
from types import FrameType, FunctionType
from typing import Any, overload
from typing_extensions import TypeAlias
class _HasWrapper:
__wrapper__: _HasWrapper | FunctionType
_FuncType: TypeAlias = FunctionType | _HasWrapper | functools.partial[Any] | functools.partialmethod[Any]
@overload
def _get_function_source(func: _FuncType) -> tuple[str, int]: ...
@overload
def _get_function_source(func: object) -> tuple[str, int] | None: ...
def _format_callback_source(func: object, args: Iterable[Any]) -> str: ...
def _format_args_and_kwargs(args: Iterable[Any], kwargs: dict[str, Any]) -> str: ...
def _format_callback(func: object, args: Iterable[Any], kwargs: dict[str, Any], suffix: str = "") -> str: ...
def extract_stack(f: FrameType | None = None, limit: int | None = None) -> traceback.StackSummary: ...

View File

@@ -1,196 +0,0 @@
import sys
import types
from abc import ABCMeta, abstractmethod
from collections.abc import Callable
from typing import Literal
from typing_extensions import Self, TypeVarTuple, Unpack, deprecated
from .events import AbstractEventLoop, BaseDefaultEventLoopPolicy
from .selector_events import BaseSelectorEventLoop
_Ts = TypeVarTuple("_Ts")
# This is also technically not available on Win,
# but other parts of typeshed need this definition.
# So, it is special cased.
if sys.version_info >= (3, 12):
@deprecated("Deprecated as of Python 3.12; will be removed in Python 3.14")
class AbstractChildWatcher:
@abstractmethod
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
@abstractmethod
def remove_child_handler(self, pid: int) -> bool: ...
@abstractmethod
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
@abstractmethod
def close(self) -> None: ...
@abstractmethod
def __enter__(self) -> Self: ...
@abstractmethod
def __exit__(
self, typ: type[BaseException] | None, exc: BaseException | None, tb: types.TracebackType | None
) -> None: ...
@abstractmethod
def is_active(self) -> bool: ...
else:
class AbstractChildWatcher:
@abstractmethod
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
@abstractmethod
def remove_child_handler(self, pid: int) -> bool: ...
@abstractmethod
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
@abstractmethod
def close(self) -> None: ...
@abstractmethod
def __enter__(self) -> Self: ...
@abstractmethod
def __exit__(
self, typ: type[BaseException] | None, exc: BaseException | None, tb: types.TracebackType | None
) -> None: ...
@abstractmethod
def is_active(self) -> bool: ...
if sys.platform != "win32":
if sys.version_info >= (3, 9):
__all__ = (
"SelectorEventLoop",
"AbstractChildWatcher",
"SafeChildWatcher",
"FastChildWatcher",
"PidfdChildWatcher",
"MultiLoopChildWatcher",
"ThreadedChildWatcher",
"DefaultEventLoopPolicy",
)
else:
__all__ = (
"SelectorEventLoop",
"AbstractChildWatcher",
"SafeChildWatcher",
"FastChildWatcher",
"MultiLoopChildWatcher",
"ThreadedChildWatcher",
"DefaultEventLoopPolicy",
)
# Doesn't actually have ABCMeta metaclass at runtime, but mypy complains if we don't have it in the stub.
# See discussion in #7412
class BaseChildWatcher(AbstractChildWatcher, metaclass=ABCMeta):
def close(self) -> None: ...
def is_active(self) -> bool: ...
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
if sys.version_info >= (3, 12):
@deprecated("Deprecated as of Python 3.12; will be removed in Python 3.14")
class SafeChildWatcher(BaseChildWatcher):
def __enter__(self) -> Self: ...
def __exit__(self, a: type[BaseException] | None, b: BaseException | None, c: types.TracebackType | None) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
@deprecated("Deprecated as of Python 3.12; will be removed in Python 3.14")
class FastChildWatcher(BaseChildWatcher):
def __enter__(self) -> Self: ...
def __exit__(self, a: type[BaseException] | None, b: BaseException | None, c: types.TracebackType | None) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
else:
class SafeChildWatcher(BaseChildWatcher):
def __enter__(self) -> Self: ...
def __exit__(self, a: type[BaseException] | None, b: BaseException | None, c: types.TracebackType | None) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
class FastChildWatcher(BaseChildWatcher):
def __enter__(self) -> Self: ...
def __exit__(self, a: type[BaseException] | None, b: BaseException | None, c: types.TracebackType | None) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
class _UnixSelectorEventLoop(BaseSelectorEventLoop): ...
class _UnixDefaultEventLoopPolicy(BaseDefaultEventLoopPolicy):
if sys.version_info >= (3, 12):
@deprecated("Deprecated as of Python 3.12; will be removed in Python 3.14")
def get_child_watcher(self) -> AbstractChildWatcher: ...
@deprecated("Deprecated as of Python 3.12; will be removed in Python 3.14")
def set_child_watcher(self, watcher: AbstractChildWatcher | None) -> None: ...
else:
def get_child_watcher(self) -> AbstractChildWatcher: ...
def set_child_watcher(self, watcher: AbstractChildWatcher | None) -> None: ...
SelectorEventLoop = _UnixSelectorEventLoop
DefaultEventLoopPolicy = _UnixDefaultEventLoopPolicy
if sys.version_info >= (3, 12):
@deprecated("Deprecated as of Python 3.12; will be removed in Python 3.14")
class MultiLoopChildWatcher(AbstractChildWatcher):
def is_active(self) -> bool: ...
def close(self) -> None: ...
def __enter__(self) -> Self: ...
def __exit__(
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: types.TracebackType | None
) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
else:
class MultiLoopChildWatcher(AbstractChildWatcher):
def is_active(self) -> bool: ...
def close(self) -> None: ...
def __enter__(self) -> Self: ...
def __exit__(
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: types.TracebackType | None
) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
class ThreadedChildWatcher(AbstractChildWatcher):
def is_active(self) -> Literal[True]: ...
def close(self) -> None: ...
def __enter__(self) -> Self: ...
def __exit__(
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: types.TracebackType | None
) -> None: ...
def __del__(self) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
if sys.version_info >= (3, 9):
class PidfdChildWatcher(AbstractChildWatcher):
def __enter__(self) -> Self: ...
def __exit__(
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: types.TracebackType | None
) -> None: ...
def is_active(self) -> bool: ...
def close(self) -> None: ...
def attach_loop(self, loop: AbstractEventLoop | None) -> None: ...
def add_child_handler(
self, pid: int, callback: Callable[[int, int, Unpack[_Ts]], object], *args: Unpack[_Ts]
) -> None: ...
def remove_child_handler(self, pid: int) -> bool: ...

View File

@@ -1,32 +0,0 @@
from ._base import (
ALL_COMPLETED as ALL_COMPLETED,
FIRST_COMPLETED as FIRST_COMPLETED,
FIRST_EXCEPTION as FIRST_EXCEPTION,
BrokenExecutor as BrokenExecutor,
CancelledError as CancelledError,
Executor as Executor,
Future as Future,
InvalidStateError as InvalidStateError,
TimeoutError as TimeoutError,
as_completed as as_completed,
wait as wait,
)
from .process import ProcessPoolExecutor as ProcessPoolExecutor
from .thread import ThreadPoolExecutor as ThreadPoolExecutor
__all__ = (
"FIRST_COMPLETED",
"FIRST_EXCEPTION",
"ALL_COMPLETED",
"CancelledError",
"TimeoutError",
"BrokenExecutor",
"Future",
"Executor",
"wait",
"as_completed",
"ProcessPoolExecutor",
"ThreadPoolExecutor",
)
def __dir__() -> tuple[str, ...]: ...

View File

@@ -1,16 +0,0 @@
from typing import Any, TypeVar
__all__ = ["Error", "copy", "deepcopy"]
_T = TypeVar("_T")
# None in CPython but non-None in Jython
PyStringMap: Any
# Note: memo and _nil are internal kwargs.
def deepcopy(x: _T, memo: dict[int, Any] | None = None, _nil: Any = []) -> _T: ...
def copy(x: _T) -> _T: ...
class Error(Exception): ...
error = Error

View File

@@ -1,25 +0,0 @@
from typing import Any
from ..cmd import Command
def show_formats() -> None: ...
class bdist(Command):
description: str
user_options: Any
boolean_options: Any
help_options: Any
no_format_option: Any
default_format: Any
format_commands: Any
format_command: Any
bdist_base: Any
plat_name: Any
formats: Any
dist_dir: Any
skip_build: int
group: Any
owner: Any
def initialize_options(self) -> None: ...
def finalize_options(self) -> None: ...
def run(self) -> None: ...

View File

@@ -1,67 +0,0 @@
ENDMARKER: int
NAME: int
NUMBER: int
STRING: int
NEWLINE: int
INDENT: int
DEDENT: int
LPAR: int
RPAR: int
LSQB: int
RSQB: int
COLON: int
COMMA: int
SEMI: int
PLUS: int
MINUS: int
STAR: int
SLASH: int
VBAR: int
AMPER: int
LESS: int
GREATER: int
EQUAL: int
DOT: int
PERCENT: int
BACKQUOTE: int
LBRACE: int
RBRACE: int
EQEQUAL: int
NOTEQUAL: int
LESSEQUAL: int
GREATEREQUAL: int
TILDE: int
CIRCUMFLEX: int
LEFTSHIFT: int
RIGHTSHIFT: int
DOUBLESTAR: int
PLUSEQUAL: int
MINEQUAL: int
STAREQUAL: int
SLASHEQUAL: int
PERCENTEQUAL: int
AMPEREQUAL: int
VBAREQUAL: int
CIRCUMFLEXEQUAL: int
LEFTSHIFTEQUAL: int
RIGHTSHIFTEQUAL: int
DOUBLESTAREQUAL: int
DOUBLESLASH: int
DOUBLESLASHEQUAL: int
OP: int
COMMENT: int
NL: int
RARROW: int
AT: int
ATEQUAL: int
AWAIT: int
ASYNC: int
ERRORTOKEN: int
COLONEQUAL: int
N_TOKENS: int
NT_OFFSET: int
tok_name: dict[int, str]
def ISTERMINAL(x: int) -> bool: ...
def ISNONTERMINAL(x: int) -> bool: ...
def ISEOF(x: int) -> bool: ...

View File

@@ -1,55 +0,0 @@
import sys
from typing import Literal, overload
if sys.platform != "win32":
LOG_ALERT: Literal[1]
LOG_AUTH: Literal[32]
LOG_AUTHPRIV: Literal[80]
LOG_CONS: Literal[2]
LOG_CRIT: Literal[2]
LOG_CRON: Literal[72]
LOG_DAEMON: Literal[24]
LOG_DEBUG: Literal[7]
LOG_EMERG: Literal[0]
LOG_ERR: Literal[3]
LOG_INFO: Literal[6]
LOG_KERN: Literal[0]
LOG_LOCAL0: Literal[128]
LOG_LOCAL1: Literal[136]
LOG_LOCAL2: Literal[144]
LOG_LOCAL3: Literal[152]
LOG_LOCAL4: Literal[160]
LOG_LOCAL5: Literal[168]
LOG_LOCAL6: Literal[176]
LOG_LOCAL7: Literal[184]
LOG_LPR: Literal[48]
LOG_MAIL: Literal[16]
LOG_NDELAY: Literal[8]
LOG_NEWS: Literal[56]
LOG_NOTICE: Literal[5]
LOG_NOWAIT: Literal[16]
LOG_ODELAY: Literal[4]
LOG_PERROR: Literal[32]
LOG_PID: Literal[1]
LOG_SYSLOG: Literal[40]
LOG_USER: Literal[8]
LOG_UUCP: Literal[64]
LOG_WARNING: Literal[4]
if sys.version_info >= (3, 13):
LOG_FTP: Literal[88]
LOG_INSTALL: Literal[112]
LOG_LAUNCHD: Literal[192]
LOG_NETINFO: Literal[96]
LOG_RAS: Literal[120]
LOG_REMOTEAUTH: Literal[104]
def LOG_MASK(pri: int, /) -> int: ...
def LOG_UPTO(pri: int, /) -> int: ...
def closelog() -> None: ...
def openlog(ident: str = ..., logoption: int = ..., facility: int = ...) -> None: ...
def setlogmask(maskpri: int, /) -> int: ...
@overload
def syslog(priority: int, message: str) -> None: ...
@overload
def syslog(message: str) -> None: ...

View File

@@ -1,80 +0,0 @@
from typing import Literal
# These are not actually bools. See #4669
NO: bool
YES: bool
TRUE: bool
FALSE: bool
ON: bool
OFF: bool
N: Literal["n"]
S: Literal["s"]
W: Literal["w"]
E: Literal["e"]
NW: Literal["nw"]
SW: Literal["sw"]
NE: Literal["ne"]
SE: Literal["se"]
NS: Literal["ns"]
EW: Literal["ew"]
NSEW: Literal["nsew"]
CENTER: Literal["center"]
NONE: Literal["none"]
X: Literal["x"]
Y: Literal["y"]
BOTH: Literal["both"]
LEFT: Literal["left"]
TOP: Literal["top"]
RIGHT: Literal["right"]
BOTTOM: Literal["bottom"]
RAISED: Literal["raised"]
SUNKEN: Literal["sunken"]
FLAT: Literal["flat"]
RIDGE: Literal["ridge"]
GROOVE: Literal["groove"]
SOLID: Literal["solid"]
HORIZONTAL: Literal["horizontal"]
VERTICAL: Literal["vertical"]
NUMERIC: Literal["numeric"]
CHAR: Literal["char"]
WORD: Literal["word"]
BASELINE: Literal["baseline"]
INSIDE: Literal["inside"]
OUTSIDE: Literal["outside"]
SEL: Literal["sel"]
SEL_FIRST: Literal["sel.first"]
SEL_LAST: Literal["sel.last"]
END: Literal["end"]
INSERT: Literal["insert"]
CURRENT: Literal["current"]
ANCHOR: Literal["anchor"]
ALL: Literal["all"]
NORMAL: Literal["normal"]
DISABLED: Literal["disabled"]
ACTIVE: Literal["active"]
HIDDEN: Literal["hidden"]
CASCADE: Literal["cascade"]
CHECKBUTTON: Literal["checkbutton"]
COMMAND: Literal["command"]
RADIOBUTTON: Literal["radiobutton"]
SEPARATOR: Literal["separator"]
SINGLE: Literal["single"]
BROWSE: Literal["browse"]
MULTIPLE: Literal["multiple"]
EXTENDED: Literal["extended"]
DOTBOX: Literal["dotbox"]
UNDERLINE: Literal["underline"]
PIESLICE: Literal["pieslice"]
CHORD: Literal["chord"]
ARC: Literal["arc"]
FIRST: Literal["first"]
LAST: Literal["last"]
BUTT: Literal["butt"]
PROJECTING: Literal["projecting"]
ROUND: Literal["round"]
BEVEL: Literal["bevel"]
MITER: Literal["miter"]
MOVETO: Literal["moveto"]
SCROLL: Literal["scroll"]
UNITS: Literal["units"]
PAGES: Literal["pages"]

View File

@@ -1,28 +0,0 @@
import sys
from _typeshed import ReadableBuffer
from typing import Literal, overload
if sys.platform == "win32":
SND_APPLICATION: Literal[128]
SND_FILENAME: Literal[131072]
SND_ALIAS: Literal[65536]
SND_LOOP: Literal[8]
SND_MEMORY: Literal[4]
SND_PURGE: Literal[64]
SND_ASYNC: Literal[1]
SND_NODEFAULT: Literal[2]
SND_NOSTOP: Literal[16]
SND_NOWAIT: Literal[8192]
MB_ICONASTERISK: Literal[64]
MB_ICONEXCLAMATION: Literal[48]
MB_ICONHAND: Literal[16]
MB_ICONQUESTION: Literal[32]
MB_OK: Literal[0]
def Beep(frequency: int, duration: int) -> None: ...
# Can actually accept anything ORed with 4, and if not it's definitely str, but that's inexpressible
@overload
def PlaySound(sound: ReadableBuffer | None, flags: Literal[4]) -> None: ...
@overload
def PlaySound(sound: str | ReadableBuffer | None, flags: int) -> None: ...
def MessageBeep(type: int = 0) -> None: ...

View File

@@ -18,18 +18,30 @@ ruff_python_stdlib = { workspace = true }
ruff_text_size = { workspace = true }
bitflags = { workspace = true }
indexmap = { workspace = true }
camino = { workspace = true }
compact_str = { workspace = true }
countme = { workspace = true }
once_cell = { workspace = true }
ordermap = { workspace = true }
salsa = { workspace = true }
smallvec = { workspace = true }
smol_str = { workspace = true }
tracing = { workspace = true }
rustc-hash = { workspace = true }
hashbrown = { workspace = true }
[build-dependencies]
path-slash = { workspace = true }
walkdir = { workspace = true }
zip = { workspace = true, features = ["zstd", "deflate"] }
[dev-dependencies]
anyhow = { workspace = true }
ruff_db = { workspace = true, features = ["os", "testing"]}
ruff_python_parser = { workspace = true }
anyhow = { workspace = true }
insta = { workspace = true }
tempfile = { workspace = true }
walkdir = { workspace = true }
zip = { workspace = true }
[lints]
workspace = true

View File

@@ -0,0 +1,9 @@
# Red Knot
Semantic analysis for the red-knot project.
## Vendored types for the stdlib
This crate vendors [typeshed](https://github.com/python/typeshed)'s stubs for the standard library. The vendored stubs can be found in `crates/red_knot_python_semantic/vendor/typeshed`. The file `crates/red_knot_python_semantic/vendor/typeshed/source_commit.txt` tells you the typeshed commit that our vendored stdlib stubs currently correspond to.
The typeshed stubs are updated every two weeks via an automated PR using the `sync_typeshed.yaml` workflow in the `.github/workflows` directory. This workflow can also be triggered at any time via [workflow dispatch](https://docs.github.com/en/actions/using-workflows/manually-running-a-workflow#running-a-workflow).

View File

@@ -3,11 +3,12 @@
//!
//! This script should be automatically run at build time
//! whenever the script itself changes, or whenever any files
//! in `crates/red_knot/vendor/typeshed` change.
//! in `crates/red_knot_python_semantic/vendor/typeshed` change.
use std::fs::File;
use std::path::Path;
use path_slash::PathExt;
use zip::result::ZipResult;
use zip::write::{FileOptions, ZipWriter};
use zip::CompressionMethod;
@@ -22,31 +23,44 @@ const TYPESHED_ZIP_LOCATION: &str = "/zipped_typeshed.zip";
fn zip_dir(directory_path: &str, writer: File) -> ZipResult<File> {
let mut zip = ZipWriter::new(writer);
// Use deflated compression for WASM builds because compiling `zstd-sys` requires clang
// [source](https://github.com/gyscos/zstd-rs/wiki/Compile-for-WASM) which complicates the build
// by a lot. Deflated compression is slower but it shouldn't matter much for the WASM use case
// (WASM itself is already slower than a native build for a specific platform).
// We can't use `#[cfg(...)]` here because the target-arch in a build script is the
// architecture of the system running the build script and not the architecture of the build-target.
// That's why we use the `TARGET` environment variable here.
let method = if std::env::var("TARGET").unwrap().contains("wasm32") {
CompressionMethod::Deflated
} else {
CompressionMethod::Zstd
};
let options = FileOptions::default()
.compression_method(CompressionMethod::Zstd)
.compression_method(method)
.unix_permissions(0o644);
for entry in walkdir::WalkDir::new(directory_path) {
let dir_entry = entry.unwrap();
let relative_path = dir_entry.path();
let name = relative_path
let absolute_path = dir_entry.path();
let normalized_relative_path = absolute_path
.strip_prefix(Path::new(directory_path))
.unwrap()
.to_str()
.to_slash()
.expect("Unexpected non-utf8 typeshed path!");
// Write file or directory explicitly
// Some unzip tools unzip files with directory paths correctly, some do not!
if relative_path.is_file() {
println!("adding file {relative_path:?} as {name:?} ...");
zip.start_file(name, options)?;
let mut f = File::open(relative_path)?;
if absolute_path.is_file() {
println!("adding file {absolute_path:?} as {normalized_relative_path:?} ...");
zip.start_file(normalized_relative_path, options)?;
let mut f = File::open(absolute_path)?;
std::io::copy(&mut f, &mut zip).unwrap();
} else if !name.is_empty() {
} else if !normalized_relative_path.is_empty() {
// Only if not root! Avoids path spec / warning
// and mapname conversion failed error on unzip
println!("adding dir {relative_path:?} as {name:?} ...");
zip.add_directory(name, options)?;
println!("adding dir {absolute_path:?} as {normalized_relative_path:?} ...");
zip.add_directory(normalized_relative_path, options)?;
}
}
zip.finish()

View File

@@ -27,12 +27,13 @@ pub struct AstNodeRef<T> {
#[allow(unsafe_code)]
impl<T> AstNodeRef<T> {
/// Creates a new `AstNodeRef` that reference `node`. The `parsed` is the [`ParsedModule`] to which
/// the `AstNodeRef` belongs.
/// Creates a new `AstNodeRef` that reference `node`. The `parsed` is the [`ParsedModule`] to
/// which the `AstNodeRef` belongs.
///
/// ## Safety
/// Dereferencing the `node` can result in undefined behavior if `parsed` isn't the [`ParsedModule`] to
/// which `node` belongs. It's the caller's responsibility to ensure that the invariant `node belongs to parsed` is upheld.
/// Dereferencing the `node` can result in undefined behavior if `parsed` isn't the
/// [`ParsedModule`] to which `node` belongs. It's the caller's responsibility to ensure that
/// the invariant `node belongs to parsed` is upheld.
pub(super) unsafe fn new(parsed: ParsedModule, node: &T) -> Self {
Self {
@@ -43,8 +44,8 @@ impl<T> AstNodeRef<T> {
/// Returns a reference to the wrapped node.
pub fn node(&self) -> &T {
// SAFETY: Holding on to `parsed` ensures that the AST to which `node` belongs is still alive
// and not moved.
// SAFETY: Holding on to `parsed` ensures that the AST to which `node` belongs is still
// alive and not moved.
unsafe { self.node.as_ref() }
}
}

View File

@@ -0,0 +1,16 @@
use crate::module_name::ModuleName;
use crate::module_resolver::resolve_module;
use crate::semantic_index::global_scope;
use crate::semantic_index::symbol::ScopeId;
use crate::Db;
/// Salsa query to get the builtins scope.
///
/// Can return None if a custom typeshed is used that is missing `builtins.pyi`.
#[salsa::tracked]
pub(crate) fn builtins_scope(db: &dyn Db) -> Option<ScopeId<'_>> {
let builtins_name =
ModuleName::new_static("builtins").expect("Expected 'builtins' to be a valid module name");
let builtins_file = resolve_module(db, builtins_name)?.file();
Some(global_scope(db, builtins_file))
}

View File

@@ -1,57 +1,27 @@
use salsa::DbWithJar;
use ruff_db::{Db as SourceDb, Upcast};
use crate::module::resolver::{
file_to_module, internal::ModuleNameIngredient, internal::ModuleResolverSearchPaths,
resolve_module_query,
};
use crate::semantic_index::symbol::{public_symbols_map, scopes_map, PublicSymbolId, ScopeId};
use crate::semantic_index::{root_scope, semantic_index, symbol_table};
use crate::types::{infer_types, public_symbol_ty};
#[salsa::jar(db=Db)]
pub struct Jar(
ModuleNameIngredient,
ModuleResolverSearchPaths,
ScopeId,
PublicSymbolId,
symbol_table,
resolve_module_query,
file_to_module,
scopes_map,
root_scope,
semantic_index,
infer_types,
public_symbol_ty,
public_symbols_map,
);
/// Database giving access to semantic information about a Python program.
pub trait Db: SourceDb + DbWithJar<Jar> + Upcast<dyn SourceDb> {}
#[salsa::db]
pub trait Db: SourceDb + Upcast<dyn SourceDb> {}
#[cfg(test)]
pub(crate) mod tests {
use std::fmt::Formatter;
use std::marker::PhantomData;
use std::sync::Arc;
use salsa::ingredient::Ingredient;
use salsa::storage::HasIngredientsFor;
use salsa::{AsId, DebugWithDb};
use crate::module_resolver::vendored_typeshed_stubs;
use ruff_db::files::Files;
use ruff_db::system::{DbWithTestSystem, System, TestSystem};
use ruff_db::vendored::VendoredFileSystem;
use ruff_db::{Db as SourceDb, Upcast};
use ruff_db::file_system::{FileSystem, MemoryFileSystem, OsFileSystem};
use ruff_db::vfs::Vfs;
use ruff_db::{Db as SourceDb, Jar as SourceJar, Upcast};
use super::Db;
use super::{Db, Jar};
#[salsa::db(Jar, SourceJar)]
#[salsa::db]
pub(crate) struct TestDb {
storage: salsa::Storage<Self>,
vfs: Vfs,
file_system: TestFileSystem,
files: Files,
system: TestSystem,
vendored: VendoredFileSystem,
events: std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>>,
}
@@ -59,39 +29,13 @@ pub(crate) mod tests {
pub(crate) fn new() -> Self {
Self {
storage: salsa::Storage::default(),
file_system: TestFileSystem::Memory(MemoryFileSystem::default()),
system: TestSystem::default(),
vendored: vendored_typeshed_stubs().clone(),
events: std::sync::Arc::default(),
vfs: Vfs::with_stubbed_vendored(),
files: Files::default(),
}
}
/// Returns the memory file system.
///
/// ## Panics
/// If this test db isn't using a memory file system.
pub(crate) fn memory_file_system(&self) -> &MemoryFileSystem {
if let TestFileSystem::Memory(fs) = &self.file_system {
fs
} else {
panic!("The test db is not using a memory file system");
}
}
/// Uses the real file system instead of the memory file system.
///
/// This useful for testing advanced file system features like permissions, symlinks, etc.
///
/// Note that any files written to the memory file system won't be copied over.
#[allow(unused)]
pub(crate) fn with_os_file_system(&mut self) {
self.file_system = TestFileSystem::Os(OsFileSystem);
}
#[allow(unused)]
pub(crate) fn vfs_mut(&mut self) -> &mut Vfs {
&mut self.vfs
}
/// Takes the salsa events.
///
/// ## Panics
@@ -112,16 +56,28 @@ pub(crate) mod tests {
}
}
impl SourceDb for TestDb {
fn file_system(&self) -> &dyn FileSystem {
match &self.file_system {
TestFileSystem::Memory(fs) => fs,
TestFileSystem::Os(fs) => fs,
}
impl DbWithTestSystem for TestDb {
fn test_system(&self) -> &TestSystem {
&self.system
}
fn vfs(&self) -> &Vfs {
&self.vfs
fn test_system_mut(&mut self) -> &mut TestSystem {
&mut self.system
}
}
#[salsa::db]
impl SourceDb for TestDb {
fn vendored(&self) -> &VendoredFileSystem {
&self.vendored
}
fn system(&self) -> &dyn System {
&self.system
}
fn files(&self) -> &Files {
&self.files
}
}
@@ -129,137 +85,21 @@ pub(crate) mod tests {
fn upcast(&self) -> &(dyn SourceDb + 'static) {
self
}
fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
self
}
}
#[salsa::db]
impl Db for TestDb {}
#[salsa::db]
impl salsa::Database for TestDb {
fn salsa_event(&self, event: salsa::Event) {
tracing::trace!("event: {:?}", event.debug(self));
fn salsa_event(&self, event: &dyn Fn() -> salsa::Event) {
let event = event();
tracing::trace!("event: {event:?}");
let mut events = self.events.lock().unwrap();
events.push(event);
}
}
impl salsa::ParallelDatabase for TestDb {
fn snapshot(&self) -> salsa::Snapshot<Self> {
salsa::Snapshot::new(Self {
storage: self.storage.snapshot(),
vfs: self.vfs.snapshot(),
file_system: match &self.file_system {
TestFileSystem::Memory(memory) => TestFileSystem::Memory(memory.snapshot()),
TestFileSystem::Os(fs) => TestFileSystem::Os(fs.snapshot()),
},
events: self.events.clone(),
})
}
}
enum TestFileSystem {
Memory(MemoryFileSystem),
#[allow(unused)]
Os(OsFileSystem),
}
pub(crate) fn assert_will_run_function_query<C, Db, Jar>(
db: &Db,
to_function: impl FnOnce(&C) -> &salsa::function::FunctionIngredient<C>,
key: C::Key,
events: &[salsa::Event],
) where
C: salsa::function::Configuration<Jar = Jar>
+ salsa::storage::IngredientsFor<Jar = Jar, Ingredients = C>,
Jar: HasIngredientsFor<C>,
Db: salsa::DbWithJar<Jar>,
C::Key: AsId,
{
will_run_function_query(db, to_function, key, events, true);
}
pub(crate) fn assert_will_not_run_function_query<C, Db, Jar>(
db: &Db,
to_function: impl FnOnce(&C) -> &salsa::function::FunctionIngredient<C>,
key: C::Key,
events: &[salsa::Event],
) where
C: salsa::function::Configuration<Jar = Jar>
+ salsa::storage::IngredientsFor<Jar = Jar, Ingredients = C>,
Jar: HasIngredientsFor<C>,
Db: salsa::DbWithJar<Jar>,
C::Key: AsId,
{
will_run_function_query(db, to_function, key, events, false);
}
fn will_run_function_query<C, Db, Jar>(
db: &Db,
to_function: impl FnOnce(&C) -> &salsa::function::FunctionIngredient<C>,
key: C::Key,
events: &[salsa::Event],
should_run: bool,
) where
C: salsa::function::Configuration<Jar = Jar>
+ salsa::storage::IngredientsFor<Jar = Jar, Ingredients = C>,
Jar: HasIngredientsFor<C>,
Db: salsa::DbWithJar<Jar>,
C::Key: AsId,
{
let (jar, _) =
<_ as salsa::storage::HasJar<<C as salsa::storage::IngredientsFor>::Jar>>::jar(db);
let ingredient = jar.ingredient();
let function_ingredient = to_function(ingredient);
let ingredient_index =
<salsa::function::FunctionIngredient<C> as Ingredient<Db>>::ingredient_index(
function_ingredient,
);
let did_run = events.iter().any(|event| {
if let salsa::EventKind::WillExecute { database_key } = event.kind {
database_key.ingredient_index() == ingredient_index
&& database_key.key_index() == key.as_id()
} else {
false
}
});
if should_run && !did_run {
panic!(
"Expected query {:?} to run but it didn't",
DebugIdx {
db: PhantomData::<Db>,
value_id: key.as_id(),
ingredient: function_ingredient,
}
);
} else if !should_run && did_run {
panic!(
"Expected query {:?} not to run but it did",
DebugIdx {
db: PhantomData::<Db>,
value_id: key.as_id(),
ingredient: function_ingredient,
}
);
}
}
struct DebugIdx<'a, I, Db>
where
I: Ingredient<Db>,
{
value_id: salsa::Id,
ingredient: &'a I,
db: PhantomData<Db>,
}
impl<'a, I, Db> std::fmt::Debug for DebugIdx<'a, I, Db>
where
I: Ingredient<Db>,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.ingredient.fmt_index(Some(self.value_id), f)
}
}
}

View File

@@ -1,13 +1,24 @@
use std::hash::BuildHasherDefault;
use rustc_hash::FxHasher;
pub use db::Db;
pub use module_name::ModuleName;
pub use module_resolver::{resolve_module, system_module_search_paths, vendored_typeshed_stubs};
pub use program::{Program, ProgramSettings, SearchPathSettings};
pub use python_version::PythonVersion;
pub use semantic_model::{HasTy, SemanticModel};
pub mod ast_node_ref;
mod builtins;
mod db;
pub mod module;
pub mod name;
mod module_name;
mod module_resolver;
mod node_key;
mod program;
mod python_version;
pub mod semantic_index;
mod semantic_model;
pub mod types;
type FxIndexSet<V> = indexmap::set::IndexSet<V, BuildHasherDefault<FxHasher>>;
pub use db::{Db, Jar};
use rustc_hash::FxHasher;
use std::hash::BuildHasherDefault;
type FxOrderSet<V> = ordermap::set::OrderSet<V, BuildHasherDefault<FxHasher>>;

View File

@@ -1,10 +0,0 @@
use std::hash::BuildHasherDefault;
use rustc_hash::FxHasher;
pub mod ast_node_ref;
mod node_key;
pub mod semantic_index;
pub mod types;
pub(crate) type FxIndexSet<V> = indexmap::set::IndexSet<V, BuildHasherDefault<FxHasher>>;

View File

@@ -1,332 +0,0 @@
use std::fmt::Formatter;
use std::ops::Deref;
use std::sync::Arc;
use ruff_db::file_system::FileSystemPath;
use ruff_db::vfs::{VfsFile, VfsPath};
use ruff_python_stdlib::identifiers::is_identifier;
use crate::Db;
pub mod resolver;
/// A module name, e.g. `foo.bar`.
///
/// Always normalized to the absolute form (never a relative module name, i.e., never `.foo`).
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct ModuleName(smol_str::SmolStr);
impl ModuleName {
/// Creates a new module name for `name`. Returns `Some` if `name` is a valid, absolute
/// module name and `None` otherwise.
///
/// The module name is invalid if:
///
/// * The name is empty
/// * The name is relative
/// * The name ends with a `.`
/// * The name contains a sequence of multiple dots
/// * A component of a name (the part between two dots) isn't a valid python identifier.
#[inline]
pub fn new(name: &str) -> Option<Self> {
Self::new_from_smol(smol_str::SmolStr::new(name))
}
/// Creates a new module name for `name` where `name` is a static string.
/// Returns `Some` if `name` is a valid, absolute module name and `None` otherwise.
///
/// The module name is invalid if:
///
/// * The name is empty
/// * The name is relative
/// * The name ends with a `.`
/// * The name contains a sequence of multiple dots
/// * A component of a name (the part between two dots) isn't a valid python identifier.
///
/// ## Examples
///
/// ```
/// use red_knot_python_semantic::module::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar").as_deref(), Some("foo.bar"));
/// assert_eq!(ModuleName::new_static(""), None);
/// assert_eq!(ModuleName::new_static("..foo"), None);
/// assert_eq!(ModuleName::new_static(".foo"), None);
/// assert_eq!(ModuleName::new_static("foo."), None);
/// assert_eq!(ModuleName::new_static("foo..bar"), None);
/// assert_eq!(ModuleName::new_static("2000"), None);
/// ```
#[inline]
pub fn new_static(name: &'static str) -> Option<Self> {
Self::new_from_smol(smol_str::SmolStr::new_static(name))
}
fn new_from_smol(name: smol_str::SmolStr) -> Option<Self> {
if name.is_empty() {
return None;
}
if name.split('.').all(is_identifier) {
Some(Self(name))
} else {
None
}
}
/// An iterator over the components of the module name:
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::module::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().components().collect::<Vec<_>>(), vec!["foo", "bar", "baz"]);
/// ```
pub fn components(&self) -> impl DoubleEndedIterator<Item = &str> {
self.0.split('.')
}
/// The name of this module's immediate parent, if it has a parent.
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::module::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar").unwrap().parent(), Some(ModuleName::new_static("foo").unwrap()));
/// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().parent(), Some(ModuleName::new_static("foo.bar").unwrap()));
/// assert_eq!(ModuleName::new_static("root").unwrap().parent(), None);
/// ```
pub fn parent(&self) -> Option<ModuleName> {
let (parent, _) = self.0.rsplit_once('.')?;
Some(Self(smol_str::SmolStr::new(parent)))
}
/// Returns `true` if the name starts with `other`.
///
/// This is equivalent to checking if `self` is a sub-module of `other`.
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::module::ModuleName;
///
/// assert!(ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap()));
///
/// assert!(!ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("bar").unwrap()));
/// assert!(!ModuleName::new_static("foo_bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap()));
/// ```
pub fn starts_with(&self, other: &ModuleName) -> bool {
let mut self_components = self.components();
let other_components = other.components();
for other_component in other_components {
if self_components.next() != Some(other_component) {
return false;
}
}
true
}
#[inline]
pub fn as_str(&self) -> &str {
&self.0
}
fn from_relative_path(path: &FileSystemPath) -> Option<Self> {
let path = if path.ends_with("__init__.py") || path.ends_with("__init__.pyi") {
path.parent()?
} else {
path
};
let name = if let Some(parent) = path.parent() {
let mut name = String::with_capacity(path.as_str().len());
for component in parent.components() {
name.push_str(component.as_os_str().to_str()?);
name.push('.');
}
// SAFETY: Unwrap is safe here or `parent` would have returned `None`.
name.push_str(path.file_stem().unwrap());
smol_str::SmolStr::from(name)
} else {
smol_str::SmolStr::new(path.file_stem()?)
};
Some(Self(name))
}
}
impl Deref for ModuleName {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl PartialEq<str> for ModuleName {
fn eq(&self, other: &str) -> bool {
self.as_str() == other
}
}
impl PartialEq<ModuleName> for str {
fn eq(&self, other: &ModuleName) -> bool {
self == other.as_str()
}
}
impl std::fmt::Display for ModuleName {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.0)
}
}
/// Representation of a Python module.
#[derive(Clone, PartialEq, Eq)]
pub struct Module {
inner: Arc<ModuleInner>,
}
impl Module {
/// The absolute name of the module (e.g. `foo.bar`)
pub fn name(&self) -> &ModuleName {
&self.inner.name
}
/// The file to the source code that defines this module
pub fn file(&self) -> VfsFile {
self.inner.file
}
/// The search path from which the module was resolved.
pub fn search_path(&self) -> &ModuleSearchPath {
&self.inner.search_path
}
/// Determine whether this module is a single-file module or a package
pub fn kind(&self) -> ModuleKind {
self.inner.kind
}
}
impl std::fmt::Debug for Module {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Module")
.field("name", &self.name())
.field("kind", &self.kind())
.field("file", &self.file())
.field("search_path", &self.search_path())
.finish()
}
}
impl salsa::DebugWithDb<dyn Db> for Module {
fn fmt(&self, f: &mut Formatter<'_>, db: &dyn Db) -> std::fmt::Result {
f.debug_struct("Module")
.field("name", &self.name())
.field("kind", &self.kind())
.field("file", &self.file().debug(db.upcast()))
.field("search_path", &self.search_path())
.finish()
}
}
#[derive(PartialEq, Eq)]
struct ModuleInner {
name: ModuleName,
kind: ModuleKind,
search_path: ModuleSearchPath,
file: VfsFile,
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ModuleKind {
/// A single-file module (e.g. `foo.py` or `foo.pyi`)
Module,
/// A python package (`foo/__init__.py` or `foo/__init__.pyi`)
Package,
}
/// A search path in which to search modules.
/// Corresponds to a path in [`sys.path`](https://docs.python.org/3/library/sys_path_init.html) at runtime.
///
/// Cloning a search path is cheap because it's an `Arc`.
#[derive(Clone, PartialEq, Eq)]
pub struct ModuleSearchPath {
inner: Arc<ModuleSearchPathInner>,
}
impl ModuleSearchPath {
pub fn new<P>(path: P, kind: ModuleSearchPathKind) -> Self
where
P: Into<VfsPath>,
{
Self {
inner: Arc::new(ModuleSearchPathInner {
path: path.into(),
kind,
}),
}
}
/// Determine whether this is a first-party, third-party or standard-library search path
pub fn kind(&self) -> ModuleSearchPathKind {
self.inner.kind
}
/// Return the location of the search path on the file system
pub fn path(&self) -> &VfsPath {
&self.inner.path
}
}
impl std::fmt::Debug for ModuleSearchPath {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ModuleSearchPath")
.field("path", &self.inner.path)
.field("kind", &self.kind())
.finish()
}
}
#[derive(Eq, PartialEq)]
struct ModuleSearchPathInner {
path: VfsPath,
kind: ModuleSearchPathKind,
}
/// Enumeration of the different kinds of search paths type checkers are expected to support.
///
/// N.B. Although we don't implement `Ord` for this enum, they are ordered in terms of the
/// priority that we want to give these modules when resolving them.
/// This is roughly [the order given in the typing spec], but typeshed's stubs
/// for the standard library are moved higher up to match Python's semantics at runtime.
///
/// [the order given in the typing spec]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ModuleSearchPathKind {
/// "Extra" paths provided by the user in a config file, env var or CLI flag.
/// E.g. mypy's `MYPYPATH` env var, or pyright's `stubPath` configuration setting
Extra,
/// Files in the project we're directly being invoked on
FirstParty,
/// The `stdlib` directory of typeshed (either vendored or custom)
StandardLibrary,
/// Stubs or runtime modules installed in site-packages
SitePackagesThirdParty,
/// Vendored third-party stubs from typeshed
VendoredThirdParty,
}

View File

@@ -1,947 +0,0 @@
use salsa::DebugWithDb;
use std::ops::Deref;
use std::sync::Arc;
use ruff_db::file_system::{FileSystem, FileSystemPath, FileSystemPathBuf};
use ruff_db::vfs::{system_path_to_file, vfs_path_to_file, VfsFile, VfsPath};
use crate::module::resolver::internal::ModuleResolverSearchPaths;
use crate::module::{
Module, ModuleInner, ModuleKind, ModuleName, ModuleSearchPath, ModuleSearchPathKind,
};
use crate::Db;
const TYPESHED_STDLIB_DIRECTORY: &str = "stdlib";
/// Configures the module search paths for the module resolver.
///
/// Must be called before calling any other module resolution functions.
pub fn set_module_resolution_settings(db: &mut dyn Db, config: ModuleResolutionSettings) {
// There's no concurrency issue here because we hold a `&mut dyn Db` reference. No other
// thread can mutate the `Db` while we're in this call, so using `try_get` to test if
// the settings have already been set is safe.
if let Some(existing) = ModuleResolverSearchPaths::try_get(db) {
existing
.set_search_paths(db)
.to(config.into_ordered_search_paths());
} else {
ModuleResolverSearchPaths::new(db, config.into_ordered_search_paths());
}
}
/// Resolves a module name to a module.
pub fn resolve_module(db: &dyn Db, module_name: ModuleName) -> Option<Module> {
let interned_name = internal::ModuleNameIngredient::new(db, module_name);
resolve_module_query(db, interned_name)
}
/// Salsa query that resolves an interned [`ModuleNameIngredient`] to a module.
///
/// This query should not be called directly. Instead, use [`resolve_module`]. It only exists
/// because Salsa requires the module name to be an ingredient.
#[salsa::tracked]
pub(crate) fn resolve_module_query(
db: &dyn Db,
module_name: internal::ModuleNameIngredient,
) -> Option<Module> {
let _ = tracing::trace_span!("resolve_module", module_name = ?module_name.debug(db)).enter();
let name = module_name.name(db);
let (search_path, module_file, kind) = resolve_name(db, name)?;
let module = Module {
inner: Arc::new(ModuleInner {
name: name.clone(),
kind,
search_path,
file: module_file,
}),
};
Some(module)
}
/// Resolves the module for the given path.
///
/// Returns `None` if the path is not a module locatable via `sys.path`.
#[tracing::instrument(level = "debug", skip(db))]
pub fn path_to_module(db: &dyn Db, path: &VfsPath) -> Option<Module> {
// It's not entirely clear on first sight why this method calls `file_to_module` instead of
// it being the other way round, considering that the first thing that `file_to_module` does
// is to retrieve the file's path.
//
// The reason is that `file_to_module` is a tracked Salsa query and salsa queries require that
// all arguments are Salsa ingredients (something stored in Salsa). `Path`s aren't salsa ingredients but
// `VfsFile` is. So what we do here is to retrieve the `path`'s `VfsFile` so that we can make
// use of Salsa's caching and invalidation.
let file = vfs_path_to_file(db.upcast(), path)?;
file_to_module(db, file)
}
/// Resolves the module for the file with the given id.
///
/// Returns `None` if the file is not a module locatable via `sys.path`.
#[salsa::tracked]
pub(crate) fn file_to_module(db: &dyn Db, file: VfsFile) -> Option<Module> {
let _ = tracing::trace_span!("file_to_module", file = ?file.debug(db.upcast())).enter();
let path = file.path(db.upcast());
let search_paths = module_search_paths(db);
let relative_path = search_paths
.iter()
.find_map(|root| match (root.path(), path) {
(VfsPath::FileSystem(root_path), VfsPath::FileSystem(path)) => {
let relative_path = path.strip_prefix(root_path).ok()?;
Some(relative_path)
}
(VfsPath::Vendored(_), VfsPath::Vendored(_)) => {
todo!("Add support for vendored modules")
}
(VfsPath::Vendored(_), VfsPath::FileSystem(_))
| (VfsPath::FileSystem(_), VfsPath::Vendored(_)) => None,
})?;
let module_name = ModuleName::from_relative_path(relative_path)?;
// Resolve the module name to see if Python would resolve the name to the same path.
// If it doesn't, then that means that multiple modules have the same name in different
// root paths, but that the module corresponding to `path` is in a lower priority search path,
// in which case we ignore it.
let module = resolve_module(db, module_name)?;
if file == module.file() {
Some(module)
} else {
// This path is for a module with the same name but with a different precedence. For example:
// ```
// src/foo.py
// src/foo/__init__.py
// ```
// The module name of `src/foo.py` is `foo`, but the module loaded by Python is `src/foo/__init__.py`.
// That means we need to ignore `src/foo.py` even though it resolves to the same module name.
None
}
}
/// Configures the [`ModuleSearchPath`]s that are used to resolve modules.
#[derive(Eq, PartialEq, Debug)]
pub struct ModuleResolutionSettings {
/// List of user-provided paths that should take first priority in the module resolution.
/// Examples in other type checkers are mypy's MYPYPATH environment variable,
/// or pyright's stubPath configuration setting.
pub extra_paths: Vec<FileSystemPathBuf>,
/// The root of the workspace, used for finding first-party modules.
pub workspace_root: FileSystemPathBuf,
/// The path to the user's `site-packages` directory, where third-party packages from ``PyPI`` are installed.
pub site_packages: Option<FileSystemPathBuf>,
/// Optional path to standard-library typeshed stubs.
/// Currently this has to be a directory that exists on disk.
///
/// (TODO: fall back to vendored stubs if no custom directory is provided.)
pub custom_typeshed: Option<FileSystemPathBuf>,
}
impl ModuleResolutionSettings {
/// Implementation of PEP 561's module resolution order
/// (with some small, deliberate, differences)
fn into_ordered_search_paths(self) -> OrderedSearchPaths {
let ModuleResolutionSettings {
extra_paths,
workspace_root,
site_packages,
custom_typeshed,
} = self;
let mut paths: Vec<_> = extra_paths
.into_iter()
.map(|path| ModuleSearchPath::new(path, ModuleSearchPathKind::Extra))
.collect();
paths.push(ModuleSearchPath::new(
workspace_root,
ModuleSearchPathKind::FirstParty,
));
// TODO fallback to vendored typeshed stubs if no custom typeshed directory is provided by the user
if let Some(custom_typeshed) = custom_typeshed {
paths.push(ModuleSearchPath::new(
custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY),
ModuleSearchPathKind::StandardLibrary,
));
}
// TODO vendor typeshed's third-party stubs as well as the stdlib and fallback to them as a final step
if let Some(site_packages) = site_packages {
paths.push(ModuleSearchPath::new(
site_packages,
ModuleSearchPathKind::SitePackagesThirdParty,
));
}
OrderedSearchPaths(paths)
}
}
/// A resolved module resolution order, implementing PEP 561
/// (with some small, deliberate differences)
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub(crate) struct OrderedSearchPaths(Vec<ModuleSearchPath>);
impl Deref for OrderedSearchPaths {
type Target = [ModuleSearchPath];
fn deref(&self) -> &Self::Target {
&self.0
}
}
// The singleton methods generated by salsa are all `pub` instead of `pub(crate)` which triggers
// `unreachable_pub`. Work around this by creating a module and allow `unreachable_pub` for it.
// Salsa also generates uses to `_db` variables for `interned` which triggers `clippy::used_underscore_binding`. Suppress that too
// TODO(micha): Contribute a fix for this upstream where the singleton methods have the same visibility as the struct.
#[allow(unreachable_pub, clippy::used_underscore_binding)]
pub(crate) mod internal {
use crate::module::resolver::OrderedSearchPaths;
use crate::module::ModuleName;
#[salsa::input(singleton)]
pub(crate) struct ModuleResolverSearchPaths {
#[return_ref]
pub(super) search_paths: OrderedSearchPaths,
}
/// A thin wrapper around `ModuleName` to make it a Salsa ingredient.
///
/// This is needed because Salsa requires that all query arguments are salsa ingredients.
#[salsa::interned]
pub(crate) struct ModuleNameIngredient {
#[return_ref]
pub(super) name: ModuleName,
}
}
fn module_search_paths(db: &dyn Db) -> &[ModuleSearchPath] {
ModuleResolverSearchPaths::get(db).search_paths(db)
}
/// Given a module name and a list of search paths in which to lookup modules,
/// attempt to resolve the module name
fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(ModuleSearchPath, VfsFile, ModuleKind)> {
let search_paths = module_search_paths(db);
for search_path in search_paths {
let mut components = name.components();
let module_name = components.next_back()?;
let VfsPath::FileSystem(fs_search_path) = search_path.path() else {
todo!("Vendored search paths are not yet supported");
};
match resolve_package(db.file_system(), fs_search_path, components) {
Ok(resolved_package) => {
let mut package_path = resolved_package.path;
package_path.push(module_name);
// Must be a `__init__.pyi` or `__init__.py` or it isn't a package.
let kind = if db.file_system().is_directory(&package_path) {
package_path.push("__init__");
ModuleKind::Package
} else {
ModuleKind::Module
};
// TODO Implement full https://peps.python.org/pep-0561/#type-checker-module-resolution-order resolution
let stub = package_path.with_extension("pyi");
if let Some(stub) = system_path_to_file(db.upcast(), &stub) {
return Some((search_path.clone(), stub, kind));
}
let module = package_path.with_extension("py");
if let Some(module) = system_path_to_file(db.upcast(), &module) {
return Some((search_path.clone(), module, kind));
}
// For regular packages, don't search the next search path. All files of that
// package must be in the same location
if resolved_package.kind.is_regular_package() {
return None;
}
}
Err(parent_kind) => {
if parent_kind.is_regular_package() {
// For regular packages, don't search the next search path.
return None;
}
}
}
}
None
}
fn resolve_package<'a, I>(
fs: &dyn FileSystem,
module_search_path: &FileSystemPath,
components: I,
) -> Result<ResolvedPackage, PackageKind>
where
I: Iterator<Item = &'a str>,
{
let mut package_path = module_search_path.to_path_buf();
// `true` if inside a folder that is a namespace package (has no `__init__.py`).
// Namespace packages are special because they can be spread across multiple search paths.
// https://peps.python.org/pep-0420/
let mut in_namespace_package = false;
// `true` if resolving a sub-package. For example, `true` when resolving `bar` of `foo.bar`.
let mut in_sub_package = false;
// For `foo.bar.baz`, test that `foo` and `baz` both contain a `__init__.py`.
for folder in components {
package_path.push(folder);
let has_init_py = fs.is_file(&package_path.join("__init__.py"))
|| fs.is_file(&package_path.join("__init__.pyi"));
if has_init_py {
in_namespace_package = false;
} else if fs.is_directory(&package_path) {
// A directory without an `__init__.py` is a namespace package, continue with the next folder.
in_namespace_package = true;
} else if in_namespace_package {
// Package not found but it is part of a namespace package.
return Err(PackageKind::Namespace);
} else if in_sub_package {
// A regular sub package wasn't found.
return Err(PackageKind::Regular);
} else {
// We couldn't find `foo` for `foo.bar.baz`, search the next search path.
return Err(PackageKind::Root);
}
in_sub_package = true;
}
let kind = if in_namespace_package {
PackageKind::Namespace
} else if in_sub_package {
PackageKind::Regular
} else {
PackageKind::Root
};
Ok(ResolvedPackage {
kind,
path: package_path,
})
}
#[derive(Debug)]
struct ResolvedPackage {
path: FileSystemPathBuf,
kind: PackageKind,
}
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum PackageKind {
/// A root package or module. E.g. `foo` in `foo.bar.baz` or just `foo`.
Root,
/// A regular sub-package where the parent contains an `__init__.py`.
///
/// For example, `bar` in `foo.bar` when the `foo` directory contains an `__init__.py`.
Regular,
/// A sub-package in a namespace package. A namespace package is a package without an `__init__.py`.
///
/// For example, `bar` in `foo.bar` if the `foo` directory contains no `__init__.py`.
Namespace,
}
impl PackageKind {
const fn is_regular_package(self) -> bool {
matches!(self, PackageKind::Regular)
}
}
#[cfg(test)]
mod tests {
use ruff_db::file_system::{FileSystemPath, FileSystemPathBuf};
use ruff_db::vfs::{system_path_to_file, VfsFile, VfsPath};
use crate::db::tests::TestDb;
use crate::module::{ModuleKind, ModuleName};
use super::{
path_to_module, resolve_module, set_module_resolution_settings, ModuleResolutionSettings,
TYPESHED_STDLIB_DIRECTORY,
};
struct TestCase {
db: TestDb,
src: FileSystemPathBuf,
custom_typeshed: FileSystemPathBuf,
site_packages: FileSystemPathBuf,
}
fn create_resolver() -> std::io::Result<TestCase> {
let mut db = TestDb::new();
let src = FileSystemPath::new("src").to_path_buf();
let site_packages = FileSystemPath::new("site_packages").to_path_buf();
let custom_typeshed = FileSystemPath::new("typeshed").to_path_buf();
let fs = db.memory_file_system();
fs.create_directory_all(&src)?;
fs.create_directory_all(&site_packages)?;
fs.create_directory_all(&custom_typeshed)?;
let settings = ModuleResolutionSettings {
extra_paths: vec![],
workspace_root: src.clone(),
site_packages: Some(site_packages.clone()),
custom_typeshed: Some(custom_typeshed.clone()),
};
set_module_resolution_settings(&mut db, settings);
Ok(TestCase {
db,
src,
custom_typeshed,
site_packages,
})
}
#[test]
fn first_party_module() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo_module_name = ModuleName::new_static("foo").unwrap();
let foo_path = src.join("foo.py");
db.memory_file_system()
.write_file(&foo_path, "print('Hello, world!')")?;
let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap();
assert_eq!(
Some(&foo_module),
resolve_module(&db, foo_module_name.clone()).as_ref()
);
assert_eq!("foo", foo_module.name());
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(ModuleKind::Module, foo_module.kind());
assert_eq!(&foo_path, foo_module.file().path(&db));
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_path))
);
Ok(())
}
#[test]
fn stdlib() -> anyhow::Result<()> {
let TestCase {
db,
custom_typeshed,
..
} = create_resolver()?;
let stdlib_dir = custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY);
let functools_path = stdlib_dir.join("functools.py");
db.memory_file_system()
.write_file(&functools_path, "def update_wrapper(): ...")?;
let functools_module_name = ModuleName::new_static("functools").unwrap();
let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap();
assert_eq!(
Some(&functools_module),
resolve_module(&db, functools_module_name).as_ref()
);
assert_eq!(&stdlib_dir, functools_module.search_path().path());
assert_eq!(ModuleKind::Module, functools_module.kind());
assert_eq!(&functools_path.clone(), functools_module.file().path(&db));
assert_eq!(
Some(functools_module),
path_to_module(&db, &VfsPath::FileSystem(functools_path))
);
Ok(())
}
#[test]
fn first_party_precedence_over_stdlib() -> anyhow::Result<()> {
let TestCase {
db,
src,
custom_typeshed,
..
} = create_resolver()?;
let stdlib_dir = custom_typeshed.join(TYPESHED_STDLIB_DIRECTORY);
let stdlib_functools_path = stdlib_dir.join("functools.py");
let first_party_functools_path = src.join("functools.py");
db.memory_file_system().write_files([
(&stdlib_functools_path, "def update_wrapper(): ..."),
(&first_party_functools_path, "def update_wrapper(): ..."),
])?;
let functools_module_name = ModuleName::new_static("functools").unwrap();
let functools_module = resolve_module(&db, functools_module_name.clone()).unwrap();
assert_eq!(
Some(&functools_module),
resolve_module(&db, functools_module_name).as_ref()
);
assert_eq!(&src, functools_module.search_path().path());
assert_eq!(ModuleKind::Module, functools_module.kind());
assert_eq!(
&first_party_functools_path.clone(),
functools_module.file().path(&db)
);
assert_eq!(
Some(functools_module),
path_to_module(&db, &VfsPath::FileSystem(first_party_functools_path))
);
Ok(())
}
// TODO: Port typeshed test case. Porting isn't possible at the moment because the vendored zip
// is part of the red knot crate
// #[test]
// fn typeshed_zip_created_at_build_time() -> anyhow::Result<()> {
// // The file path here is hardcoded in this crate's `build.rs` script.
// // Luckily this crate will fail to build if this file isn't available at build time.
// const TYPESHED_ZIP_BYTES: &[u8] =
// include_bytes!(concat!(env!("OUT_DIR"), "/zipped_typeshed.zip"));
// assert!(!TYPESHED_ZIP_BYTES.is_empty());
// let mut typeshed_zip_archive = ZipArchive::new(Cursor::new(TYPESHED_ZIP_BYTES))?;
//
// let path_to_functools = Path::new("stdlib").join("functools.pyi");
// let mut functools_module_stub = typeshed_zip_archive
// .by_name(path_to_functools.to_str().unwrap())
// .unwrap();
// assert!(functools_module_stub.is_file());
//
// let mut functools_module_stub_source = String::new();
// functools_module_stub.read_to_string(&mut functools_module_stub_source)?;
//
// assert!(functools_module_stub_source.contains("def update_wrapper("));
// Ok(())
// }
#[test]
fn resolve_package() -> anyhow::Result<()> {
let TestCase { src, db, .. } = create_resolver()?;
let foo_dir = src.join("foo");
let foo_path = foo_dir.join("__init__.py");
db.memory_file_system()
.write_file(&foo_path, "print('Hello, world!')")?;
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!("foo", foo_module.name());
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo_path, foo_module.file().path(&db));
assert_eq!(
Some(&foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_path)).as_ref()
);
// Resolving by directory doesn't resolve to the init file.
assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_dir)));
Ok(())
}
#[test]
fn package_priority_over_module() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo_dir = src.join("foo");
let foo_init = foo_dir.join("__init__.py");
db.memory_file_system()
.write_file(&foo_init, "print('Hello, world!')")?;
let foo_py = src.join("foo.py");
db.memory_file_system()
.write_file(&foo_py, "print('Hello, world!')")?;
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo_init, foo_module.file().path(&db));
assert_eq!(ModuleKind::Package, foo_module.kind());
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_init))
);
assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_py)));
Ok(())
}
#[test]
fn typing_stub_over_module() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo_stub = src.join("foo.pyi");
let foo_py = src.join("foo.py");
db.memory_file_system()
.write_files([(&foo_stub, "x: int"), (&foo_py, "print('Hello, world!')")])?;
let foo = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!(&src, foo.search_path().path());
assert_eq!(&foo_stub, foo.file().path(&db));
assert_eq!(
Some(foo),
path_to_module(&db, &VfsPath::FileSystem(foo_stub))
);
assert_eq!(None, path_to_module(&db, &VfsPath::FileSystem(foo_py)));
Ok(())
}
#[test]
fn sub_packages() -> anyhow::Result<()> {
let TestCase { db, src, .. } = create_resolver()?;
let foo = src.join("foo");
let bar = foo.join("bar");
let baz = bar.join("baz.py");
db.memory_file_system().write_files([
(&foo.join("__init__.py"), ""),
(&bar.join("__init__.py"), ""),
(&baz, "print('Hello, world!')"),
])?;
let baz_module =
resolve_module(&db, ModuleName::new_static("foo.bar.baz").unwrap()).unwrap();
assert_eq!(&src, baz_module.search_path().path());
assert_eq!(&baz, baz_module.file().path(&db));
assert_eq!(
Some(baz_module),
path_to_module(&db, &VfsPath::FileSystem(baz))
);
Ok(())
}
#[test]
fn namespace_package() -> anyhow::Result<()> {
let TestCase {
db,
src,
site_packages,
..
} = create_resolver()?;
// From [PEP420](https://peps.python.org/pep-0420/#nested-namespace-packages).
// But uses `src` for `project1` and `site_packages2` for `project2`.
// ```
// src
// parent
// child
// one.py
// site_packages
// parent
// child
// two.py
// ```
let parent1 = src.join("parent");
let child1 = parent1.join("child");
let one = child1.join("one.py");
let parent2 = site_packages.join("parent");
let child2 = parent2.join("child");
let two = child2.join("two.py");
db.memory_file_system().write_files([
(&one, "print('Hello, world!')"),
(&two, "print('Hello, world!')"),
])?;
let one_module =
resolve_module(&db, ModuleName::new_static("parent.child.one").unwrap()).unwrap();
assert_eq!(
Some(one_module),
path_to_module(&db, &VfsPath::FileSystem(one))
);
let two_module =
resolve_module(&db, ModuleName::new_static("parent.child.two").unwrap()).unwrap();
assert_eq!(
Some(two_module),
path_to_module(&db, &VfsPath::FileSystem(two))
);
Ok(())
}
#[test]
fn regular_package_in_namespace_package() -> anyhow::Result<()> {
let TestCase {
db,
src,
site_packages,
..
} = create_resolver()?;
// Adopted test case from the [PEP420 examples](https://peps.python.org/pep-0420/#nested-namespace-packages).
// The `src/parent/child` package is a regular package. Therefore, `site_packages/parent/child/two.py` should not be resolved.
// ```
// src
// parent
// child
// one.py
// site_packages
// parent
// child
// two.py
// ```
let parent1 = src.join("parent");
let child1 = parent1.join("child");
let one = child1.join("one.py");
let parent2 = site_packages.join("parent");
let child2 = parent2.join("child");
let two = child2.join("two.py");
db.memory_file_system().write_files([
(&child1.join("__init__.py"), "print('Hello, world!')"),
(&one, "print('Hello, world!')"),
(&two, "print('Hello, world!')"),
])?;
let one_module =
resolve_module(&db, ModuleName::new_static("parent.child.one").unwrap()).unwrap();
assert_eq!(
Some(one_module),
path_to_module(&db, &VfsPath::FileSystem(one))
);
assert_eq!(
None,
resolve_module(&db, ModuleName::new_static("parent.child.two").unwrap())
);
Ok(())
}
#[test]
fn module_search_path_priority() -> anyhow::Result<()> {
let TestCase {
db,
src,
site_packages,
..
} = create_resolver()?;
let foo_src = src.join("foo.py");
let foo_site_packages = site_packages.join("foo.py");
db.memory_file_system()
.write_files([(&foo_src, ""), (&foo_site_packages, "")])?;
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo_src, foo_module.file().path(&db));
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo_src))
);
assert_eq!(
None,
path_to_module(&db, &VfsPath::FileSystem(foo_site_packages))
);
Ok(())
}
#[test]
#[cfg(target_family = "unix")]
fn symlink() -> anyhow::Result<()> {
let TestCase {
mut db,
src,
site_packages,
custom_typeshed,
} = create_resolver()?;
db.with_os_file_system();
let temp_dir = tempfile::tempdir()?;
let root = FileSystemPath::from_std_path(temp_dir.path()).unwrap();
let src = root.join(src);
let site_packages = root.join(site_packages);
let custom_typeshed = root.join(custom_typeshed);
let foo = src.join("foo.py");
let bar = src.join("bar.py");
std::fs::create_dir_all(src.as_std_path())?;
std::fs::create_dir_all(site_packages.as_std_path())?;
std::fs::create_dir_all(custom_typeshed.as_std_path())?;
std::fs::write(foo.as_std_path(), "")?;
std::os::unix::fs::symlink(foo.as_std_path(), bar.as_std_path())?;
let settings = ModuleResolutionSettings {
extra_paths: vec![],
workspace_root: src.clone(),
site_packages: Some(site_packages),
custom_typeshed: Some(custom_typeshed),
};
set_module_resolution_settings(&mut db, settings);
let foo_module = resolve_module(&db, ModuleName::new_static("foo").unwrap()).unwrap();
let bar_module = resolve_module(&db, ModuleName::new_static("bar").unwrap()).unwrap();
assert_ne!(foo_module, bar_module);
assert_eq!(&src, foo_module.search_path().path());
assert_eq!(&foo, foo_module.file().path(&db));
// `foo` and `bar` shouldn't resolve to the same file
assert_eq!(&src, bar_module.search_path().path());
assert_eq!(&bar, bar_module.file().path(&db));
assert_eq!(&foo, foo_module.file().path(&db));
assert_ne!(&foo_module, &bar_module);
assert_eq!(
Some(foo_module),
path_to_module(&db, &VfsPath::FileSystem(foo))
);
assert_eq!(
Some(bar_module),
path_to_module(&db, &VfsPath::FileSystem(bar))
);
Ok(())
}
#[test]
fn deleting_an_unrealted_file_doesnt_change_module_resolution() -> anyhow::Result<()> {
let TestCase { mut db, src, .. } = create_resolver()?;
let foo_path = src.join("foo.py");
let bar_path = src.join("bar.py");
db.memory_file_system()
.write_files([(&foo_path, "x = 1"), (&bar_path, "y = 2")])?;
let foo_module_name = ModuleName::new_static("foo").unwrap();
let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap();
let bar = system_path_to_file(&db, &bar_path).expect("bar.py to exist");
db.clear_salsa_events();
// Delete `bar.py`
db.memory_file_system().remove_file(&bar_path)?;
bar.touch(&mut db);
// Re-query the foo module. The foo module should still be cached because `bar.py` isn't relevant
// for resolving `foo`.
let foo_module2 = resolve_module(&db, foo_module_name);
assert!(!db
.take_salsa_events()
.iter()
.any(|event| { matches!(event.kind, salsa::EventKind::WillExecute { .. }) }));
assert_eq!(Some(foo_module), foo_module2);
Ok(())
}
#[test]
fn adding_a_file_on_which_the_module_resolution_depends_on_invalidates_the_query(
) -> anyhow::Result<()> {
let TestCase { mut db, src, .. } = create_resolver()?;
let foo_path = src.join("foo.py");
let foo_module_name = ModuleName::new_static("foo").unwrap();
assert_eq!(resolve_module(&db, foo_module_name.clone()), None);
// Now write the foo file
db.memory_file_system().write_file(&foo_path, "x = 1")?;
VfsFile::touch_path(&mut db, &VfsPath::FileSystem(foo_path.clone()));
let foo_file = system_path_to_file(&db, &foo_path).expect("foo.py to exist");
let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve");
assert_eq!(foo_file, foo_module.file());
Ok(())
}
#[test]
fn removing_a_file_that_the_module_resolution_depends_on_invalidates_the_query(
) -> anyhow::Result<()> {
let TestCase { mut db, src, .. } = create_resolver()?;
let foo_path = src.join("foo.py");
let foo_init_path = src.join("foo/__init__.py");
db.memory_file_system()
.write_files([(&foo_path, "x = 1"), (&foo_init_path, "x = 2")])?;
let foo_module_name = ModuleName::new_static("foo").unwrap();
let foo_module = resolve_module(&db, foo_module_name.clone()).expect("foo module to exist");
assert_eq!(&foo_init_path, foo_module.file().path(&db));
// Delete `foo/__init__.py` and the `foo` folder. `foo` should now resolve to `foo.py`
db.memory_file_system().remove_file(&foo_init_path)?;
db.memory_file_system()
.remove_directory(foo_init_path.parent().unwrap())?;
VfsFile::touch_path(&mut db, &VfsPath::FileSystem(foo_init_path.clone()));
let foo_module = resolve_module(&db, foo_module_name).expect("Foo module to resolve");
assert_eq!(&foo_path, foo_module.file().path(&db));
Ok(())
}
}

View File

@@ -0,0 +1,198 @@
use std::fmt;
use std::ops::Deref;
use compact_str::{CompactString, ToCompactString};
use ruff_python_stdlib::identifiers::is_identifier;
/// A module name, e.g. `foo.bar`.
///
/// Always normalized to the absolute form (never a relative module name, i.e., never `.foo`).
#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct ModuleName(compact_str::CompactString);
impl ModuleName {
/// Creates a new module name for `name`. Returns `Some` if `name` is a valid, absolute
/// module name and `None` otherwise.
///
/// The module name is invalid if:
///
/// * The name is empty
/// * The name is relative
/// * The name ends with a `.`
/// * The name contains a sequence of multiple dots
/// * A component of a name (the part between two dots) isn't a valid python identifier.
#[inline]
#[must_use]
pub fn new(name: &str) -> Option<Self> {
Self::is_valid_name(name).then(|| Self(CompactString::from(name)))
}
/// Creates a new module name for `name` where `name` is a static string.
/// Returns `Some` if `name` is a valid, absolute module name and `None` otherwise.
///
/// The module name is invalid if:
///
/// * The name is empty
/// * The name is relative
/// * The name ends with a `.`
/// * The name contains a sequence of multiple dots
/// * A component of a name (the part between two dots) isn't a valid python identifier.
///
/// ## Examples
///
/// ```
/// use red_knot_python_semantic::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar").as_deref(), Some("foo.bar"));
/// assert_eq!(ModuleName::new_static(""), None);
/// assert_eq!(ModuleName::new_static("..foo"), None);
/// assert_eq!(ModuleName::new_static(".foo"), None);
/// assert_eq!(ModuleName::new_static("foo."), None);
/// assert_eq!(ModuleName::new_static("foo..bar"), None);
/// assert_eq!(ModuleName::new_static("2000"), None);
/// ```
#[inline]
#[must_use]
pub fn new_static(name: &'static str) -> Option<Self> {
Self::is_valid_name(name).then(|| Self(CompactString::const_new(name)))
}
#[must_use]
fn is_valid_name(name: &str) -> bool {
!name.is_empty() && name.split('.').all(is_identifier)
}
/// An iterator over the components of the module name:
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().components().collect::<Vec<_>>(), vec!["foo", "bar", "baz"]);
/// ```
#[must_use]
pub fn components(&self) -> impl DoubleEndedIterator<Item = &str> {
self.0.split('.')
}
/// The name of this module's immediate parent, if it has a parent.
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::ModuleName;
///
/// assert_eq!(ModuleName::new_static("foo.bar").unwrap().parent(), Some(ModuleName::new_static("foo").unwrap()));
/// assert_eq!(ModuleName::new_static("foo.bar.baz").unwrap().parent(), Some(ModuleName::new_static("foo.bar").unwrap()));
/// assert_eq!(ModuleName::new_static("root").unwrap().parent(), None);
/// ```
#[must_use]
pub fn parent(&self) -> Option<ModuleName> {
let (parent, _) = self.0.rsplit_once('.')?;
Some(Self(parent.to_compact_string()))
}
/// Returns `true` if the name starts with `other`.
///
/// This is equivalent to checking if `self` is a sub-module of `other`.
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::ModuleName;
///
/// assert!(ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap()));
///
/// assert!(!ModuleName::new_static("foo.bar").unwrap().starts_with(&ModuleName::new_static("bar").unwrap()));
/// assert!(!ModuleName::new_static("foo_bar").unwrap().starts_with(&ModuleName::new_static("foo").unwrap()));
/// ```
#[must_use]
pub fn starts_with(&self, other: &ModuleName) -> bool {
let mut self_components = self.components();
let other_components = other.components();
for other_component in other_components {
if self_components.next() != Some(other_component) {
return false;
}
}
true
}
#[must_use]
#[inline]
pub fn as_str(&self) -> &str {
&self.0
}
/// Construct a [`ModuleName`] from a sequence of parts.
///
/// # Examples
///
/// ```
/// use red_knot_python_semantic::ModuleName;
///
/// assert_eq!(&*ModuleName::from_components(["a"]).unwrap(), "a");
/// assert_eq!(&*ModuleName::from_components(["a", "b"]).unwrap(), "a.b");
/// assert_eq!(&*ModuleName::from_components(["a", "b", "c"]).unwrap(), "a.b.c");
///
/// assert_eq!(ModuleName::from_components(["a-b"]), None);
/// assert_eq!(ModuleName::from_components(["a", "a-b"]), None);
/// assert_eq!(ModuleName::from_components(["a", "b", "a-b-c"]), None);
/// ```
#[must_use]
pub fn from_components<'a>(components: impl IntoIterator<Item = &'a str>) -> Option<Self> {
let mut components = components.into_iter();
let first_part = components.next()?;
if !is_identifier(first_part) {
return None;
}
let name = if let Some(second_part) = components.next() {
if !is_identifier(second_part) {
return None;
}
let mut name = format!("{first_part}.{second_part}");
for part in components {
if !is_identifier(part) {
return None;
}
name.push('.');
name.push_str(part);
}
CompactString::from(&name)
} else {
CompactString::from(first_part)
};
Some(Self(name))
}
}
impl Deref for ModuleName {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl PartialEq<str> for ModuleName {
fn eq(&self, other: &str) -> bool {
self.as_str() == other
}
}
impl PartialEq<ModuleName> for str {
fn eq(&self, other: &ModuleName) -> bool {
self == other.as_str()
}
}
impl std::fmt::Display for ModuleName {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.0)
}
}

View File

@@ -0,0 +1,45 @@
use std::iter::FusedIterator;
pub(crate) use module::Module;
pub use resolver::resolve_module;
use ruff_db::system::SystemPath;
pub use typeshed::vendored_typeshed_stubs;
use crate::Db;
use resolver::{module_resolution_settings, SearchPathIterator};
mod module;
mod path;
mod resolver;
mod state;
mod typeshed;
#[cfg(test)]
mod testing;
/// Returns an iterator over all search paths pointing to a system path
pub fn system_module_search_paths(db: &dyn Db) -> SystemModuleSearchPathsIter {
SystemModuleSearchPathsIter {
inner: module_resolution_settings(db).search_paths(db),
}
}
pub struct SystemModuleSearchPathsIter<'db> {
inner: SearchPathIterator<'db>,
}
impl<'db> Iterator for SystemModuleSearchPathsIter<'db> {
type Item = &'db SystemPath;
fn next(&mut self) -> Option<Self::Item> {
loop {
let next = self.inner.next()?;
if let Some(system_path) = next.as_system_path() {
return Some(system_path);
}
}
}
}
impl FusedIterator for SystemModuleSearchPathsIter<'_> {}

View File

@@ -0,0 +1,79 @@
use std::fmt::Formatter;
use std::sync::Arc;
use ruff_db::files::File;
use super::path::SearchPath;
use crate::module_name::ModuleName;
/// Representation of a Python module.
#[derive(Clone, PartialEq, Eq)]
pub struct Module {
inner: Arc<ModuleInner>,
}
impl Module {
pub(crate) fn new(
name: ModuleName,
kind: ModuleKind,
search_path: SearchPath,
file: File,
) -> Self {
Self {
inner: Arc::new(ModuleInner {
name,
kind,
search_path,
file,
}),
}
}
/// The absolute name of the module (e.g. `foo.bar`)
pub fn name(&self) -> &ModuleName {
&self.inner.name
}
/// The file to the source code that defines this module
pub fn file(&self) -> File {
self.inner.file
}
/// The search path from which the module was resolved.
pub(crate) fn search_path(&self) -> &SearchPath {
&self.inner.search_path
}
/// Determine whether this module is a single-file module or a package
pub fn kind(&self) -> ModuleKind {
self.inner.kind
}
}
impl std::fmt::Debug for Module {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Module")
.field("name", &self.name())
.field("kind", &self.kind())
.field("file", &self.file())
.field("search_path", &self.search_path())
.finish()
}
}
#[derive(PartialEq, Eq)]
struct ModuleInner {
name: ModuleName,
kind: ModuleKind,
search_path: SearchPath,
file: File,
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ModuleKind {
/// A single-file module (e.g. `foo.py` or `foo.pyi`)
Module,
/// A python package (`foo/__init__.py` or `foo/__init__.pyi`)
Package,
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,25 @@
use ruff_db::vendored::VendoredFileSystem;
use super::typeshed::LazyTypeshedVersions;
use crate::db::Db;
use crate::python_version::PythonVersion;
pub(crate) struct ResolverState<'db> {
pub(crate) db: &'db dyn Db,
pub(crate) typeshed_versions: LazyTypeshedVersions<'db>,
pub(crate) target_version: PythonVersion,
}
impl<'db> ResolverState<'db> {
pub(crate) fn new(db: &'db dyn Db, target_version: PythonVersion) -> Self {
Self {
db,
typeshed_versions: LazyTypeshedVersions::new(),
target_version,
}
}
pub(crate) fn vendored(&self) -> &VendoredFileSystem {
self.db.vendored()
}
}

View File

@@ -0,0 +1,295 @@
use ruff_db::system::{DbWithTestSystem, SystemPath, SystemPathBuf};
use ruff_db::vendored::VendoredPathBuf;
use crate::db::tests::TestDb;
use crate::program::{Program, SearchPathSettings};
use crate::python_version::PythonVersion;
/// A test case for the module resolver.
///
/// You generally shouldn't construct instances of this struct directly;
/// instead, use the [`TestCaseBuilder`].
pub(crate) struct TestCase<T> {
pub(crate) db: TestDb,
pub(crate) src: SystemPathBuf,
pub(crate) stdlib: T,
// Most test cases only ever need a single `site-packages` directory,
// so this is a single directory instead of a `Vec` of directories,
// like it is in `ruff_db::Program`.
pub(crate) site_packages: SystemPathBuf,
pub(crate) target_version: PythonVersion,
}
/// A `(file_name, file_contents)` tuple
pub(crate) type FileSpec = (&'static str, &'static str);
/// Specification for a typeshed mock to be created as part of a test
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct MockedTypeshed {
/// The stdlib files to be created in the typeshed mock
pub(crate) stdlib_files: &'static [FileSpec],
/// The contents of the `stdlib/VERSIONS` file
/// to be created in the typeshed mock
pub(crate) versions: &'static str,
}
#[derive(Debug)]
pub(crate) struct VendoredTypeshed;
#[derive(Debug)]
pub(crate) struct UnspecifiedTypeshed;
/// A builder for a module-resolver test case.
///
/// The builder takes care of creating a [`TestDb`]
/// instance, applying the module resolver settings,
/// and creating mock directories for the stdlib, `site-packages`,
/// first-party code, etc.
///
/// For simple tests that do not involve typeshed,
/// test cases can be created as follows:
///
/// ```rs
/// let test_case = TestCaseBuilder::new()
/// .with_src_files(...)
/// .build();
///
/// let test_case2 = TestCaseBuilder::new()
/// .with_site_packages_files(...)
/// .build();
/// ```
///
/// Any tests can specify the target Python version that should be used
/// in the module resolver settings:
///
/// ```rs
/// let test_case = TestCaseBuilder::new()
/// .with_src_files(...)
/// .with_target_version(...)
/// .build();
/// ```
///
/// For tests checking that standard-library module resolution is working
/// correctly, you should usually create a [`MockedTypeshed`] instance
/// and pass it to the [`TestCaseBuilder::with_custom_typeshed`] method.
/// If you need to check something that involves the vendored typeshed stubs
/// we include as part of the binary, you can instead use the
/// [`TestCaseBuilder::with_vendored_typeshed`] method.
/// For either of these, you should almost always try to be explicit
/// about the Python version you want to be specified in the module-resolver
/// settings for the test:
///
/// ```rs
/// const TYPESHED = MockedTypeshed { ... };
///
/// let test_case = resolver_test_case()
/// .with_custom_typeshed(TYPESHED)
/// .with_target_version(...)
/// .build();
///
/// let test_case2 = resolver_test_case()
/// .with_vendored_typeshed()
/// .with_target_version(...)
/// .build();
/// ```
///
/// If you have not called one of those options, the `stdlib` field
/// on the [`TestCase`] instance created from `.build()` will be set
/// to `()`.
pub(crate) struct TestCaseBuilder<T> {
typeshed_option: T,
target_version: PythonVersion,
first_party_files: Vec<FileSpec>,
site_packages_files: Vec<FileSpec>,
}
impl<T> TestCaseBuilder<T> {
/// Specify files to be created in the `src` mock directory
pub(crate) fn with_src_files(mut self, files: &[FileSpec]) -> Self {
self.first_party_files.extend(files.iter().copied());
self
}
/// Specify files to be created in the `site-packages` mock directory
pub(crate) fn with_site_packages_files(mut self, files: &[FileSpec]) -> Self {
self.site_packages_files.extend(files.iter().copied());
self
}
/// Specify the target Python version the module resolver should assume
pub(crate) fn with_target_version(mut self, target_version: PythonVersion) -> Self {
self.target_version = target_version;
self
}
fn write_mock_directory(
db: &mut TestDb,
location: impl AsRef<SystemPath>,
files: impl IntoIterator<Item = FileSpec>,
) -> SystemPathBuf {
let root = location.as_ref().to_path_buf();
// Make sure to create the directory even if the list of files is empty:
db.memory_file_system().create_directory_all(&root).unwrap();
db.write_files(
files
.into_iter()
.map(|(relative_path, contents)| (root.join(relative_path), contents)),
)
.unwrap();
root
}
}
impl TestCaseBuilder<UnspecifiedTypeshed> {
pub(crate) fn new() -> TestCaseBuilder<UnspecifiedTypeshed> {
Self {
typeshed_option: UnspecifiedTypeshed,
target_version: PythonVersion::default(),
first_party_files: vec![],
site_packages_files: vec![],
}
}
/// Use the vendored stdlib stubs included in the Ruff binary for this test case
pub(crate) fn with_vendored_typeshed(self) -> TestCaseBuilder<VendoredTypeshed> {
let TestCaseBuilder {
typeshed_option: _,
target_version,
first_party_files,
site_packages_files,
} = self;
TestCaseBuilder {
typeshed_option: VendoredTypeshed,
target_version,
first_party_files,
site_packages_files,
}
}
/// Use a mock typeshed directory for this test case
pub(crate) fn with_custom_typeshed(
self,
typeshed: MockedTypeshed,
) -> TestCaseBuilder<MockedTypeshed> {
let TestCaseBuilder {
typeshed_option: _,
target_version,
first_party_files,
site_packages_files,
} = self;
TestCaseBuilder {
typeshed_option: typeshed,
target_version,
first_party_files,
site_packages_files,
}
}
pub(crate) fn build(self) -> TestCase<()> {
let TestCase {
db,
src,
stdlib: _,
site_packages,
target_version,
} = self.with_custom_typeshed(MockedTypeshed::default()).build();
TestCase {
db,
src,
stdlib: (),
site_packages,
target_version,
}
}
}
impl TestCaseBuilder<MockedTypeshed> {
pub(crate) fn build(self) -> TestCase<SystemPathBuf> {
let TestCaseBuilder {
typeshed_option,
target_version,
first_party_files,
site_packages_files,
} = self;
let mut db = TestDb::new();
let site_packages =
Self::write_mock_directory(&mut db, "/site-packages", site_packages_files);
let src = Self::write_mock_directory(&mut db, "/src", first_party_files);
let typeshed = Self::build_typeshed_mock(&mut db, &typeshed_option);
Program::new(
&db,
target_version,
SearchPathSettings {
extra_paths: vec![],
src_root: src.clone(),
custom_typeshed: Some(typeshed.clone()),
site_packages: vec![site_packages.clone()],
},
);
TestCase {
db,
src,
stdlib: typeshed.join("stdlib"),
site_packages,
target_version,
}
}
fn build_typeshed_mock(db: &mut TestDb, typeshed_to_build: &MockedTypeshed) -> SystemPathBuf {
let typeshed = SystemPathBuf::from("/typeshed");
let MockedTypeshed {
stdlib_files,
versions,
} = typeshed_to_build;
Self::write_mock_directory(
db,
typeshed.join("stdlib"),
stdlib_files
.iter()
.copied()
.chain(std::iter::once(("VERSIONS", *versions))),
);
typeshed
}
}
impl TestCaseBuilder<VendoredTypeshed> {
pub(crate) fn build(self) -> TestCase<VendoredPathBuf> {
let TestCaseBuilder {
typeshed_option: VendoredTypeshed,
target_version,
first_party_files,
site_packages_files,
} = self;
let mut db = TestDb::new();
let site_packages =
Self::write_mock_directory(&mut db, "/site-packages", site_packages_files);
let src = Self::write_mock_directory(&mut db, "/src", first_party_files);
Program::new(
&db,
target_version,
SearchPathSettings {
extra_paths: vec![],
src_root: src.clone(),
custom_typeshed: None,
site_packages: vec![site_packages.clone()],
},
);
TestCase {
db,
src,
stdlib: VendoredPathBuf::from("stdlib"),
site_packages,
target_version,
}
}
}

View File

@@ -0,0 +1,8 @@
pub use self::vendored::vendored_typeshed_stubs;
pub(super) use self::versions::{
parse_typeshed_versions, LazyTypeshedVersions, TypeshedVersionsParseError,
TypeshedVersionsQueryResult,
};
mod vendored;
mod versions;

View File

@@ -0,0 +1,99 @@
use once_cell::sync::Lazy;
use ruff_db::vendored::VendoredFileSystem;
// The file path here is hardcoded in this crate's `build.rs` script.
// Luckily this crate will fail to build if this file isn't available at build time.
static TYPESHED_ZIP_BYTES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/zipped_typeshed.zip"));
pub fn vendored_typeshed_stubs() -> &'static VendoredFileSystem {
static VENDORED_TYPESHED_STUBS: Lazy<VendoredFileSystem> =
Lazy::new(|| VendoredFileSystem::new_static(TYPESHED_ZIP_BYTES).unwrap());
&VENDORED_TYPESHED_STUBS
}
#[cfg(test)]
mod tests {
use std::io::{self, Read};
use std::path::Path;
use ruff_db::vendored::VendoredPath;
use super::*;
#[test]
fn typeshed_zip_created_at_build_time() {
let mut typeshed_zip_archive =
zip::ZipArchive::new(io::Cursor::new(TYPESHED_ZIP_BYTES)).unwrap();
let mut functools_module_stub = typeshed_zip_archive
.by_name("stdlib/functools.pyi")
.unwrap();
assert!(functools_module_stub.is_file());
let mut functools_module_stub_source = String::new();
functools_module_stub
.read_to_string(&mut functools_module_stub_source)
.unwrap();
assert!(functools_module_stub_source.contains("def update_wrapper("));
}
#[test]
fn typeshed_vfs_consistent_with_vendored_stubs() {
let vendored_typeshed_dir = Path::new("vendor/typeshed").canonicalize().unwrap();
let vendored_typeshed_stubs = vendored_typeshed_stubs();
let mut empty_iterator = true;
for entry in walkdir::WalkDir::new(&vendored_typeshed_dir).min_depth(1) {
empty_iterator = false;
let entry = entry.unwrap();
let absolute_path = entry.path();
let file_type = entry.file_type();
let relative_path = absolute_path
.strip_prefix(&vendored_typeshed_dir)
.unwrap_or_else(|_| {
panic!("Expected {absolute_path:?} to be a child of {vendored_typeshed_dir:?}")
});
let vendored_path = <&VendoredPath>::try_from(relative_path)
.unwrap_or_else(|_| panic!("Expected {relative_path:?} to be valid UTF-8"));
assert!(
vendored_typeshed_stubs.exists(vendored_path),
"Expected {vendored_path:?} to exist in the `VendoredFileSystem`!
Vendored file system:
{vendored_typeshed_stubs:#?}
"
);
let vendored_path_kind = vendored_typeshed_stubs
.metadata(vendored_path)
.unwrap_or_else(|_| {
panic!(
"Expected metadata for {vendored_path:?} to be retrievable from the `VendoredFileSystem!
Vendored file system:
{vendored_typeshed_stubs:#?}
"
)
})
.kind();
assert_eq!(
vendored_path_kind.is_directory(),
file_type.is_dir(),
"{vendored_path:?} had type {vendored_path_kind:?}, inconsistent with fs path {relative_path:?}: {file_type:?}"
);
}
assert!(
!empty_iterator,
"Expected there to be at least one file or directory in the vendored typeshed stubs!"
);
}
}

View File

@@ -0,0 +1,766 @@
use std::cell::OnceCell;
use std::collections::BTreeMap;
use std::fmt;
use std::num::{NonZeroU16, NonZeroUsize};
use std::ops::{RangeFrom, RangeInclusive};
use std::str::FromStr;
use once_cell::sync::Lazy;
use ruff_db::system::SystemPath;
use rustc_hash::FxHashMap;
use ruff_db::files::{system_path_to_file, File};
use super::vendored::vendored_typeshed_stubs;
use crate::db::Db;
use crate::module_name::ModuleName;
use crate::python_version::PythonVersion;
#[derive(Debug)]
pub(crate) struct LazyTypeshedVersions<'db>(OnceCell<&'db TypeshedVersions>);
impl<'db> LazyTypeshedVersions<'db> {
#[must_use]
pub(crate) fn new() -> Self {
Self(OnceCell::new())
}
/// Query whether a module exists at runtime in the stdlib on a certain Python version.
///
/// Simply probing whether a file exists in typeshed is insufficient for this question,
/// as a module in the stdlib may have been added in Python 3.10, but the typeshed stub
/// will still be available (either in a custom typeshed dir or in our vendored copy)
/// even if the user specified Python 3.8 as the target version.
///
/// For top-level modules and packages, the VERSIONS file can always provide an unambiguous answer
/// as to whether the module exists on the specified target version. However, VERSIONS does not
/// provide comprehensive information on all submodules, meaning that this method sometimes
/// returns [`TypeshedVersionsQueryResult::MaybeExists`].
/// See [`TypeshedVersionsQueryResult`] for more details.
#[must_use]
pub(crate) fn query_module(
&self,
db: &'db dyn Db,
module: &ModuleName,
stdlib_root: Option<&SystemPath>,
target_version: PythonVersion,
) -> TypeshedVersionsQueryResult {
let versions = self.0.get_or_init(|| {
let versions_path = if let Some(system_path) = stdlib_root {
system_path.join("VERSIONS")
} else {
return &VENDORED_VERSIONS;
};
let Ok(versions_file) = system_path_to_file(db.upcast(), &versions_path) else {
todo!(
"Still need to figure out how to handle VERSIONS files being deleted \
from custom typeshed directories! Expected a file to exist at {versions_path}"
)
};
// TODO(Alex/Micha): If VERSIONS is invalid,
// this should invalidate not just the specific module resolution we're currently attempting,
// but all type inference that depends on any standard-library types.
// Unwrapping here is not correct...
parse_typeshed_versions(db, versions_file).as_ref().unwrap()
});
versions.query_module(module, target_version)
}
}
#[salsa::tracked(return_ref)]
pub(crate) fn parse_typeshed_versions(
db: &dyn Db,
versions_file: File,
) -> Result<TypeshedVersions, TypeshedVersionsParseError> {
// TODO: Handle IO errors
let file_content = versions_file
.read_to_string(db.upcast())
.unwrap_or_default();
file_content.parse()
}
static VENDORED_VERSIONS: Lazy<TypeshedVersions> = Lazy::new(|| {
TypeshedVersions::from_str(
&vendored_typeshed_stubs()
.read_to_string("stdlib/VERSIONS")
.unwrap(),
)
.unwrap()
});
#[derive(Debug, PartialEq, Eq, Clone)]
pub(crate) struct TypeshedVersionsParseError {
line_number: Option<NonZeroU16>,
reason: TypeshedVersionsParseErrorKind,
}
impl fmt::Display for TypeshedVersionsParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let TypeshedVersionsParseError {
line_number,
reason,
} = self;
if let Some(line_number) = line_number {
write!(
f,
"Error while parsing line {line_number} of typeshed's VERSIONS file: {reason}"
)
} else {
write!(f, "Error while parsing typeshed's VERSIONS file: {reason}")
}
}
}
impl std::error::Error for TypeshedVersionsParseError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
if let TypeshedVersionsParseErrorKind::IntegerParsingFailure { err, .. } = &self.reason {
Some(err)
} else {
None
}
}
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub(super) enum TypeshedVersionsParseErrorKind {
TooManyLines(NonZeroUsize),
UnexpectedNumberOfColons,
InvalidModuleName(String),
UnexpectedNumberOfHyphens,
UnexpectedNumberOfPeriods(String),
IntegerParsingFailure {
version: String,
err: std::num::ParseIntError,
},
}
impl fmt::Display for TypeshedVersionsParseErrorKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::TooManyLines(num_lines) => write!(
f,
"File has too many lines ({num_lines}); maximum allowed is {}",
NonZeroU16::MAX
),
Self::UnexpectedNumberOfColons => {
f.write_str("Expected every non-comment line to have exactly one colon")
}
Self::InvalidModuleName(name) => write!(
f,
"Expected all components of '{name}' to be valid Python identifiers"
),
Self::UnexpectedNumberOfHyphens => {
f.write_str("Expected every non-comment line to have exactly one '-' character")
}
Self::UnexpectedNumberOfPeriods(format) => write!(
f,
"Expected all versions to be in the form {{MAJOR}}.{{MINOR}}; got '{format}'"
),
Self::IntegerParsingFailure { version, err } => write!(
f,
"Failed to convert '{version}' to a pair of integers due to {err}",
),
}
}
}
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct TypeshedVersions(FxHashMap<ModuleName, PyVersionRange>);
impl TypeshedVersions {
#[must_use]
fn exact(&self, module_name: &ModuleName) -> Option<&PyVersionRange> {
self.0.get(module_name)
}
#[must_use]
fn query_module(
&self,
module: &ModuleName,
target_version: PythonVersion,
) -> TypeshedVersionsQueryResult {
if let Some(range) = self.exact(module) {
if range.contains(target_version) {
TypeshedVersionsQueryResult::Exists
} else {
TypeshedVersionsQueryResult::DoesNotExist
}
} else {
let mut module = module.parent();
while let Some(module_to_try) = module {
if let Some(range) = self.exact(&module_to_try) {
return {
if range.contains(target_version) {
TypeshedVersionsQueryResult::MaybeExists
} else {
TypeshedVersionsQueryResult::DoesNotExist
}
};
}
module = module_to_try.parent();
}
TypeshedVersionsQueryResult::DoesNotExist
}
}
}
/// Possible answers [`LazyTypeshedVersions::query_module()`] could give to the question:
/// "Does this module exist in the stdlib at runtime on a certain target version?"
#[derive(Debug, Copy, PartialEq, Eq, Clone, Hash)]
pub(crate) enum TypeshedVersionsQueryResult {
/// The module definitely exists in the stdlib at runtime on the user-specified target version.
///
/// For example:
/// - The target version is Python 3.8
/// - We're querying whether the `asyncio.tasks` module exists in the stdlib
/// - The VERSIONS file contains the line `asyncio.tasks: 3.8-`
Exists,
/// The module definitely does not exist in the stdlib on the user-specified target version.
///
/// For example:
/// - We're querying whether the `foo` module exists in the stdlib
/// - There is no top-level `foo` module in VERSIONS
///
/// OR:
/// - The target version is Python 3.8
/// - We're querying whether the module `importlib.abc` exists in the stdlib
/// - The VERSIONS file contains the line `importlib.abc: 3.10-`,
/// indicating that the module was added in 3.10
///
/// OR:
/// - The target version is Python 3.8
/// - We're querying whether the module `collections.abc` exists in the stdlib
/// - The VERSIONS file does not contain any information about the `collections.abc` submodule,
/// but *does* contain the line `collections: 3.10-`,
/// indicating that the entire `collections` package was added in Python 3.10.
DoesNotExist,
/// The module potentially exists in the stdlib and, if it does,
/// it definitely exists on the user-specified target version.
///
/// This variant is only relevant for submodules,
/// for which the typeshed VERSIONS file does not provide comprehensive information.
/// (The VERSIONS file is guaranteed to provide information about all top-level stdlib modules and packages,
/// but not necessarily about all submodules within each top-level package.)
///
/// For example:
/// - The target version is Python 3.8
/// - We're querying whether the `asyncio.staggered` module exists in the stdlib
/// - The typeshed VERSIONS file contains the line `asyncio: 3.8`,
/// indicating that the `asyncio` package was added in Python 3.8,
/// but does not contain any explicit information about the `asyncio.staggered` submodule.
MaybeExists,
}
impl FromStr for TypeshedVersions {
type Err = TypeshedVersionsParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut map = FxHashMap::default();
for (line_index, line) in s.lines().enumerate() {
// humans expect line numbers to be 1-indexed
let line_number = NonZeroUsize::new(line_index.saturating_add(1)).unwrap();
let Ok(line_number) = NonZeroU16::try_from(line_number) else {
return Err(TypeshedVersionsParseError {
line_number: None,
reason: TypeshedVersionsParseErrorKind::TooManyLines(line_number),
});
};
let Some(content) = line.split('#').map(str::trim).next() else {
continue;
};
if content.is_empty() {
continue;
}
let mut parts = content.split(':').map(str::trim);
let (Some(module_name), Some(rest), None) = (parts.next(), parts.next(), parts.next())
else {
return Err(TypeshedVersionsParseError {
line_number: Some(line_number),
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfColons,
});
};
let Some(module_name) = ModuleName::new(module_name) else {
return Err(TypeshedVersionsParseError {
line_number: Some(line_number),
reason: TypeshedVersionsParseErrorKind::InvalidModuleName(
module_name.to_string(),
),
});
};
match PyVersionRange::from_str(rest) {
Ok(version) => map.insert(module_name, version),
Err(reason) => {
return Err(TypeshedVersionsParseError {
line_number: Some(line_number),
reason,
})
}
};
}
Ok(Self(map))
}
}
impl fmt::Display for TypeshedVersions {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let sorted_items: BTreeMap<&ModuleName, &PyVersionRange> = self.0.iter().collect();
for (module_name, range) in sorted_items {
writeln!(f, "{module_name}: {range}")?;
}
Ok(())
}
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
enum PyVersionRange {
AvailableFrom(RangeFrom<PythonVersion>),
AvailableWithin(RangeInclusive<PythonVersion>),
}
impl PyVersionRange {
#[must_use]
fn contains(&self, version: PythonVersion) -> bool {
match self {
Self::AvailableFrom(inner) => inner.contains(&version),
Self::AvailableWithin(inner) => inner.contains(&version),
}
}
}
impl FromStr for PyVersionRange {
type Err = TypeshedVersionsParseErrorKind;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut parts = s.split('-').map(str::trim);
match (parts.next(), parts.next(), parts.next()) {
(Some(lower), Some(""), None) => {
let lower = PythonVersion::from_versions_file_string(lower)?;
Ok(Self::AvailableFrom(lower..))
}
(Some(lower), Some(upper), None) => {
let lower = PythonVersion::from_versions_file_string(lower)?;
let upper = PythonVersion::from_versions_file_string(upper)?;
Ok(Self::AvailableWithin(lower..=upper))
}
_ => Err(TypeshedVersionsParseErrorKind::UnexpectedNumberOfHyphens),
}
}
}
impl fmt::Display for PyVersionRange {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::AvailableFrom(range_from) => write!(f, "{}-", range_from.start),
Self::AvailableWithin(range_inclusive) => {
write!(f, "{}-{}", range_inclusive.start(), range_inclusive.end())
}
}
}
}
impl PythonVersion {
fn from_versions_file_string(s: &str) -> Result<Self, TypeshedVersionsParseErrorKind> {
let mut parts = s.split('.').map(str::trim);
let (Some(major), Some(minor), None) = (parts.next(), parts.next(), parts.next()) else {
return Err(TypeshedVersionsParseErrorKind::UnexpectedNumberOfPeriods(
s.to_string(),
));
};
PythonVersion::try_from((major, minor)).map_err(|int_parse_error| {
TypeshedVersionsParseErrorKind::IntegerParsingFailure {
version: s.to_string(),
err: int_parse_error,
}
})
}
}
#[cfg(test)]
mod tests {
use std::num::{IntErrorKind, NonZeroU16};
use std::path::Path;
use insta::assert_snapshot;
use super::*;
const TYPESHED_STDLIB_DIR: &str = "stdlib";
#[allow(unsafe_code)]
const ONE: Option<NonZeroU16> = Some(unsafe { NonZeroU16::new_unchecked(1) });
impl TypeshedVersions {
#[must_use]
fn contains_exact(&self, module: &ModuleName) -> bool {
self.exact(module).is_some()
}
#[must_use]
fn len(&self) -> usize {
self.0.len()
}
}
#[test]
fn can_parse_vendored_versions_file() {
let versions_data = include_str!(concat!(
env!("CARGO_MANIFEST_DIR"),
"/vendor/typeshed/stdlib/VERSIONS"
));
let versions = TypeshedVersions::from_str(versions_data).unwrap();
assert!(versions.len() > 100);
assert!(versions.len() < 1000);
let asyncio = ModuleName::new_static("asyncio").unwrap();
let asyncio_staggered = ModuleName::new_static("asyncio.staggered").unwrap();
let audioop = ModuleName::new_static("audioop").unwrap();
assert!(versions.contains_exact(&asyncio));
assert_eq!(
versions.query_module(&asyncio, PythonVersion::PY310),
TypeshedVersionsQueryResult::Exists
);
assert!(versions.contains_exact(&asyncio_staggered));
assert_eq!(
versions.query_module(&asyncio_staggered, PythonVersion::PY38),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
versions.query_module(&asyncio_staggered, PythonVersion::PY37),
TypeshedVersionsQueryResult::DoesNotExist
);
assert!(versions.contains_exact(&audioop));
assert_eq!(
versions.query_module(&audioop, PythonVersion::PY312),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
versions.query_module(&audioop, PythonVersion::PY313),
TypeshedVersionsQueryResult::DoesNotExist
);
}
#[test]
fn typeshed_versions_consistent_with_vendored_stubs() {
const VERSIONS_DATA: &str = include_str!("../../../vendor/typeshed/stdlib/VERSIONS");
let vendored_typeshed_dir = Path::new("vendor/typeshed").canonicalize().unwrap();
let vendored_typeshed_versions = TypeshedVersions::from_str(VERSIONS_DATA).unwrap();
let mut empty_iterator = true;
let stdlib_stubs_path = vendored_typeshed_dir.join(TYPESHED_STDLIB_DIR);
for entry in std::fs::read_dir(&stdlib_stubs_path).unwrap() {
empty_iterator = false;
let entry = entry.unwrap();
let absolute_path = entry.path();
let relative_path = absolute_path
.strip_prefix(&stdlib_stubs_path)
.unwrap_or_else(|_| panic!("Expected path to be a child of {stdlib_stubs_path:?} but found {absolute_path:?}"));
let relative_path_str = relative_path.as_os_str().to_str().unwrap_or_else(|| {
panic!("Expected all typeshed paths to be valid UTF-8; got {relative_path:?}")
});
if relative_path_str == "VERSIONS" {
continue;
}
let top_level_module = if let Some(extension) = relative_path.extension() {
// It was a file; strip off the file extension to get the module name:
let extension = extension
.to_str()
.unwrap_or_else(||panic!("Expected all file extensions to be UTF-8; was not true for {relative_path:?}"));
relative_path_str
.strip_suffix(extension)
.and_then(|string| string.strip_suffix('.')).unwrap_or_else(|| {
panic!("Expected path {relative_path_str:?} to end with computed extension {extension:?}")
})
} else {
// It was a directory; no need to do anything to get the module name
relative_path_str
};
let top_level_module = ModuleName::new(top_level_module)
.unwrap_or_else(|| panic!("{top_level_module:?} was not a valid module name!"));
assert!(vendored_typeshed_versions.contains_exact(&top_level_module));
}
assert!(
!empty_iterator,
"Expected there to be at least one file or directory in the vendored typeshed stubs"
);
}
#[test]
fn can_parse_mock_versions_file() {
const VERSIONS: &str = "\
# a comment
# some more comment
# yet more comment
# and some more comment
bar: 2.7-3.10
# more comment
bar.baz: 3.1-3.9
foo: 3.8- # trailing comment
";
let parsed_versions = TypeshedVersions::from_str(VERSIONS).unwrap();
assert_eq!(parsed_versions.len(), 3);
assert_snapshot!(parsed_versions.to_string(), @r###"
bar: 2.7-3.10
bar.baz: 3.1-3.9
foo: 3.8-
"###
);
}
#[test]
fn version_within_range_parsed_correctly() {
let parsed_versions = TypeshedVersions::from_str("bar: 2.7-3.10").unwrap();
let bar = ModuleName::new_static("bar").unwrap();
assert!(parsed_versions.contains_exact(&bar));
assert_eq!(
parsed_versions.query_module(&bar, PythonVersion::PY37),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
parsed_versions.query_module(&bar, PythonVersion::PY310),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
parsed_versions.query_module(&bar, PythonVersion::PY311),
TypeshedVersionsQueryResult::DoesNotExist
);
}
#[test]
fn version_from_range_parsed_correctly() {
let parsed_versions = TypeshedVersions::from_str("foo: 3.8-").unwrap();
let foo = ModuleName::new_static("foo").unwrap();
assert!(parsed_versions.contains_exact(&foo));
assert_eq!(
parsed_versions.query_module(&foo, PythonVersion::PY37),
TypeshedVersionsQueryResult::DoesNotExist
);
assert_eq!(
parsed_versions.query_module(&foo, PythonVersion::PY38),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
parsed_versions.query_module(&foo, PythonVersion::PY311),
TypeshedVersionsQueryResult::Exists
);
}
#[test]
fn explicit_submodule_parsed_correctly() {
let parsed_versions = TypeshedVersions::from_str("bar.baz: 3.1-3.9").unwrap();
let bar_baz = ModuleName::new_static("bar.baz").unwrap();
assert!(parsed_versions.contains_exact(&bar_baz));
assert_eq!(
parsed_versions.query_module(&bar_baz, PythonVersion::PY37),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
parsed_versions.query_module(&bar_baz, PythonVersion::PY39),
TypeshedVersionsQueryResult::Exists
);
assert_eq!(
parsed_versions.query_module(&bar_baz, PythonVersion::PY310),
TypeshedVersionsQueryResult::DoesNotExist
);
}
#[test]
fn implicit_submodule_queried_correctly() {
let parsed_versions = TypeshedVersions::from_str("bar: 2.7-3.10").unwrap();
let bar_eggs = ModuleName::new_static("bar.eggs").unwrap();
assert!(!parsed_versions.contains_exact(&bar_eggs));
assert_eq!(
parsed_versions.query_module(&bar_eggs, PythonVersion::PY37),
TypeshedVersionsQueryResult::MaybeExists
);
assert_eq!(
parsed_versions.query_module(&bar_eggs, PythonVersion::PY310),
TypeshedVersionsQueryResult::MaybeExists
);
assert_eq!(
parsed_versions.query_module(&bar_eggs, PythonVersion::PY311),
TypeshedVersionsQueryResult::DoesNotExist
);
}
#[test]
fn nonexistent_module_queried_correctly() {
let parsed_versions = TypeshedVersions::from_str("eggs: 3.8-").unwrap();
let spam = ModuleName::new_static("spam").unwrap();
assert!(!parsed_versions.contains_exact(&spam));
assert_eq!(
parsed_versions.query_module(&spam, PythonVersion::PY37),
TypeshedVersionsQueryResult::DoesNotExist
);
assert_eq!(
parsed_versions.query_module(&spam, PythonVersion::PY313),
TypeshedVersionsQueryResult::DoesNotExist
);
}
#[test]
fn invalid_huge_versions_file() {
let offset = 100;
let too_many = u16::MAX as usize + offset;
let mut massive_versions_file = String::new();
for i in 0..too_many {
massive_versions_file.push_str(&format!("x{i}: 3.8-\n"));
}
assert_eq!(
TypeshedVersions::from_str(&massive_versions_file),
Err(TypeshedVersionsParseError {
line_number: None,
reason: TypeshedVersionsParseErrorKind::TooManyLines(
NonZeroUsize::new(too_many + 1 - offset).unwrap()
)
})
);
}
#[test]
fn invalid_typeshed_versions_bad_colon_number() {
assert_eq!(
TypeshedVersions::from_str("foo 3.7"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfColons
})
);
assert_eq!(
TypeshedVersions::from_str("foo:: 3.7"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfColons
})
);
}
#[test]
fn invalid_typeshed_versions_non_identifier_modules() {
assert_eq!(
TypeshedVersions::from_str("not!an!identifier!: 3.7"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::InvalidModuleName(
"not!an!identifier!".to_string()
)
})
);
assert_eq!(
TypeshedVersions::from_str("(also_not).(an_identifier): 3.7"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::InvalidModuleName(
"(also_not).(an_identifier)".to_string()
)
})
);
}
#[test]
fn invalid_typeshed_versions_bad_hyphen_number() {
assert_eq!(
TypeshedVersions::from_str("foo: 3.8"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfHyphens
})
);
assert_eq!(
TypeshedVersions::from_str("foo: 3.8--"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfHyphens
})
);
assert_eq!(
TypeshedVersions::from_str("foo: 3.8--3.9"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfHyphens
})
);
}
#[test]
fn invalid_typeshed_versions_bad_period_number() {
assert_eq!(
TypeshedVersions::from_str("foo: 38-"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfPeriods("38".to_string())
})
);
assert_eq!(
TypeshedVersions::from_str("foo: 3..8-"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfPeriods(
"3..8".to_string()
)
})
);
assert_eq!(
TypeshedVersions::from_str("foo: 3.8-3..11"),
Err(TypeshedVersionsParseError {
line_number: ONE,
reason: TypeshedVersionsParseErrorKind::UnexpectedNumberOfPeriods(
"3..11".to_string()
)
})
);
}
#[test]
fn invalid_typeshed_versions_non_digits() {
let err = TypeshedVersions::from_str("foo: 1.two-").unwrap_err();
assert_eq!(err.line_number, ONE);
let TypeshedVersionsParseErrorKind::IntegerParsingFailure { version, err } = err.reason
else {
panic!()
};
assert_eq!(version, "1.two".to_string());
assert_eq!(*err.kind(), IntErrorKind::InvalidDigit);
let err = TypeshedVersions::from_str("foo: 3.8-four.9").unwrap_err();
assert_eq!(err.line_number, ONE);
let TypeshedVersionsParseErrorKind::IntegerParsingFailure { version, err } = err.reason
else {
panic!()
};
assert_eq!(version, "four.9".to_string());
assert_eq!(*err.kind(), IntErrorKind::InvalidDigit);
}
}

View File

@@ -1,56 +0,0 @@
use std::ops::Deref;
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Name(smol_str::SmolStr);
impl Name {
#[inline]
pub fn new(name: &str) -> Self {
Self(smol_str::SmolStr::new(name))
}
#[inline]
pub fn new_static(name: &'static str) -> Self {
Self(smol_str::SmolStr::new_static(name))
}
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}
impl Deref for Name {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl<T> From<T> for Name
where
T: Into<smol_str::SmolStr>,
{
fn from(value: T) -> Self {
Self(value.into())
}
}
impl std::fmt::Display for Name {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}
impl PartialEq<str> for Name {
fn eq(&self, other: &str) -> bool {
self.as_str() == other
}
}
impl PartialEq<Name> for str {
fn eq(&self, other: &Name) -> bool {
other == self
}
}

View File

@@ -0,0 +1,46 @@
use crate::python_version::PythonVersion;
use crate::Db;
use ruff_db::system::SystemPathBuf;
use salsa::Durability;
#[salsa::input(singleton)]
pub struct Program {
pub target_version: PythonVersion,
#[return_ref]
pub search_paths: SearchPathSettings,
}
impl Program {
pub fn from_settings(db: &dyn Db, settings: ProgramSettings) -> Self {
Program::builder(settings.target_version, settings.search_paths)
.durability(Durability::HIGH)
.new(db)
}
}
#[derive(Debug, Eq, PartialEq)]
pub struct ProgramSettings {
pub target_version: PythonVersion,
pub search_paths: SearchPathSettings,
}
/// Configures the search paths for module resolution.
#[derive(Eq, PartialEq, Debug, Clone, Default)]
pub struct SearchPathSettings {
/// List of user-provided paths that should take first priority in the module resolution.
/// Examples in other type checkers are mypy's MYPYPATH environment variable,
/// or pyright's stubPath configuration setting.
pub extra_paths: Vec<SystemPathBuf>,
/// The root of the workspace, used for finding first-party modules.
pub src_root: SystemPathBuf,
/// Optional path to a "custom typeshed" directory on disk for us to use for standard-library types.
/// If this is not provided, we will fallback to our vendored typeshed stubs for the stdlib,
/// bundled as a zip file in the binary
pub custom_typeshed: Option<SystemPathBuf>,
/// The path to the user's `site-packages` directory, where third-party packages from ``PyPI`` are installed.
pub site_packages: Vec<SystemPathBuf>,
}

View File

@@ -0,0 +1,62 @@
use std::fmt;
/// Representation of a Python version.
///
/// Unlike the `TargetVersion` enums in the CLI crates,
/// this does not necessarily represent a Python version that we actually support.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct PythonVersion {
pub major: u8,
pub minor: u8,
}
impl PythonVersion {
pub const PY37: PythonVersion = PythonVersion { major: 3, minor: 7 };
pub const PY38: PythonVersion = PythonVersion { major: 3, minor: 8 };
pub const PY39: PythonVersion = PythonVersion { major: 3, minor: 9 };
pub const PY310: PythonVersion = PythonVersion {
major: 3,
minor: 10,
};
pub const PY311: PythonVersion = PythonVersion {
major: 3,
minor: 11,
};
pub const PY312: PythonVersion = PythonVersion {
major: 3,
minor: 12,
};
pub const PY313: PythonVersion = PythonVersion {
major: 3,
minor: 13,
};
pub fn free_threaded_build_available(self) -> bool {
self >= PythonVersion::PY313
}
}
impl Default for PythonVersion {
fn default() -> Self {
Self::PY38
}
}
impl TryFrom<(&str, &str)> for PythonVersion {
type Error = std::num::ParseIntError;
fn try_from(value: (&str, &str)) -> Result<Self, Self::Error> {
let (major, minor) = value;
Ok(Self {
major: major.parse()?,
minor: minor.parse()?,
})
}
}
impl fmt::Display for PythonVersion {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let PythonVersion { major, minor } = self;
write!(f, "{major}.{minor}")
}
}

View File

@@ -2,25 +2,30 @@ use std::iter::FusedIterator;
use std::sync::Arc;
use rustc_hash::FxHashMap;
use salsa::DebugWithDb;
use salsa::plumbing::AsId;
use ruff_db::files::File;
use ruff_db::parsed::parsed_module;
use ruff_db::vfs::VfsFile;
use ruff_index::{IndexSlice, IndexVec};
use ruff_python_ast as ast;
use crate::node_key::NodeKey;
use crate::semantic_index::ast_ids::{AstId, AstIds, ScopeClassId, ScopeFunctionId};
use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey;
use crate::semantic_index::ast_ids::AstIds;
use crate::semantic_index::builder::SemanticIndexBuilder;
use crate::semantic_index::definition::{Definition, DefinitionNodeKey};
use crate::semantic_index::expression::Expression;
use crate::semantic_index::symbol::{
FileScopeId, PublicSymbolId, Scope, ScopeId, ScopeKind, ScopedSymbolId, SymbolTable,
FileScopeId, NodeWithScopeKey, NodeWithScopeRef, Scope, ScopeId, ScopedSymbolId, SymbolTable,
};
use crate::Db;
pub(crate) use self::use_def::UseDefMap;
pub mod ast_ids;
mod builder;
pub mod definition;
pub mod expression;
pub mod symbol;
mod use_def;
type SymbolMap = hashbrown::HashMap<ScopedSymbolId, (), ()>;
@@ -28,12 +33,12 @@ type SymbolMap = hashbrown::HashMap<ScopedSymbolId, (), ()>;
///
/// Prefer using [`symbol_table`] when working with symbols from a single scope.
#[salsa::tracked(return_ref, no_eq)]
pub(crate) fn semantic_index(db: &dyn Db, file: VfsFile) -> SemanticIndex {
let _ = tracing::trace_span!("semantic_index", file = ?file.debug(db.upcast())).enter();
pub(crate) fn semantic_index(db: &dyn Db, file: File) -> SemanticIndex<'_> {
let _span = tracing::trace_span!("semantic_index", file = %file.path(db)).entered();
let parsed = parsed_module(db.upcast(), file);
SemanticIndexBuilder::new(parsed).build()
SemanticIndexBuilder::new(db, file, parsed).build()
}
/// Returns the symbol table for a specific `scope`.
@@ -42,55 +47,75 @@ pub(crate) fn semantic_index(db: &dyn Db, file: VfsFile) -> SemanticIndex {
/// Salsa can avoid invalidating dependent queries if this scope's symbol table
/// is unchanged.
#[salsa::tracked]
pub(crate) fn symbol_table(db: &dyn Db, scope: ScopeId) -> Arc<SymbolTable> {
let _ = tracing::trace_span!("symbol_table", scope = ?scope.debug(db)).enter();
let index = semantic_index(db, scope.file(db));
pub(crate) fn symbol_table<'db>(db: &'db dyn Db, scope: ScopeId<'db>) -> Arc<SymbolTable> {
let file = scope.file(db);
let _span =
tracing::trace_span!("symbol_table", scope=?scope.as_id(), file=%file.path(db)).entered();
let index = semantic_index(db, file);
index.symbol_table(scope.file_scope_id(db))
}
/// Returns the root scope of `file`.
/// Returns the use-def map for a specific `scope`.
///
/// Using [`use_def_map`] over [`semantic_index`] has the advantage that
/// Salsa can avoid invalidating dependent queries if this scope's use-def map
/// is unchanged.
#[salsa::tracked]
pub(crate) fn root_scope(db: &dyn Db, file: VfsFile) -> ScopeId {
let _ = tracing::trace_span!("root_scope", file = ?file.debug(db.upcast())).enter();
pub(crate) fn use_def_map<'db>(db: &'db dyn Db, scope: ScopeId<'db>) -> Arc<UseDefMap<'db>> {
let file = scope.file(db);
let _span =
tracing::trace_span!("use_def_map", scope=?scope.as_id(), file=%file.path(db)).entered();
let index = semantic_index(db, file);
FileScopeId::root().to_scope_id(db, file)
index.use_def_map(scope.file_scope_id(db))
}
/// Returns the symbol with the given name in `file`'s public scope or `None` if
/// no symbol with the given name exists.
pub fn public_symbol(db: &dyn Db, file: VfsFile, name: &str) -> Option<PublicSymbolId> {
let root_scope = root_scope(db, file);
let symbol_table = symbol_table(db, root_scope);
let local = symbol_table.symbol_id_by_name(name)?;
Some(local.to_public_symbol(db, file))
/// Returns the module global scope of `file`.
#[salsa::tracked]
pub(crate) fn global_scope(db: &dyn Db, file: File) -> ScopeId<'_> {
let _span = tracing::trace_span!("global_scope", file = %file.path(db)).entered();
FileScopeId::global().to_scope_id(db, file)
}
/// The symbol tables for an entire file.
/// The symbol tables and use-def maps for all scopes in a file.
#[derive(Debug)]
pub struct SemanticIndex {
pub(crate) struct SemanticIndex<'db> {
/// List of all symbol tables in this file, indexed by scope.
symbol_tables: IndexVec<FileScopeId, Arc<SymbolTable>>,
/// List of all scopes in this file.
scopes: IndexVec<FileScopeId, Scope>,
/// Maps expressions to their corresponding scope.
/// Map expressions to their corresponding scope.
/// We can't use [`ExpressionId`] here, because the challenge is how to get from
/// an [`ast::Expr`] to an [`ExpressionId`] (which requires knowing the scope).
expression_scopes: FxHashMap<NodeKey, FileScopeId>,
scopes_by_expression: FxHashMap<ExpressionNodeKey, FileScopeId>,
/// Map from a node creating a definition to its definition.
definitions_by_node: FxHashMap<DefinitionNodeKey, Definition<'db>>,
/// Map from a standalone expression to its [`Expression`] ingredient.
expressions_by_node: FxHashMap<ExpressionNodeKey, Expression<'db>>,
/// Map from nodes that create a scope to the scope they create.
scopes_by_node: FxHashMap<NodeWithScopeKey, FileScopeId>,
/// Map from the file-local [`FileScopeId`] to the salsa-ingredient [`ScopeId`].
scope_ids_by_scope: IndexVec<FileScopeId, ScopeId<'db>>,
/// Use-def map for each scope in this file.
use_def_maps: IndexVec<FileScopeId, Arc<UseDefMap<'db>>>,
/// Lookup table to map between node ids and ast nodes.
///
/// Note: We should not depend on this map when analysing other files or
/// changing a file invalidates all dependents.
ast_ids: IndexVec<FileScopeId, AstIds>,
/// Map from scope to the node that introduces the scope.
scope_nodes: IndexVec<FileScopeId, NodeWithScopeId>,
}
impl SemanticIndex {
impl<'db> SemanticIndex<'db> {
/// Returns the symbol table for a specific scope.
///
/// Use the Salsa cached [`symbol_table`] query if you only need the
@@ -99,24 +124,33 @@ impl SemanticIndex {
self.symbol_tables[scope_id].clone()
}
/// Returns the use-def map for a specific scope.
///
/// Use the Salsa cached [`use_def_map`] query if you only need the
/// use-def map for a single scope.
pub(super) fn use_def_map(&self, scope_id: FileScopeId) -> Arc<UseDefMap> {
self.use_def_maps[scope_id].clone()
}
pub(crate) fn ast_ids(&self, scope_id: FileScopeId) -> &AstIds {
&self.ast_ids[scope_id]
}
/// Returns the ID of the `expression`'s enclosing scope.
#[allow(unused)]
pub(crate) fn expression_scope_id(&self, expression: &ast::Expr) -> FileScopeId {
self.expression_scopes[&NodeKey::from_node(expression)]
pub(crate) fn expression_scope_id(
&self,
expression: impl Into<ExpressionNodeKey>,
) -> FileScopeId {
self.scopes_by_expression[&expression.into()]
}
/// Returns the [`Scope`] of the `expression`'s enclosing scope.
#[allow(unused)]
pub(crate) fn expression_scope(&self, expression: &ast::Expr) -> &Scope {
pub(crate) fn expression_scope(&self, expression: impl Into<ExpressionNodeKey>) -> &Scope {
&self.scopes[self.expression_scope_id(expression)]
}
/// Returns the [`Scope`] with the given id.
#[allow(unused)]
pub(crate) fn scope(&self, id: FileScopeId) -> &Scope {
&self.scopes[id]
}
@@ -151,13 +185,32 @@ impl SemanticIndex {
AncestorsIter::new(self, scope)
}
pub(crate) fn scope_node(&self, scope_id: FileScopeId) -> NodeWithScopeId {
self.scope_nodes[scope_id]
/// Returns the [`Definition`] salsa ingredient for `definition_key`.
pub(crate) fn definition(
&self,
definition_key: impl Into<DefinitionNodeKey>,
) -> Definition<'db> {
self.definitions_by_node[&definition_key.into()]
}
/// Returns the [`Expression`] ingredient for an expression node.
/// Panics if we have no expression ingredient for that node. We can only call this method for
/// standalone-inferable expressions, which we call `add_standalone_expression` for in
/// [`SemanticIndexBuilder`].
pub(crate) fn expression(
&self,
expression_key: impl Into<ExpressionNodeKey>,
) -> Expression<'db> {
self.expressions_by_node[&expression_key.into()]
}
/// Returns the id of the scope that `node` creates. This is different from [`Definition::scope`] which
/// returns the scope in which that definition is defined in.
pub(crate) fn node_scope(&self, node: NodeWithScopeRef) -> FileScopeId {
self.scopes_by_node[&node.node_key()]
}
}
/// ID that uniquely identifies an expression inside a [`Scope`].
pub struct AncestorsIter<'a> {
scopes: &'a IndexSlice<FileScopeId, Scope>,
next_id: Option<FileScopeId>,
@@ -248,152 +301,153 @@ impl<'a> Iterator for ChildrenIter<'a> {
}
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum NodeWithScopeId {
Module,
Class(AstId<ScopeClassId>),
ClassTypeParams(AstId<ScopeClassId>),
Function(AstId<ScopeFunctionId>),
FunctionTypeParams(AstId<ScopeFunctionId>),
}
impl NodeWithScopeId {
fn scope_kind(self) -> ScopeKind {
match self {
NodeWithScopeId::Module => ScopeKind::Module,
NodeWithScopeId::Class(_) => ScopeKind::Class,
NodeWithScopeId::Function(_) => ScopeKind::Function,
NodeWithScopeId::ClassTypeParams(_) | NodeWithScopeId::FunctionTypeParams(_) => {
ScopeKind::Annotation
}
}
}
}
impl FusedIterator for ChildrenIter<'_> {}
#[cfg(test)]
mod tests {
use ruff_db::files::{system_path_to_file, File};
use ruff_db::parsed::parsed_module;
use ruff_db::vfs::{system_path_to_file, VfsFile};
use ruff_db::system::DbWithTestSystem;
use ruff_python_ast as ast;
use crate::db::tests::TestDb;
use crate::semantic_index::symbol::{FileScopeId, ScopeKind, SymbolTable};
use crate::semantic_index::{root_scope, semantic_index, symbol_table};
use crate::semantic_index::ast_ids::HasScopedUseId;
use crate::semantic_index::definition::DefinitionKind;
use crate::semantic_index::symbol::{FileScopeId, Scope, ScopeKind, SymbolTable};
use crate::semantic_index::{global_scope, semantic_index, symbol_table, use_def_map};
use crate::Db;
struct TestCase {
db: TestDb,
file: VfsFile,
file: File,
}
fn test_case(content: impl ToString) -> TestCase {
let db = TestDb::new();
db.memory_file_system()
.write_file("test.py", content)
.unwrap();
let mut db = TestDb::new();
db.write_file("test.py", content).unwrap();
let file = system_path_to_file(&db, "test.py").unwrap();
TestCase { db, file }
}
fn names(table: &SymbolTable) -> Vec<&str> {
fn names(table: &SymbolTable) -> Vec<String> {
table
.symbols()
.map(|symbol| symbol.name().as_str())
.map(|symbol| symbol.name().to_string())
.collect()
}
#[test]
fn empty() {
let TestCase { db, file } = test_case("");
let root_table = symbol_table(&db, root_scope(&db, file));
let global_table = symbol_table(&db, global_scope(&db, file));
assert_eq!(names(&root_table), Vec::<&str>::new());
let global_names = names(&global_table);
assert_eq!(global_names, Vec::<&str>::new());
}
#[test]
fn simple() {
let TestCase { db, file } = test_case("x");
let root_table = symbol_table(&db, root_scope(&db, file));
let global_table = symbol_table(&db, global_scope(&db, file));
assert_eq!(names(&root_table), vec!["x"]);
assert_eq!(names(&global_table), vec!["x"]);
}
#[test]
fn annotation_only() {
let TestCase { db, file } = test_case("x: int");
let root_table = symbol_table(&db, root_scope(&db, file));
let global_table = symbol_table(&db, global_scope(&db, file));
assert_eq!(names(&root_table), vec!["int", "x"]);
assert_eq!(names(&global_table), vec!["int", "x"]);
// TODO record definition
}
#[test]
fn import() {
let TestCase { db, file } = test_case("import foo");
let root_table = symbol_table(&db, root_scope(&db, file));
let scope = global_scope(&db, file);
let global_table = symbol_table(&db, scope);
assert_eq!(names(&root_table), vec!["foo"]);
let foo = root_table.symbol_by_name("foo").unwrap();
assert_eq!(names(&global_table), vec!["foo"]);
let foo = global_table.symbol_id_by_name("foo").unwrap();
assert_eq!(foo.definitions().len(), 1);
let use_def = use_def_map(&db, scope);
let [definition] = use_def.public_definitions(foo) else {
panic!("expected one definition");
};
assert!(matches!(definition.node(&db), DefinitionKind::Import(_)));
}
#[test]
fn import_sub() {
let TestCase { db, file } = test_case("import foo.bar");
let root_table = symbol_table(&db, root_scope(&db, file));
let global_table = symbol_table(&db, global_scope(&db, file));
assert_eq!(names(&root_table), vec!["foo"]);
assert_eq!(names(&global_table), vec!["foo"]);
}
#[test]
fn import_as() {
let TestCase { db, file } = test_case("import foo.bar as baz");
let root_table = symbol_table(&db, root_scope(&db, file));
let global_table = symbol_table(&db, global_scope(&db, file));
assert_eq!(names(&root_table), vec!["baz"]);
assert_eq!(names(&global_table), vec!["baz"]);
}
#[test]
fn import_from() {
let TestCase { db, file } = test_case("from bar import foo");
let root_table = symbol_table(&db, root_scope(&db, file));
let scope = global_scope(&db, file);
let global_table = symbol_table(&db, scope);
assert_eq!(names(&root_table), vec!["foo"]);
assert_eq!(
root_table
.symbol_by_name("foo")
.unwrap()
.definitions()
.len(),
1
);
assert_eq!(names(&global_table), vec!["foo"]);
assert!(
root_table
global_table
.symbol_by_name("foo")
.is_some_and(|symbol| { symbol.is_defined() || !symbol.is_used() }),
.is_some_and(|symbol| { symbol.is_defined() && !symbol.is_used() }),
"symbols that are defined get the defined flag"
);
let use_def = use_def_map(&db, scope);
let [definition] = use_def.public_definitions(
global_table
.symbol_id_by_name("foo")
.expect("symbol to exist"),
) else {
panic!("expected one definition");
};
assert!(matches!(
definition.node(&db),
DefinitionKind::ImportFrom(_)
));
}
#[test]
fn assign() {
let TestCase { db, file } = test_case("x = foo");
let root_table = symbol_table(&db, root_scope(&db, file));
let scope = global_scope(&db, file);
let global_table = symbol_table(&db, scope);
assert_eq!(names(&root_table), vec!["foo", "x"]);
assert_eq!(
root_table.symbol_by_name("x").unwrap().definitions().len(),
1
);
assert_eq!(names(&global_table), vec!["foo", "x"]);
assert!(
root_table
global_table
.symbol_by_name("foo")
.is_some_and(|symbol| { !symbol.is_defined() && symbol.is_used() }),
"a symbol used but not defined in a scope should have only the used flag"
);
let use_def = use_def_map(&db, scope);
let [definition] =
use_def.public_definitions(global_table.symbol_id_by_name("x").expect("symbol exists"))
else {
panic!("expected one definition");
};
assert!(matches!(
definition.node(&db),
DefinitionKind::Assignment(_)
));
}
#[test]
@@ -405,25 +459,34 @@ class C:
y = 2
",
);
let root_table = symbol_table(&db, root_scope(&db, file));
let global_table = symbol_table(&db, global_scope(&db, file));
assert_eq!(names(&root_table), vec!["C", "y"]);
assert_eq!(names(&global_table), vec!["C", "y"]);
let index = semantic_index(&db, file);
let scopes: Vec<_> = index.child_scopes(FileScopeId::root()).collect();
assert_eq!(scopes.len(), 1);
let (class_scope_id, class_scope) = scopes[0];
let [(class_scope_id, class_scope)] = index
.child_scopes(FileScopeId::global())
.collect::<Vec<_>>()[..]
else {
panic!("expected one child scope")
};
assert_eq!(class_scope.kind(), ScopeKind::Class);
assert_eq!(class_scope.name(), "C");
assert_eq!(class_scope_id.to_scope_id(&db, file).name(&db), "C");
let class_table = index.symbol_table(class_scope_id);
assert_eq!(names(&class_table), vec!["x"]);
assert_eq!(
class_table.symbol_by_name("x").unwrap().definitions().len(),
1
);
let use_def = index.use_def_map(class_scope_id);
let [definition] =
use_def.public_definitions(class_table.symbol_id_by_name("x").expect("symbol exists"))
else {
panic!("expected one definition");
};
assert!(matches!(
definition.node(&db),
DefinitionKind::Assignment(_)
));
}
#[test]
@@ -436,27 +499,34 @@ y = 2
",
);
let index = semantic_index(&db, file);
let root_table = index.symbol_table(FileScopeId::root());
let global_table = index.symbol_table(FileScopeId::global());
assert_eq!(names(&root_table), vec!["func", "y"]);
assert_eq!(names(&global_table), vec!["func", "y"]);
let scopes = index.child_scopes(FileScopeId::root()).collect::<Vec<_>>();
assert_eq!(scopes.len(), 1);
let (function_scope_id, function_scope) = scopes[0];
let [(function_scope_id, function_scope)] = index
.child_scopes(FileScopeId::global())
.collect::<Vec<_>>()[..]
else {
panic!("expected one child scope")
};
assert_eq!(function_scope.kind(), ScopeKind::Function);
assert_eq!(function_scope.name(), "func");
assert_eq!(function_scope_id.to_scope_id(&db, file).name(&db), "func");
let function_table = index.symbol_table(function_scope_id);
assert_eq!(names(&function_table), vec!["x"]);
assert_eq!(
let use_def = index.use_def_map(function_scope_id);
let [definition] = use_def.public_definitions(
function_table
.symbol_by_name("x")
.unwrap()
.definitions()
.len(),
1
);
.symbol_id_by_name("x")
.expect("symbol exists"),
) else {
panic!("expected one definition");
};
assert!(matches!(
definition.node(&db),
DefinitionKind::Assignment(_)
));
}
#[test]
@@ -470,32 +540,36 @@ def func():
",
);
let index = semantic_index(&db, file);
let root_table = index.symbol_table(FileScopeId::root());
let global_table = index.symbol_table(FileScopeId::global());
assert_eq!(names(&root_table), vec!["func"]);
let scopes: Vec<_> = index.child_scopes(FileScopeId::root()).collect();
assert_eq!(scopes.len(), 2);
let (func_scope1_id, func_scope_1) = scopes[0];
let (func_scope2_id, func_scope_2) = scopes[1];
assert_eq!(names(&global_table), vec!["func"]);
let [(func_scope1_id, func_scope_1), (func_scope2_id, func_scope_2)] = index
.child_scopes(FileScopeId::global())
.collect::<Vec<_>>()[..]
else {
panic!("expected two child scopes");
};
assert_eq!(func_scope_1.kind(), ScopeKind::Function);
assert_eq!(func_scope_1.name(), "func");
assert_eq!(func_scope1_id.to_scope_id(&db, file).name(&db), "func");
assert_eq!(func_scope_2.kind(), ScopeKind::Function);
assert_eq!(func_scope_2.name(), "func");
assert_eq!(func_scope2_id.to_scope_id(&db, file).name(&db), "func");
let func1_table = index.symbol_table(func_scope1_id);
let func2_table = index.symbol_table(func_scope2_id);
assert_eq!(names(&func1_table), vec!["x"]);
assert_eq!(names(&func2_table), vec!["y"]);
assert_eq!(
root_table
.symbol_by_name("func")
.unwrap()
.definitions()
.len(),
2
);
let use_def = index.use_def_map(FileScopeId::global());
let [definition] = use_def.public_definitions(
global_table
.symbol_id_by_name("func")
.expect("symbol exists"),
) else {
panic!("expected one definition");
};
assert!(matches!(definition.node(&db), DefinitionKind::Function(_)));
}
#[test]
@@ -508,24 +582,29 @@ def func[T]():
);
let index = semantic_index(&db, file);
let root_table = index.symbol_table(FileScopeId::root());
let global_table = index.symbol_table(FileScopeId::global());
assert_eq!(names(&root_table), vec!["func"]);
assert_eq!(names(&global_table), vec!["func"]);
let scopes: Vec<_> = index.child_scopes(FileScopeId::root()).collect();
assert_eq!(scopes.len(), 1);
let (ann_scope_id, ann_scope) = scopes[0];
let [(ann_scope_id, ann_scope)] = index
.child_scopes(FileScopeId::global())
.collect::<Vec<_>>()[..]
else {
panic!("expected one child scope");
};
assert_eq!(ann_scope.kind(), ScopeKind::Annotation);
assert_eq!(ann_scope.name(), "func");
assert_eq!(ann_scope_id.to_scope_id(&db, file).name(&db), "func");
let ann_table = index.symbol_table(ann_scope_id);
assert_eq!(names(&ann_table), vec!["T"]);
let scopes: Vec<_> = index.child_scopes(ann_scope_id).collect();
assert_eq!(scopes.len(), 1);
let (func_scope_id, func_scope) = scopes[0];
let [(func_scope_id, func_scope)] =
index.child_scopes(ann_scope_id).collect::<Vec<_>>()[..]
else {
panic!("expected one child scope");
};
assert_eq!(func_scope.kind(), ScopeKind::Function);
assert_eq!(func_scope.name(), "func");
assert_eq!(func_scope_id.to_scope_id(&db, file).name(&db), "func");
let func_table = index.symbol_table(func_scope_id);
assert_eq!(names(&func_table), vec!["x"]);
}
@@ -540,16 +619,19 @@ class C[T]:
);
let index = semantic_index(&db, file);
let root_table = index.symbol_table(FileScopeId::root());
let global_table = index.symbol_table(FileScopeId::global());
assert_eq!(names(&root_table), vec!["C"]);
assert_eq!(names(&global_table), vec!["C"]);
let scopes: Vec<_> = index.child_scopes(FileScopeId::root()).collect();
let [(ann_scope_id, ann_scope)] = index
.child_scopes(FileScopeId::global())
.collect::<Vec<_>>()[..]
else {
panic!("expected one child scope");
};
assert_eq!(scopes.len(), 1);
let (ann_scope_id, ann_scope) = scopes[0];
assert_eq!(ann_scope.kind(), ScopeKind::Annotation);
assert_eq!(ann_scope.name(), "C");
assert_eq!(ann_scope_id.to_scope_id(&db, file).name(&db), "C");
let ann_table = index.symbol_table(ann_scope_id);
assert_eq!(names(&ann_table), vec!["T"]);
assert!(
@@ -559,48 +641,49 @@ class C[T]:
"type parameters are defined by the scope that introduces them"
);
let scopes: Vec<_> = index.child_scopes(ann_scope_id).collect();
assert_eq!(scopes.len(), 1);
let (func_scope_id, func_scope) = scopes[0];
let [(class_scope_id, class_scope)] =
index.child_scopes(ann_scope_id).collect::<Vec<_>>()[..]
else {
panic!("expected one child scope");
};
assert_eq!(func_scope.kind(), ScopeKind::Class);
assert_eq!(func_scope.name(), "C");
assert_eq!(names(&index.symbol_table(func_scope_id)), vec!["x"]);
assert_eq!(class_scope.kind(), ScopeKind::Class);
assert_eq!(class_scope_id.to_scope_id(&db, file).name(&db), "C");
assert_eq!(names(&index.symbol_table(class_scope_id)), vec!["x"]);
}
// TODO: After porting the control flow graph.
// #[test]
// fn reachability_trivial() {
// let parsed = parse("x = 1; x");
// let ast = parsed.syntax();
// let index = SemanticIndex::from_ast(ast);
// let table = &index.symbol_table;
// let x_sym = table
// .root_symbol_id_by_name("x")
// .expect("x symbol should exist");
// let ast::Stmt::Expr(ast::StmtExpr { value: x_use, .. }) = &ast.body[1] else {
// panic!("should be an expr")
// };
// let x_defs: Vec<_> = index
// .reachable_definitions(x_sym, x_use)
// .map(|constrained_definition| constrained_definition.definition)
// .collect();
// assert_eq!(x_defs.len(), 1);
// let Definition::Assignment(node_key) = &x_defs[0] else {
// panic!("def should be an assignment")
// };
// let Some(def_node) = node_key.resolve(ast.into()) else {
// panic!("node key should resolve")
// };
// let ast::Expr::NumberLiteral(ast::ExprNumberLiteral {
// value: ast::Number::Int(num),
// ..
// }) = &*def_node.value
// else {
// panic!("should be a number literal")
// };
// assert_eq!(*num, 1);
// }
#[test]
fn reachability_trivial() {
let TestCase { db, file } = test_case("x = 1; x");
let parsed = parsed_module(&db, file);
let scope = global_scope(&db, file);
let ast = parsed.syntax();
let ast::Stmt::Expr(ast::StmtExpr {
value: x_use_expr, ..
}) = &ast.body[1]
else {
panic!("should be an expr")
};
let ast::Expr::Name(x_use_expr_name) = x_use_expr.as_ref() else {
panic!("expected a Name");
};
let x_use_id = x_use_expr_name.scoped_use_id(&db, scope);
let use_def = use_def_map(&db, scope);
let [definition] = use_def.use_definitions(x_use_id) else {
panic!("expected one definition");
};
let DefinitionKind::Assignment(assignment) = definition.node(&db) else {
panic!("should be an assignment definition")
};
let ast::Expr::NumberLiteral(ast::ExprNumberLiteral {
value: ast::Number::Int(num),
..
}) = &*assignment.assignment().value
else {
panic!("should be a number literal")
};
assert_eq!(*num, 1);
}
#[test]
fn expression_scope() {
@@ -614,7 +697,7 @@ class C[T]:
let x = &x_stmt.targets[0];
assert_eq!(index.expression_scope(x).kind(), ScopeKind::Module);
assert_eq!(index.expression_scope_id(x), FileScopeId::root());
assert_eq!(index.expression_scope_id(x), FileScopeId::global());
let def = ast.body[1].as_function_def_stmt().unwrap();
let y_stmt = def.body[0].as_assign_stmt().unwrap();
@@ -625,6 +708,17 @@ class C[T]:
#[test]
fn scope_iterators() {
fn scope_names<'a>(
scopes: impl Iterator<Item = (FileScopeId, &'a Scope)>,
db: &'a dyn Db,
file: File,
) -> Vec<&'a str> {
scopes
.into_iter()
.map(|(scope_id, _)| scope_id.to_scope_id(db, file).name(db))
.collect()
}
let TestCase { db, file } = test_case(
r#"
class Test:
@@ -640,35 +734,32 @@ def x():
let index = semantic_index(&db, file);
let descendents: Vec<_> = index
.descendent_scopes(FileScopeId::root())
.map(|(_, scope)| scope.name().as_str())
.collect();
assert_eq!(descendents, vec!["Test", "foo", "bar", "baz", "x"]);
let descendents = index.descendent_scopes(FileScopeId::global());
assert_eq!(
scope_names(descendents, &db, file),
vec!["Test", "foo", "bar", "baz", "x"]
);
let children: Vec<_> = index
.child_scopes(FileScopeId::root())
.map(|(_, scope)| scope.name.as_str())
.collect();
assert_eq!(children, vec!["Test", "x"]);
let children = index.child_scopes(FileScopeId::global());
assert_eq!(scope_names(children, &db, file), vec!["Test", "x"]);
let test_class = index.child_scopes(FileScopeId::root()).next().unwrap().0;
let test_child_scopes: Vec<_> = index
.child_scopes(test_class)
.map(|(_, scope)| scope.name.as_str())
.collect();
assert_eq!(test_child_scopes, vec!["foo", "baz"]);
let test_class = index.child_scopes(FileScopeId::global()).next().unwrap().0;
let test_child_scopes = index.child_scopes(test_class);
assert_eq!(
scope_names(test_child_scopes, &db, file),
vec!["foo", "baz"]
);
let bar_scope = index
.descendent_scopes(FileScopeId::root())
.descendent_scopes(FileScopeId::global())
.nth(2)
.unwrap()
.0;
let ancestors: Vec<_> = index
.ancestor_scopes(bar_scope)
.map(|(_, scope)| scope.name())
.collect();
let ancestors = index.ancestor_scopes(bar_scope);
assert_eq!(ancestors, vec!["bar", "foo", "Test", "<module>"]);
assert_eq!(
scope_names(ancestors, &db, file),
vec!["bar", "foo", "Test", "<module>"]
);
}
}

View File

@@ -1,15 +1,12 @@
use rustc_hash::FxHashMap;
use ruff_db::parsed::ParsedModule;
use ruff_db::vfs::VfsFile;
use ruff_index::{newtype_index, IndexVec};
use ruff_index::newtype_index;
use ruff_python_ast as ast;
use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::ExpressionRef;
use crate::ast_node_ref::AstNodeRef;
use crate::node_key::NodeKey;
use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey;
use crate::semantic_index::semantic_index;
use crate::semantic_index::symbol::{FileScopeId, ScopeId};
use crate::semantic_index::symbol::ScopeId;
use crate::Db;
/// AST ids for a single scope.
@@ -27,367 +24,187 @@ use crate::Db;
///
/// x = foo()
/// ```
#[derive(Debug)]
pub(crate) struct AstIds {
/// Maps expression ids to their expressions.
expressions: IndexVec<ScopeExpressionId, AstNodeRef<ast::Expr>>,
/// Maps expressions to their expression id. Uses `NodeKey` because it avoids cloning [`Parsed`].
expressions_map: FxHashMap<NodeKey, ScopeExpressionId>,
statements: IndexVec<ScopeStatementId, AstNodeRef<ast::Stmt>>,
statements_map: FxHashMap<NodeKey, ScopeStatementId>,
expressions_map: FxHashMap<ExpressionNodeKey, ScopedExpressionId>,
/// Maps expressions which "use" a symbol (that is, [`ExprName`]) to a use id.
uses_map: FxHashMap<ExpressionNodeKey, ScopedUseId>,
}
impl AstIds {
fn statement_id<'a, N>(&self, node: N) -> ScopeStatementId
where
N: Into<AnyNodeRef<'a>>,
{
self.statements_map[&NodeKey::from_node(node.into())]
fn expression_id(&self, key: impl Into<ExpressionNodeKey>) -> ScopedExpressionId {
self.expressions_map[&key.into()]
}
fn expression_id<'a, N>(&self, node: N) -> ScopeExpressionId
where
N: Into<AnyNodeRef<'a>>,
{
self.expressions_map[&NodeKey::from_node(node.into())]
fn use_id(&self, key: impl Into<ExpressionNodeKey>) -> ScopedUseId {
self.uses_map[&key.into()]
}
}
#[allow(clippy::missing_fields_in_debug)]
impl std::fmt::Debug for AstIds {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("AstIds")
.field("expressions", &self.expressions)
.field("statements", &self.statements)
.finish()
}
}
fn ast_ids(db: &dyn Db, scope: ScopeId) -> &AstIds {
fn ast_ids<'db>(db: &'db dyn Db, scope: ScopeId) -> &'db AstIds {
semantic_index(db, scope.file(db)).ast_ids(scope.file_scope_id(db))
}
/// Node that can be uniquely identified by an id in a [`FileScopeId`].
pub trait ScopeAstIdNode {
pub trait HasScopedUseId {
/// The type of the ID uniquely identifying the use.
type Id: Copy;
/// Returns the ID that uniquely identifies the use in `scope`.
fn scoped_use_id(&self, db: &dyn Db, scope: ScopeId) -> Self::Id;
}
/// Uniquely identifies a use of a name in a [`crate::semantic_index::symbol::FileScopeId`].
#[newtype_index]
pub struct ScopedUseId;
impl HasScopedUseId for ast::ExprName {
type Id = ScopedUseId;
fn scoped_use_id(&self, db: &dyn Db, scope: ScopeId) -> Self::Id {
let expression_ref = ExpressionRef::from(self);
expression_ref.scoped_use_id(db, scope)
}
}
impl HasScopedUseId for ast::ExpressionRef<'_> {
type Id = ScopedUseId;
fn scoped_use_id(&self, db: &dyn Db, scope: ScopeId) -> Self::Id {
let ast_ids = ast_ids(db, scope);
ast_ids.use_id(*self)
}
}
pub trait HasScopedAstId {
/// The type of the ID uniquely identifying the node.
type Id: Copy;
/// Returns the ID that uniquely identifies the node in `scope`.
///
/// ## Panics
/// Panics if the node doesn't belong to `file` or is outside `scope`.
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, scope: FileScopeId) -> Self::Id;
/// Looks up the AST node by its ID.
///
/// ## Panics
/// May panic if the `id` does not belong to the AST of `file`, or is outside `scope`.
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self
where
Self: Sized;
fn scoped_ast_id(&self, db: &dyn Db, scope: ScopeId) -> Self::Id;
}
/// Extension trait for AST nodes that can be resolved by an `AstId`.
pub trait AstIdNode {
type ScopeId: Copy;
/// Resolves the AST id of the node.
///
/// ## Panics
/// May panic if the node does not belongs to `file`'s AST or is outside of `scope`. It may also
/// return an incorrect node if that's the case.
fn ast_id(&self, db: &dyn Db, file: VfsFile, scope: FileScopeId) -> AstId<Self::ScopeId>;
/// Resolves the AST node for `id`.
///
/// ## Panics
/// May panic if the `id` does not belong to the AST of `file` or it returns an incorrect node.
fn lookup(db: &dyn Db, file: VfsFile, id: AstId<Self::ScopeId>) -> &Self
where
Self: Sized;
}
impl<T> AstIdNode for T
where
T: ScopeAstIdNode,
{
type ScopeId = T::Id;
fn ast_id(&self, db: &dyn Db, file: VfsFile, scope: FileScopeId) -> AstId<Self::ScopeId> {
let in_scope_id = self.scope_ast_id(db, file, scope);
AstId { scope, in_scope_id }
}
fn lookup(db: &dyn Db, file: VfsFile, id: AstId<Self::ScopeId>) -> &Self
where
Self: Sized,
{
let scope = id.scope;
Self::lookup_in_scope(db, file, scope, id.in_scope_id)
}
}
/// Uniquely identifies an AST node in a file.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct AstId<L: Copy> {
/// The node's scope.
scope: FileScopeId,
/// The ID of the node inside [`Self::scope`].
in_scope_id: L,
}
impl<L: Copy> AstId<L> {
pub(super) fn new(scope: FileScopeId, in_scope_id: L) -> Self {
Self { scope, in_scope_id }
}
pub(super) fn in_scope_id(self) -> L {
self.in_scope_id
}
}
/// Uniquely identifies an [`ast::Expr`] in a [`FileScopeId`].
/// Uniquely identifies an [`ast::Expr`] in a [`crate::semantic_index::symbol::FileScopeId`].
#[newtype_index]
pub struct ScopeExpressionId;
pub struct ScopedExpressionId;
impl ScopeAstIdNode for ast::Expr {
type Id = ScopeExpressionId;
macro_rules! impl_has_scoped_expression_id {
($ty: ty) => {
impl HasScopedAstId for $ty {
type Id = ScopedExpressionId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ast_ids.expressions_map[&NodeKey::from_node(self)]
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, file_scope: FileScopeId, id: Self::Id) -> &Self {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ast_ids.expressions[id].node()
}
fn scoped_ast_id(&self, db: &dyn Db, scope: ScopeId) -> Self::Id {
let expression_ref = ExpressionRef::from(self);
expression_ref.scoped_ast_id(db, scope)
}
}
};
}
/// Uniquely identifies an [`ast::Stmt`] in a [`FileScopeId`].
#[newtype_index]
pub struct ScopeStatementId;
impl_has_scoped_expression_id!(ast::ExprBoolOp);
impl_has_scoped_expression_id!(ast::ExprName);
impl_has_scoped_expression_id!(ast::ExprBinOp);
impl_has_scoped_expression_id!(ast::ExprUnaryOp);
impl_has_scoped_expression_id!(ast::ExprLambda);
impl_has_scoped_expression_id!(ast::ExprIf);
impl_has_scoped_expression_id!(ast::ExprDict);
impl_has_scoped_expression_id!(ast::ExprSet);
impl_has_scoped_expression_id!(ast::ExprListComp);
impl_has_scoped_expression_id!(ast::ExprSetComp);
impl_has_scoped_expression_id!(ast::ExprDictComp);
impl_has_scoped_expression_id!(ast::ExprGenerator);
impl_has_scoped_expression_id!(ast::ExprAwait);
impl_has_scoped_expression_id!(ast::ExprYield);
impl_has_scoped_expression_id!(ast::ExprYieldFrom);
impl_has_scoped_expression_id!(ast::ExprCompare);
impl_has_scoped_expression_id!(ast::ExprCall);
impl_has_scoped_expression_id!(ast::ExprFString);
impl_has_scoped_expression_id!(ast::ExprStringLiteral);
impl_has_scoped_expression_id!(ast::ExprBytesLiteral);
impl_has_scoped_expression_id!(ast::ExprNumberLiteral);
impl_has_scoped_expression_id!(ast::ExprBooleanLiteral);
impl_has_scoped_expression_id!(ast::ExprNoneLiteral);
impl_has_scoped_expression_id!(ast::ExprEllipsisLiteral);
impl_has_scoped_expression_id!(ast::ExprAttribute);
impl_has_scoped_expression_id!(ast::ExprSubscript);
impl_has_scoped_expression_id!(ast::ExprStarred);
impl_has_scoped_expression_id!(ast::ExprNamed);
impl_has_scoped_expression_id!(ast::ExprList);
impl_has_scoped_expression_id!(ast::ExprTuple);
impl_has_scoped_expression_id!(ast::ExprSlice);
impl_has_scoped_expression_id!(ast::ExprIpyEscapeCommand);
impl_has_scoped_expression_id!(ast::Expr);
impl ScopeAstIdNode for ast::Stmt {
type Id = ScopeStatementId;
impl HasScopedAstId for ast::ExpressionRef<'_> {
type Id = ScopedExpressionId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
fn scoped_ast_id(&self, db: &dyn Db, scope: ScopeId) -> Self::Id {
let ast_ids = ast_ids(db, scope);
ast_ids.statement_id(self)
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, file_scope: FileScopeId, id: Self::Id) -> &Self {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ast_ids.statements[id].node()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeFunctionId(pub(super) ScopeStatementId);
impl ScopeAstIdNode for ast::StmtFunctionDef {
type Id = ScopeFunctionId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeFunctionId(ast_ids.statement_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self {
ast::Stmt::lookup_in_scope(db, file, scope, id.0)
.as_function_def_stmt()
.unwrap()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeClassId(pub(super) ScopeStatementId);
impl ScopeAstIdNode for ast::StmtClassDef {
type Id = ScopeClassId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeClassId(ast_ids.statement_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self {
let statement = ast::Stmt::lookup_in_scope(db, file, scope, id.0);
statement.as_class_def_stmt().unwrap()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeAssignmentId(pub(super) ScopeStatementId);
impl ScopeAstIdNode for ast::StmtAssign {
type Id = ScopeAssignmentId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeAssignmentId(ast_ids.statement_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self {
let statement = ast::Stmt::lookup_in_scope(db, file, scope, id.0);
statement.as_assign_stmt().unwrap()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeAnnotatedAssignmentId(ScopeStatementId);
impl ScopeAstIdNode for ast::StmtAnnAssign {
type Id = ScopeAnnotatedAssignmentId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeAnnotatedAssignmentId(ast_ids.statement_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self {
let statement = ast::Stmt::lookup_in_scope(db, file, scope, id.0);
statement.as_ann_assign_stmt().unwrap()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeImportId(pub(super) ScopeStatementId);
impl ScopeAstIdNode for ast::StmtImport {
type Id = ScopeImportId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeImportId(ast_ids.statement_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self {
let statement = ast::Stmt::lookup_in_scope(db, file, scope, id.0);
statement.as_import_stmt().unwrap()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeImportFromId(pub(super) ScopeStatementId);
impl ScopeAstIdNode for ast::StmtImportFrom {
type Id = ScopeImportFromId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeImportFromId(ast_ids.statement_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self {
let statement = ast::Stmt::lookup_in_scope(db, file, scope, id.0);
statement.as_import_from_stmt().unwrap()
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
pub struct ScopeNamedExprId(pub(super) ScopeExpressionId);
impl ScopeAstIdNode for ast::ExprNamed {
type Id = ScopeNamedExprId;
fn scope_ast_id(&self, db: &dyn Db, file: VfsFile, file_scope: FileScopeId) -> Self::Id {
let scope = file_scope.to_scope_id(db, file);
let ast_ids = ast_ids(db, scope);
ScopeNamedExprId(ast_ids.expression_id(self))
}
fn lookup_in_scope(db: &dyn Db, file: VfsFile, scope: FileScopeId, id: Self::Id) -> &Self
where
Self: Sized,
{
let expression = ast::Expr::lookup_in_scope(db, file, scope, id.0);
expression.as_named_expr().unwrap()
ast_ids.expression_id(*self)
}
}
#[derive(Debug)]
pub(super) struct AstIdsBuilder {
expressions: IndexVec<ScopeExpressionId, AstNodeRef<ast::Expr>>,
expressions_map: FxHashMap<NodeKey, ScopeExpressionId>,
statements: IndexVec<ScopeStatementId, AstNodeRef<ast::Stmt>>,
statements_map: FxHashMap<NodeKey, ScopeStatementId>,
expressions_map: FxHashMap<ExpressionNodeKey, ScopedExpressionId>,
uses_map: FxHashMap<ExpressionNodeKey, ScopedUseId>,
}
impl AstIdsBuilder {
pub(super) fn new() -> Self {
Self {
expressions: IndexVec::default(),
expressions_map: FxHashMap::default(),
statements: IndexVec::default(),
statements_map: FxHashMap::default(),
uses_map: FxHashMap::default(),
}
}
/// Adds `stmt` to the AST ids map and returns its id.
///
/// ## Safety
/// The function is marked as unsafe because it calls [`AstNodeRef::new`] which requires
/// that `stmt` is a child of `parsed`.
#[allow(unsafe_code)]
pub(super) unsafe fn record_statement(
&mut self,
stmt: &ast::Stmt,
parsed: &ParsedModule,
) -> ScopeStatementId {
let statement_id = self.statements.push(AstNodeRef::new(parsed.clone(), stmt));
/// Adds `expr` to the expression ids map and returns its id.
pub(super) fn record_expression(&mut self, expr: &ast::Expr) -> ScopedExpressionId {
let expression_id = self.expressions_map.len().into();
self.statements_map
.insert(NodeKey::from_node(stmt), statement_id);
statement_id
}
/// Adds `expr` to the AST ids map and returns its id.
///
/// ## Safety
/// The function is marked as unsafe because it calls [`AstNodeRef::new`] which requires
/// that `expr` is a child of `parsed`.
#[allow(unsafe_code)]
pub(super) unsafe fn record_expression(
&mut self,
expr: &ast::Expr,
parsed: &ParsedModule,
) -> ScopeExpressionId {
let expression_id = self.expressions.push(AstNodeRef::new(parsed.clone(), expr));
self.expressions_map
.insert(NodeKey::from_node(expr), expression_id);
self.expressions_map.insert(expr.into(), expression_id);
expression_id
}
/// Adds `expr` to the use ids map and returns its id.
pub(super) fn record_use(&mut self, expr: &ast::Expr) -> ScopedUseId {
let use_id = self.uses_map.len().into();
self.uses_map.insert(expr.into(), use_id);
use_id
}
pub(super) fn finish(mut self) -> AstIds {
self.expressions.shrink_to_fit();
self.expressions_map.shrink_to_fit();
self.statements.shrink_to_fit();
self.statements_map.shrink_to_fit();
self.uses_map.shrink_to_fit();
AstIds {
expressions: self.expressions,
expressions_map: self.expressions_map,
statements: self.statements,
statements_map: self.statements_map,
uses_map: self.uses_map,
}
}
}
/// Node key that can only be constructed for expressions.
pub(crate) mod node_key {
use ruff_python_ast as ast;
use crate::node_key::NodeKey;
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub(crate) struct ExpressionNodeKey(NodeKey);
impl From<ast::ExpressionRef<'_>> for ExpressionNodeKey {
fn from(value: ast::ExpressionRef<'_>) -> Self {
Self(NodeKey::from_node(value))
}
}
impl From<&ast::Expr> for ExpressionNodeKey {
fn from(value: &ast::Expr) -> Self {
Self(NodeKey::from_node(value))
}
}
}

View File

@@ -2,59 +2,75 @@ use std::sync::Arc;
use rustc_hash::FxHashMap;
use ruff_db::files::File;
use ruff_db::parsed::ParsedModule;
use ruff_index::IndexVec;
use ruff_python_ast as ast;
use ruff_python_ast::name::Name;
use ruff_python_ast::visitor::{walk_expr, walk_stmt, Visitor};
use crate::name::Name;
use crate::node_key::NodeKey;
use crate::semantic_index::ast_ids::{
AstId, AstIdsBuilder, ScopeAssignmentId, ScopeClassId, ScopeFunctionId, ScopeImportFromId,
ScopeImportId, ScopeNamedExprId,
use crate::ast_node_ref::AstNodeRef;
use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey;
use crate::semantic_index::ast_ids::AstIdsBuilder;
use crate::semantic_index::definition::{
AssignmentDefinitionNodeRef, Definition, DefinitionNodeKey, DefinitionNodeRef,
ImportFromDefinitionNodeRef,
};
use crate::semantic_index::definition::{Definition, ImportDefinition, ImportFromDefinition};
use crate::semantic_index::expression::Expression;
use crate::semantic_index::symbol::{
FileScopeId, FileSymbolId, Scope, ScopedSymbolId, SymbolFlags, SymbolTableBuilder,
FileScopeId, NodeWithScopeKey, NodeWithScopeRef, Scope, ScopeId, ScopedSymbolId, SymbolFlags,
SymbolTableBuilder,
};
use crate::semantic_index::{NodeWithScopeId, SemanticIndex};
use crate::semantic_index::use_def::{FlowSnapshot, UseDefMapBuilder};
use crate::semantic_index::SemanticIndex;
use crate::Db;
pub(super) struct SemanticIndexBuilder<'a> {
pub(super) struct SemanticIndexBuilder<'db> {
// Builder state
module: &'a ParsedModule,
db: &'db dyn Db,
file: File,
module: &'db ParsedModule,
scope_stack: Vec<FileScopeId>,
/// the definition whose target(s) we are currently walking
current_definition: Option<Definition>,
/// The assignment we're currently visiting.
current_assignment: Option<CurrentAssignment<'db>>,
/// Flow states at each `break` in the current loop.
loop_break_states: Vec<FlowSnapshot>,
// Semantic Index fields
scopes: IndexVec<FileScopeId, Scope>,
scope_ids_by_scope: IndexVec<FileScopeId, ScopeId<'db>>,
symbol_tables: IndexVec<FileScopeId, SymbolTableBuilder>,
ast_ids: IndexVec<FileScopeId, AstIdsBuilder>,
expression_scopes: FxHashMap<NodeKey, FileScopeId>,
scope_nodes: IndexVec<FileScopeId, NodeWithScopeId>,
use_def_maps: IndexVec<FileScopeId, UseDefMapBuilder<'db>>,
scopes_by_node: FxHashMap<NodeWithScopeKey, FileScopeId>,
scopes_by_expression: FxHashMap<ExpressionNodeKey, FileScopeId>,
definitions_by_node: FxHashMap<DefinitionNodeKey, Definition<'db>>,
expressions_by_node: FxHashMap<ExpressionNodeKey, Expression<'db>>,
}
impl<'a> SemanticIndexBuilder<'a> {
pub(super) fn new(parsed: &'a ParsedModule) -> Self {
impl<'db> SemanticIndexBuilder<'db> {
pub(super) fn new(db: &'db dyn Db, file: File, parsed: &'db ParsedModule) -> Self {
let mut builder = Self {
db,
file,
module: parsed,
scope_stack: Vec::new(),
current_definition: None,
current_assignment: None,
loop_break_states: vec![],
scopes: IndexVec::new(),
symbol_tables: IndexVec::new(),
ast_ids: IndexVec::new(),
expression_scopes: FxHashMap::default(),
scope_nodes: IndexVec::new(),
scope_ids_by_scope: IndexVec::new(),
use_def_maps: IndexVec::new(),
scopes_by_expression: FxHashMap::default(),
scopes_by_node: FxHashMap::default(),
definitions_by_node: FxHashMap::default(),
expressions_by_node: FxHashMap::default(),
};
builder.push_scope_with_parent(
NodeWithScopeId::Module,
&Name::new_static("<module>"),
None,
None,
None,
);
builder.push_scope_with_parent(NodeWithScopeRef::Module, None);
builder
}
@@ -66,44 +82,41 @@ impl<'a> SemanticIndexBuilder<'a> {
.expect("Always to have a root scope")
}
fn push_scope(
&mut self,
node: NodeWithScopeId,
name: &Name,
defining_symbol: Option<FileSymbolId>,
definition: Option<Definition>,
) {
fn push_scope(&mut self, node: NodeWithScopeRef) {
let parent = self.current_scope();
self.push_scope_with_parent(node, name, defining_symbol, definition, Some(parent));
self.push_scope_with_parent(node, Some(parent));
}
fn push_scope_with_parent(
&mut self,
node: NodeWithScopeId,
name: &Name,
defining_symbol: Option<FileSymbolId>,
definition: Option<Definition>,
parent: Option<FileScopeId>,
) {
fn push_scope_with_parent(&mut self, node: NodeWithScopeRef, parent: Option<FileScopeId>) {
let children_start = self.scopes.next_index() + 1;
let scope = Scope {
name: name.clone(),
parent,
defining_symbol,
definition,
kind: node.scope_kind(),
descendents: children_start..children_start,
};
let scope_id = self.scopes.push(scope);
let file_scope_id = self.scopes.push(scope);
self.symbol_tables.push(SymbolTableBuilder::new());
self.use_def_maps.push(UseDefMapBuilder::new());
let ast_id_scope = self.ast_ids.push(AstIdsBuilder::new());
let scope_node_id = self.scope_nodes.push(node);
debug_assert_eq!(ast_id_scope, scope_id);
debug_assert_eq!(scope_id, scope_node_id);
self.scope_stack.push(scope_id);
#[allow(unsafe_code)]
// SAFETY: `node` is guaranteed to be a child of `self.module`
let scope_id = ScopeId::new(
self.db,
self.file,
file_scope_id,
unsafe { node.to_kind(self.module.clone()) },
countme::Count::default(),
);
self.scope_ids_by_scope.push(scope_id);
self.scopes_by_node.insert(node.node_key(), file_scope_id);
debug_assert_eq!(ast_id_scope, file_scope_id);
self.scope_stack.push(file_scope_id);
}
fn pop_scope(&mut self) -> FileScopeId {
@@ -119,56 +132,120 @@ impl<'a> SemanticIndexBuilder<'a> {
&mut self.symbol_tables[scope_id]
}
fn current_use_def_map_mut(&mut self) -> &mut UseDefMapBuilder<'db> {
let scope_id = self.current_scope();
&mut self.use_def_maps[scope_id]
}
fn current_use_def_map(&self) -> &UseDefMapBuilder<'db> {
let scope_id = self.current_scope();
&self.use_def_maps[scope_id]
}
fn current_ast_ids(&mut self) -> &mut AstIdsBuilder {
let scope_id = self.current_scope();
&mut self.ast_ids[scope_id]
}
fn add_or_update_symbol(&mut self, name: Name, flags: SymbolFlags) -> ScopedSymbolId {
let symbol_table = self.current_symbol_table();
symbol_table.add_or_update_symbol(name, flags, None)
fn flow_snapshot(&self) -> FlowSnapshot {
self.current_use_def_map().snapshot()
}
fn add_or_update_symbol_with_definition(
&mut self,
name: Name,
fn flow_restore(&mut self, state: FlowSnapshot) {
self.current_use_def_map_mut().restore(state);
}
definition: Definition,
) -> ScopedSymbolId {
fn flow_merge(&mut self, state: &FlowSnapshot) {
self.current_use_def_map_mut().merge(state);
}
fn add_or_update_symbol(&mut self, name: Name, flags: SymbolFlags) -> ScopedSymbolId {
let symbol_table = self.current_symbol_table();
let (symbol_id, added) = symbol_table.add_or_update_symbol(name, flags);
if added {
let use_def_map = self.current_use_def_map_mut();
use_def_map.add_symbol(symbol_id);
}
symbol_id
}
symbol_table.add_or_update_symbol(name, SymbolFlags::IS_DEFINED, Some(definition))
fn add_definition<'a>(
&mut self,
symbol: ScopedSymbolId,
definition_node: impl Into<DefinitionNodeRef<'a>>,
) -> Definition<'db> {
let definition_node = definition_node.into();
let definition = Definition::new(
self.db,
self.file,
self.current_scope(),
symbol,
#[allow(unsafe_code)]
unsafe {
definition_node.into_owned(self.module.clone())
},
countme::Count::default(),
);
self.definitions_by_node
.insert(definition_node.key(), definition);
self.current_use_def_map_mut()
.record_definition(symbol, definition);
definition
}
/// Record an expression that needs to be a Salsa ingredient, because we need to infer its type
/// standalone (type narrowing tests, RHS of an assignment.)
fn add_standalone_expression(&mut self, expression_node: &ast::Expr) {
let expression = Expression::new(
self.db,
self.file,
self.current_scope(),
#[allow(unsafe_code)]
unsafe {
AstNodeRef::new(self.module.clone(), expression_node)
},
countme::Count::default(),
);
self.expressions_by_node
.insert(expression_node.into(), expression);
}
fn with_type_params(
&mut self,
name: &Name,
with_params: &WithTypeParams,
defining_symbol: FileSymbolId,
with_scope: NodeWithScopeRef,
type_params: Option<&'db ast::TypeParams>,
nested: impl FnOnce(&mut Self) -> FileScopeId,
) -> FileScopeId {
let type_params = with_params.type_parameters();
if let Some(type_params) = type_params {
let type_node = match with_params {
WithTypeParams::ClassDef { id, .. } => NodeWithScopeId::ClassTypeParams(*id),
WithTypeParams::FunctionDef { id, .. } => NodeWithScopeId::FunctionTypeParams(*id),
};
self.push_scope(with_scope);
self.push_scope(
type_node,
name,
Some(defining_symbol),
Some(with_params.definition()),
);
for type_param in &type_params.type_params {
let name = match type_param {
ast::TypeParam::TypeVar(ast::TypeParamTypeVar { name, .. }) => name,
ast::TypeParam::ParamSpec(ast::TypeParamParamSpec { name, .. }) => name,
ast::TypeParam::TypeVarTuple(ast::TypeParamTypeVarTuple { name, .. }) => name,
let (name, bound, default) = match type_param {
ast::TypeParam::TypeVar(ast::TypeParamTypeVar {
range: _,
name,
bound,
default,
}) => (name, bound, default),
ast::TypeParam::ParamSpec(ast::TypeParamParamSpec {
name, default, ..
}) => (name, &None, default),
ast::TypeParam::TypeVarTuple(ast::TypeParamTypeVarTuple {
name,
default,
..
}) => (name, &None, default),
};
self.add_or_update_symbol(Name::new(name), SymbolFlags::IS_DEFINED);
// TODO create Definition for typevars
self.add_or_update_symbol(name.id.clone(), SymbolFlags::IS_DEFINED);
if let Some(bound) = bound {
self.visit_expr(bound);
}
if let Some(default) = default {
self.visit_expr(default);
}
}
}
@@ -181,7 +258,7 @@ impl<'a> SemanticIndexBuilder<'a> {
nested_scope
}
pub(super) fn build(mut self) -> SemanticIndex {
pub(super) fn build(mut self) -> SemanticIndex<'db> {
let module = self.module;
self.visit_body(module.suite());
@@ -189,7 +266,7 @@ impl<'a> SemanticIndexBuilder<'a> {
self.pop_scope();
assert!(self.scope_stack.is_empty());
assert!(self.current_definition.is_none());
assert!(self.current_assignment.is_none());
let mut symbol_tables: IndexVec<_, _> = self
.symbol_tables
@@ -197,6 +274,12 @@ impl<'a> SemanticIndexBuilder<'a> {
.map(|builder| Arc::new(builder.finish()))
.collect();
let mut use_def_maps: IndexVec<_, _> = self
.use_def_maps
.into_iter()
.map(|builder| Arc::new(builder.finish()))
.collect();
let mut ast_ids: IndexVec<_, _> = self
.ast_ids
.into_iter()
@@ -204,63 +287,54 @@ impl<'a> SemanticIndexBuilder<'a> {
.collect();
self.scopes.shrink_to_fit();
ast_ids.shrink_to_fit();
symbol_tables.shrink_to_fit();
self.expression_scopes.shrink_to_fit();
self.scope_nodes.shrink_to_fit();
use_def_maps.shrink_to_fit();
ast_ids.shrink_to_fit();
self.scopes_by_expression.shrink_to_fit();
self.definitions_by_node.shrink_to_fit();
self.scope_ids_by_scope.shrink_to_fit();
self.scopes_by_node.shrink_to_fit();
SemanticIndex {
symbol_tables,
scopes: self.scopes,
scope_nodes: self.scope_nodes,
definitions_by_node: self.definitions_by_node,
expressions_by_node: self.expressions_by_node,
scope_ids_by_scope: self.scope_ids_by_scope,
ast_ids,
expression_scopes: self.expression_scopes,
scopes_by_expression: self.scopes_by_expression,
scopes_by_node: self.scopes_by_node,
use_def_maps,
}
}
}
impl Visitor<'_> for SemanticIndexBuilder<'_> {
fn visit_stmt(&mut self, stmt: &ast::Stmt) {
let module = self.module;
#[allow(unsafe_code)]
let statement_id = unsafe {
// SAFETY: The builder only visits nodes that are part of `module`. This guarantees that
// the current statement must be a child of `module`.
self.current_ast_ids().record_statement(stmt, module)
};
impl<'db, 'ast> Visitor<'ast> for SemanticIndexBuilder<'db>
where
'ast: 'db,
{
fn visit_stmt(&mut self, stmt: &'ast ast::Stmt) {
match stmt {
ast::Stmt::FunctionDef(function_def) => {
for decorator in &function_def.decorator_list {
self.visit_decorator(decorator);
}
let name = Name::new(&function_def.name.id);
let function_id = ScopeFunctionId(statement_id);
let definition = Definition::FunctionDef(function_id);
let scope = self.current_scope();
let symbol = FileSymbolId::new(
scope,
self.add_or_update_symbol_with_definition(name.clone(), definition),
);
let symbol = self
.add_or_update_symbol(function_def.name.id.clone(), SymbolFlags::IS_DEFINED);
self.add_definition(symbol, function_def);
self.with_type_params(
&name,
&WithTypeParams::FunctionDef {
node: function_def,
id: AstId::new(scope, function_id),
},
symbol,
NodeWithScopeRef::FunctionTypeParameters(function_def),
function_def.type_params.as_deref(),
|builder| {
builder.visit_parameters(&function_def.parameters);
for expr in &function_def.returns {
if let Some(expr) = &function_def.returns {
builder.visit_annotation(expr);
}
builder.push_scope(
NodeWithScopeId::Function(AstId::new(scope, function_id)),
&name,
Some(symbol),
Some(definition),
);
builder.push_scope(NodeWithScopeRef::Function(function_def));
builder.visit_body(&function_def.body);
builder.pop_scope()
},
@@ -271,81 +345,124 @@ impl Visitor<'_> for SemanticIndexBuilder<'_> {
self.visit_decorator(decorator);
}
let name = Name::new(&class.name.id);
let class_id = ScopeClassId(statement_id);
let definition = Definition::from(class_id);
let scope = self.current_scope();
let id = FileSymbolId::new(
self.current_scope(),
self.add_or_update_symbol_with_definition(name.clone(), definition),
);
let symbol =
self.add_or_update_symbol(class.name.id.clone(), SymbolFlags::IS_DEFINED);
self.add_definition(symbol, class);
self.with_type_params(
&name,
&WithTypeParams::ClassDef {
node: class,
id: AstId::new(scope, class_id),
},
id,
NodeWithScopeRef::ClassTypeParameters(class),
class.type_params.as_deref(),
|builder| {
if let Some(arguments) = &class.arguments {
builder.visit_arguments(arguments);
}
builder.push_scope(
NodeWithScopeId::Class(AstId::new(scope, class_id)),
&name,
Some(id),
Some(definition),
);
builder.push_scope(NodeWithScopeRef::Class(class));
builder.visit_body(&class.body);
builder.pop_scope()
},
);
}
ast::Stmt::Import(ast::StmtImport { names, .. }) => {
for (i, alias) in names.iter().enumerate() {
ast::Stmt::Import(node) => {
for alias in &node.names {
let symbol_name = if let Some(asname) = &alias.asname {
asname.id.as_str()
asname.id.clone()
} else {
alias.name.id.split('.').next().unwrap()
Name::new(alias.name.id.split('.').next().unwrap())
};
let def = Definition::Import(ImportDefinition {
import_id: ScopeImportId(statement_id),
alias: u32::try_from(i).unwrap(),
});
self.add_or_update_symbol_with_definition(Name::new(symbol_name), def);
let symbol = self.add_or_update_symbol(symbol_name, SymbolFlags::IS_DEFINED);
self.add_definition(symbol, alias);
}
}
ast::Stmt::ImportFrom(ast::StmtImportFrom {
module: _,
names,
level: _,
..
}) => {
for (i, alias) in names.iter().enumerate() {
ast::Stmt::ImportFrom(node) => {
for (alias_index, alias) in node.names.iter().enumerate() {
let symbol_name = if let Some(asname) = &alias.asname {
asname.id.as_str()
&asname.id
} else {
alias.name.id.as_str()
&alias.name.id
};
let def = Definition::ImportFrom(ImportFromDefinition {
import_id: ScopeImportFromId(statement_id),
name: u32::try_from(i).unwrap(),
});
self.add_or_update_symbol_with_definition(Name::new(symbol_name), def);
let symbol =
self.add_or_update_symbol(symbol_name.clone(), SymbolFlags::IS_DEFINED);
self.add_definition(symbol, ImportFromDefinitionNodeRef { node, alias_index });
}
}
ast::Stmt::Assign(node) => {
debug_assert!(self.current_definition.is_none());
debug_assert!(self.current_assignment.is_none());
self.visit_expr(&node.value);
self.current_definition =
Some(Definition::Assignment(ScopeAssignmentId(statement_id)));
self.add_standalone_expression(&node.value);
self.current_assignment = Some(node.into());
for target in &node.targets {
self.visit_expr(target);
}
self.current_definition = None;
self.current_assignment = None;
}
ast::Stmt::AnnAssign(node) => {
debug_assert!(self.current_assignment.is_none());
// TODO deferred annotation visiting
self.visit_expr(&node.annotation);
if let Some(value) = &node.value {
self.visit_expr(value);
}
self.current_assignment = Some(node.into());
self.visit_expr(&node.target);
self.current_assignment = None;
}
ast::Stmt::If(node) => {
self.visit_expr(&node.test);
let pre_if = self.flow_snapshot();
self.visit_body(&node.body);
let mut post_clauses: Vec<FlowSnapshot> = vec![];
for clause in &node.elif_else_clauses {
// snapshot after every block except the last; the last one will just become
// the state that we merge the other snapshots into
post_clauses.push(self.flow_snapshot());
// we can only take an elif/else branch if none of the previous ones were
// taken, so the block entry state is always `pre_if`
self.flow_restore(pre_if.clone());
self.visit_elif_else_clause(clause);
}
for post_clause_state in post_clauses {
self.flow_merge(&post_clause_state);
}
let has_else = node
.elif_else_clauses
.last()
.is_some_and(|clause| clause.test.is_none());
if !has_else {
// if there's no else clause, then it's possible we took none of the branches,
// and the pre_if state can reach here
self.flow_merge(&pre_if);
}
}
ast::Stmt::While(node) => {
self.visit_expr(&node.test);
let pre_loop = self.flow_snapshot();
// Save aside any break states from an outer loop
let saved_break_states = std::mem::take(&mut self.loop_break_states);
self.visit_body(&node.body);
// Get the break states from the body of this loop, and restore the saved outer
// ones.
let break_states =
std::mem::replace(&mut self.loop_break_states, saved_break_states);
// We may execute the `else` clause without ever executing the body, so merge in
// the pre-loop state before visiting `else`.
self.flow_merge(&pre_loop);
self.visit_body(&node.orelse);
// Breaking out of a while loop bypasses the `else` clause, so merge in the break
// states after visiting `else`.
for break_state in break_states {
self.flow_merge(&break_state);
}
}
ast::Stmt::Break(_) => {
self.loop_break_states.push(self.flow_snapshot());
}
_ => {
walk_stmt(self, stmt);
@@ -353,71 +470,78 @@ impl Visitor<'_> for SemanticIndexBuilder<'_> {
}
}
fn visit_expr(&mut self, expr: &'_ ast::Expr) {
let module = self.module;
#[allow(unsafe_code)]
let expression_id = unsafe {
// SAFETY: The builder only visits nodes that are part of `module`. This guarantees that
// the current expression must be a child of `module`.
self.current_ast_ids().record_expression(expr, module)
};
self.expression_scopes
.insert(NodeKey::from_node(expr), self.current_scope());
fn visit_expr(&mut self, expr: &'ast ast::Expr) {
self.scopes_by_expression
.insert(expr.into(), self.current_scope());
self.current_ast_ids().record_expression(expr);
match expr {
ast::Expr::Name(ast::ExprName { id, ctx, .. }) => {
ast::Expr::Name(name_node) => {
let ast::ExprName { id, ctx, .. } = name_node;
let flags = match ctx {
ast::ExprContext::Load => SymbolFlags::IS_USED,
ast::ExprContext::Store => SymbolFlags::IS_DEFINED,
ast::ExprContext::Del => SymbolFlags::IS_DEFINED,
ast::ExprContext::Invalid => SymbolFlags::empty(),
};
match self.current_definition {
Some(definition) if flags.contains(SymbolFlags::IS_DEFINED) => {
self.add_or_update_symbol_with_definition(Name::new(id), definition);
}
_ => {
self.add_or_update_symbol(Name::new(id), flags);
let symbol = self.add_or_update_symbol(id.clone(), flags);
if flags.contains(SymbolFlags::IS_DEFINED) {
match self.current_assignment {
Some(CurrentAssignment::Assign(assignment)) => {
self.add_definition(
symbol,
AssignmentDefinitionNodeRef {
assignment,
target: name_node,
},
);
}
Some(CurrentAssignment::AnnAssign(ann_assign)) => {
self.add_definition(symbol, ann_assign);
}
Some(CurrentAssignment::Named(named)) => {
self.add_definition(symbol, named);
}
None => {}
}
}
if flags.contains(SymbolFlags::IS_USED) {
let use_id = self.current_ast_ids().record_use(expr);
self.current_use_def_map_mut().record_use(symbol, use_id);
}
walk_expr(self, expr);
}
ast::Expr::Named(node) => {
debug_assert!(self.current_definition.is_none());
self.current_definition =
Some(Definition::NamedExpr(ScopeNamedExprId(expression_id)));
debug_assert!(self.current_assignment.is_none());
self.current_assignment = Some(node.into());
// TODO walrus in comprehensions is implicitly nonlocal
self.visit_expr(&node.target);
self.current_definition = None;
self.current_assignment = None;
self.visit_expr(&node.value);
}
ast::Expr::Lambda(lambda) => {
if let Some(parameters) = &lambda.parameters {
self.visit_parameters(parameters);
}
self.push_scope(NodeWithScopeRef::Lambda(lambda));
self.visit_expr(lambda.body.as_ref());
self.pop_scope();
}
ast::Expr::If(ast::ExprIf {
body, test, orelse, ..
}) => {
// TODO detect statically known truthy or falsy test (via type inference, not naive
// AST inspection, so we can't simplify here, need to record test expression in CFG
// for later checking)
// AST inspection, so we can't simplify here, need to record test expression for
// later checking)
self.visit_expr(test);
// let if_branch = self.flow_graph_builder.add_branch(self.current_flow_node());
// self.set_current_flow_node(if_branch);
// self.insert_constraint(test);
let pre_if = self.flow_snapshot();
self.visit_expr(body);
// let post_body = self.current_flow_node();
// self.set_current_flow_node(if_branch);
let post_body = self.flow_snapshot();
self.flow_restore(pre_if);
self.visit_expr(orelse);
// let post_else = self
// .flow_graph_builder
// .add_phi(self.current_flow_node(), post_body);
// self.set_current_flow_node(post_else);
self.flow_merge(&post_body);
}
_ => {
walk_expr(self, expr);
@@ -426,29 +550,27 @@ impl Visitor<'_> for SemanticIndexBuilder<'_> {
}
}
enum WithTypeParams<'a> {
ClassDef {
node: &'a ast::StmtClassDef,
id: AstId<ScopeClassId>,
},
FunctionDef {
node: &'a ast::StmtFunctionDef,
id: AstId<ScopeFunctionId>,
},
#[derive(Copy, Clone, Debug)]
enum CurrentAssignment<'a> {
Assign(&'a ast::StmtAssign),
AnnAssign(&'a ast::StmtAnnAssign),
Named(&'a ast::ExprNamed),
}
impl<'a> WithTypeParams<'a> {
fn type_parameters(&self) -> Option<&'a ast::TypeParams> {
match self {
WithTypeParams::ClassDef { node, .. } => node.type_params.as_deref(),
WithTypeParams::FunctionDef { node, .. } => node.type_params.as_deref(),
}
}
fn definition(&self) -> Definition {
match self {
WithTypeParams::ClassDef { id, .. } => Definition::ClassDef(id.in_scope_id()),
WithTypeParams::FunctionDef { id, .. } => Definition::FunctionDef(id.in_scope_id()),
}
impl<'a> From<&'a ast::StmtAssign> for CurrentAssignment<'a> {
fn from(value: &'a ast::StmtAssign) -> Self {
Self::Assign(value)
}
}
impl<'a> From<&'a ast::StmtAnnAssign> for CurrentAssignment<'a> {
fn from(value: &'a ast::StmtAnnAssign) -> Self {
Self::AnnAssign(value)
}
}
impl<'a> From<&'a ast::ExprNamed> for CurrentAssignment<'a> {
fn from(value: &'a ast::ExprNamed) -> Self {
Self::Named(value)
}
}

Some files were not shown because too many files have changed in this diff Show More