Compare commits

...

78 Commits

Author SHA1 Message Date
Micha Reiser
98f29bdcec Discard changes to crates/ruff_python_formatter/src/lib.rs 2023-10-27 17:09:07 -05:00
Micha Reiser
215b1c9f25 Split tuples in return positions by comma first 2023-10-27 17:09:07 -05:00
Zanie
88bf2ac43f Lint 2023-10-27 15:48:21 -05:00
Zanie
16e511a734 Use FormatOptions; exclude file from demisto/content with syntax error 2023-10-27 15:46:33 -05:00
Zanie
7d5122603e Show a different summary if there are only errors 2023-10-27 15:39:49 -05:00
Zanie
7d4b59d6cb Merge branch 'main' into zanie/ecosystem-format 2023-10-27 15:16:20 -05:00
Zanie
ee1883ac6c Fix missing space 2023-10-27 13:28:09 -05:00
Zanie
0c1a7ffbdc Fix chmod 2023-10-27 13:27:40 -05:00
Zanie
1337c4ad2a Add docstrings for formatting 2023-10-27 12:49:29 -05:00
Zanie
c3354ad843 Lint 2023-10-27 12:48:14 -05:00
Zanie
b6066c6bef Fix missing newline 2023-10-27 12:41:17 -05:00
Zanie
eb769ac03d Improve handling of ruff executable lookup 2023-10-27 12:40:42 -05:00
Zanie
39c04c8335 Fix spacing in examples 2023-10-27 12:30:49 -05:00
Zanie
5f28912a0d Remove format ignore lines re 2023-10-27 12:29:36 -05:00
Zanie
05ab316609 Rename cache variable; update clone type 2023-10-27 12:29:03 -05:00
Zanie
a16c04c6db Fix limited_parallelism type annotation 2023-10-27 12:25:55 -05:00
Zanie
2a14b94995 Explicitly cast checkout directory to string 2023-10-27 12:23:51 -05:00
Zanie
c4bddb88c9 Use cls in DiagnosticLine.try_from_string 2023-10-27 12:23:04 -05:00
Zanie
8441db69ae Fix total_changes_by_rule type annotation 2023-10-27 12:22:25 -05:00
Zanie
e304d46637 Add mutability note for RuleChanges 2023-10-27 12:21:03 -05:00
Zanie
23050b3653 Guard against dataclass types in jsonable implementation 2023-10-27 12:15:59 -05:00
Zanie
54bc49303f Add deprecation message to old ecosystem script 2023-10-27 12:10:06 -05:00
Zanie
63f1122a5a Run CI workflow when its changed 2023-10-27 12:08:50 -05:00
Zanie
d5127842d7 Undo changes to CI 2023-10-27 12:08:50 -05:00
Zanie Blue
7dc97fd358 Improve readme description.
Co-authored-by: konsti <konstin@mailbox.org>
2023-10-27 12:08:50 -05:00
Zanie
3936bc119a Drop coloring 2023-10-27 12:08:50 -05:00
Zanie
a1bec32a7b Fix error rendering 2023-10-27 12:08:50 -05:00
Zanie
6f593cac68 in files -> across files 2023-10-27 12:08:50 -05:00
Zanie
823ec4cf88 Strip all newlines from error reports 2023-10-27 12:08:50 -05:00
Zanie
087d9c705e Test adding color to diff; fix newline in error report 2023-10-27 12:08:50 -05:00
Zanie
06f71f5235 Remove comma for consistency 2023-10-27 12:08:50 -05:00
Zanie
2270485c09 Display errors in code fences 2023-10-27 12:08:50 -05:00
Zanie
1bbe88f0a0 Fix project truncation 2023-10-27 12:08:50 -05:00
Zanie
e0def9ef95 Truncate more aggressively 2023-10-27 12:08:50 -05:00
Zanie
85de1e97e2 Add completed project count to summary title 2023-10-27 12:08:50 -05:00
Zanie
9cdf864043 Enable all projects again 2023-10-27 12:08:50 -05:00
Zanie
9a8bd09ae6 Fix table 2023-10-27 12:08:50 -05:00
Zanie
2e36781a8a Add more truncation? 2023-10-27 12:08:50 -05:00
Zanie
615e56906e Fix added / removed counts for format 2023-10-27 12:08:50 -05:00
Zanie
ce290076b8 Restore display of fixes 2023-10-27 12:08:50 -05:00
Zanie
1f6b28c29c Fix table 2023-10-27 12:08:50 -05:00
Zanie
d4e9418825 Fix calculation to ignore fixes 2023-10-27 12:08:50 -05:00
Zanie
89f9803f4a Restructuring parsing 2023-10-27 12:08:50 -05:00
Zanie
250b226f16 Restore changelog 2023-10-27 12:08:50 -05:00
Zanie
25bda80e48 Improve limits per rule code 2023-10-27 12:08:50 -05:00
Zanie
88e3663dbe Fix affected rules total 2023-10-27 12:08:50 -05:00
Zanie
5a055aadb0 Fix titles 2023-10-27 12:08:50 -05:00
Zanie
333aedc903 Refactor fixable detection 2023-10-27 12:08:50 -05:00
Zanie
6025081825 Add messy fixable detection to reduce rule changes 2023-10-27 12:08:44 -05:00
Zanie
c806b4a35a Sort imports 2023-10-27 12:07:51 -05:00
Zanie
1c820b0200 Clean up Python implementation; enable all projects 2023-10-27 12:07:51 -05:00
Zanie
1f4e87f043 Use set instead of sorted in RuleChanges.from_diff 2023-10-27 12:07:51 -05:00
Zanie
db5dfd4eff Loosen linked ranges for format diffs since they are not correct 2023-10-27 12:07:51 -05:00
Zanie
c2cae71247 Improve rule table title 2023-10-27 12:07:51 -05:00
Zanie
11e99655b8 Fix formatting of command 2023-10-27 12:07:51 -05:00
Zanie
8898906ef4 Update options display 2023-10-27 12:07:51 -05:00
Zanie
9c63d80257 Change table title 2023-10-27 12:07:51 -05:00
Zanie
d6e35f72a9 Move rule change table into details block 2023-10-27 12:07:51 -05:00
Zanie
c547ec1417 Combine reports into a single artifact again 2023-10-27 12:07:51 -05:00
Zanie
b049c8651a Revert all changes to pr-comment 2023-10-27 12:07:51 -05:00
Zanie
8a48dfe3e3 Continue the battle for if-comment 2023-10-27 12:07:51 -05:00
Zanie
c437a899f8 Fix paths 2023-10-27 12:07:51 -05:00
Zanie
3fd64dac83 Fix file paths 2023-10-27 12:07:51 -05:00
Zanie
28bfab5eff Fix titles 2023-10-27 12:07:51 -05:00
Zanie
f7790698b7 Tweak pull request comment 2023-10-27 12:07:51 -05:00
Zanie
89501451d9 Remove file links (only do line links) 2023-10-27 12:07:51 -05:00
Zanie
368cb3ee95 Add max lines per project to ruff check 2023-10-27 12:07:51 -05:00
Zanie
cbc24a3de2 Use <pre> for check output 2023-10-27 12:07:51 -05:00
Zanie
fc6a3c1c71 Skip more CI checks to speed things up 2023-10-27 12:07:51 -05:00
Zanie
4288d8a6f4 Skip fuzz and bench as well 2023-10-27 12:07:51 -05:00
Zanie
246830c29d Skip tests for now 2023-10-27 12:07:51 -05:00
Zanie
f72c37ff46 Write results separately then combine 2023-10-27 12:07:51 -05:00
Zanie
071c54e89b Add unidiff dep 2023-10-27 12:07:51 -05:00
Zanie
ff3dc646b0 Check against v0.0.292 for debugging 2023-10-27 12:07:51 -05:00
Zanie
d6aa117949 Lint 2023-10-27 12:07:51 -05:00
Zanie
92f935eb2b Update formatter if 2023-10-27 12:07:51 -05:00
Zanie
ab49eaae61 Add formatter ecosystem checks 2023-10-27 12:07:51 -05:00
Zanie
4b79f57872 Refactor ecosystem checks into module 2023-10-27 12:07:51 -05:00
17 changed files with 1672 additions and 10 deletions

View File

@@ -43,6 +43,7 @@ jobs:
- "!crates/ruff_dev/**"
- "!crates/ruff_shrinking/**"
- scripts/*
- .github/workflows/ci.yaml
formatter:
- Cargo.toml
@@ -57,6 +58,7 @@ jobs:
- crates/ruff_python_parser/**
- crates/ruff_dev/**
- scripts/*
- .github/workflows/ci.yaml
cargo-fmt:
name: "cargo fmt"
@@ -182,7 +184,7 @@ jobs:
- cargo-test-linux
- determine_changes
# Only runs on pull requests, since that is the only way we can find the base version for comparison.
if: github.event_name == 'pull_request' && needs.determine_changes.outputs.linter == 'true'
if: github.event_name == 'pull_request'
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
@@ -190,27 +192,48 @@ jobs:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@v3
name: Download Ruff binary
name: Download comparison Ruff binary
id: ruff-target
with:
name: ruff
path: target/debug
- uses: dawidd6/action-download-artifact@v2
name: Download base results
name: Download baseline Ruff binary
with:
name: ruff
branch: ${{ github.event.pull_request.base.ref }}
check_artifacts: true
- name: Run ecosystem check
- name: Install ruff-ecosystem
run: |
pip install ./python/ruff-ecosystem
- name: Run `ruff check` ecosystem check
if: ${{ needs.determine_changes.outputs.linter == 'true' }}
run: |
# Make executable, since artifact download doesn't preserve this
chmod +x ruff ${{ steps.ruff-target.outputs.download-path }}/ruff
chmod +x ./ruff ${{ steps.ruff-target.outputs.download-path }}/ruff
scripts/check_ecosystem.py ruff ${{ steps.ruff-target.outputs.download-path }}/ruff | tee ecosystem-result
cat ecosystem-result > $GITHUB_STEP_SUMMARY
ruff-ecosystem check ./ruff ${{ steps.ruff-target.outputs.download-path }}/ruff --cache ./checkouts --output-format markdown | tee ecosystem-result-check
cat ecosystem-result-check > $GITHUB_STEP_SUMMARY
cat ecosystem-result-check > ecosystem-result
echo "" >> ecosystem-result
- name: Run `ruff format` ecosystem check
if: ${{ needs.determine_changes.outputs.formatter == 'true' }}
run: |
# Make executable, since artifact download doesn't preserve this
chmod +x ./ruff ${{ steps.ruff-target.outputs.download-path }}/ruff
ruff-ecosystem format ./ruff ${{ steps.ruff-target.outputs.download-path }}/ruff --cache ./checkouts --output-format markdown | tee ecosystem-result-format
cat ecosystem-result-format > $GITHUB_STEP_SUMMARY
cat ecosystem-result-format >> ecosystem-result
- name: Export pull request number
run: |
echo ${{ github.event.number }} > pr-number
- uses: actions/upload-artifact@v3

View File

@@ -14,3 +14,35 @@ return (
len(node.parents) for node in self.node_map.values()
)
)
# Regression tests for https://github.com/astral-sh/ruff/issues/8042
def f():
return (
self.get_filename() + ".csv" +
"text/csv" +
output.getvalue().encode("utf-8----------------"),
)
def f():
return (
self.get_filename() + ".csv" + "text/csv",
output.getvalue().encode("utf-8----------------")
)
def f():
return (
self.get_filename() + ".csv",
"text/csv",
output.getvalue().encode("utf-8----------------")
)
def f():
return self.get_filename() + ".csv" + "text/csv" + output.getvalue().encode("utf-8----------------"),
def f():
return self.get_filename() + ".csv" + "text/csv", output.getvalue().encode("utf-8----------------")
def f():
return self.get_filename() + ".csv", "text/csv", output.getvalue().encode("utf-8----------------")

View File

@@ -172,10 +172,13 @@ impl FormatNodeRule<ExprTuple> for FormatExprTuple {
.finish()
}
TupleParentheses::Preserve => group(&ExprSequence::new(item)).fmt(f),
TupleParentheses::NeverPreserve | TupleParentheses::OptionalParentheses => {
TupleParentheses::NeverPreserve => {
optional_parentheses(&ExprSequence::new(item)).fmt(f)
}
TupleParentheses::Default => {
TupleParentheses::OptionalParentheses if item.elts.len() == 2 => {
optional_parentheses(&ExprSequence::new(item)).fmt(f)
}
TupleParentheses::Default | TupleParentheses::OptionalParentheses => {
parenthesize_if_expands(&ExprSequence::new(item)).fmt(f)
}
},

View File

@@ -20,6 +20,38 @@ return (
len(node.parents) for node in self.node_map.values()
)
)
# Regression tests for https://github.com/astral-sh/ruff/issues/8042
def f():
return (
self.get_filename() + ".csv" +
"text/csv" +
output.getvalue().encode("utf-8----------------"),
)
def f():
return (
self.get_filename() + ".csv" + "text/csv",
output.getvalue().encode("utf-8----------------")
)
def f():
return (
self.get_filename() + ".csv",
"text/csv",
output.getvalue().encode("utf-8----------------")
)
def f():
return self.get_filename() + ".csv" + "text/csv" + output.getvalue().encode("utf-8----------------"),
def f():
return self.get_filename() + ".csv" + "text/csv", output.getvalue().encode("utf-8----------------")
def f():
return self.get_filename() + ".csv", "text/csv", output.getvalue().encode("utf-8----------------")
```
## Output
@@ -38,6 +70,54 @@ return (
len(self.nodeseeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee),
sum(len(node.parents) for node in self.node_map.values()),
)
# Regression tests for https://github.com/astral-sh/ruff/issues/8042
def f():
return (
self.get_filename()
+ ".csv"
+ "text/csv"
+ output.getvalue().encode("utf-8----------------"),
)
def f():
return (
self.get_filename() + ".csv" + "text/csv",
output.getvalue().encode("utf-8----------------"),
)
def f():
return (
self.get_filename() + ".csv",
"text/csv",
output.getvalue().encode("utf-8----------------"),
)
def f():
return (
self.get_filename()
+ ".csv"
+ "text/csv"
+ output.getvalue().encode("utf-8----------------"),
)
def f():
return self.get_filename() + ".csv" + "text/csv", output.getvalue().encode(
"utf-8----------------"
)
def f():
return (
self.get_filename() + ".csv",
"text/csv",
output.getvalue().encode("utf-8----------------"),
)
```

View File

@@ -0,0 +1,46 @@
# ruff-ecosystem
Compare lint and format results for two different ruff versions (e.g. main and a PR) on real world projects.
## Installation
From the Ruff project root, install with `pip`:
```shell
pip install -e ./python/ruff-ecosystem
```
## Usage
```shell
ruff-ecosystem <check | format> <baseline executable> <comparison executable>
```
Note that executable paths may be absolute, relative to the current working directory, or looked up in the
current Python environment and `PATH`.
Run `ruff check` ecosystem checks comparing your debug build to your system Ruff:
```shell
ruff-ecosystem check ruff "./target/debug/ruff"
```
Run `ruff format` ecosystem checks comparing your debug build to your system Ruff:
```shell
ruff-ecosystem format ruff "./target/debug/ruff"
```
## Development
When developing, it can be useful to set the `--pdb` flag to drop into a debugger on failure:
```shell
ruff-ecosystem check ruff "./target/debug/ruff" --pdb
```
You can also provide a path to cache checkouts to speed up repeated runs:
```shell
ruff-ecosystem check ruff "./target/debug/ruff" --cache ./repos
```

View File

@@ -0,0 +1,15 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "ruff-ecosystem"
version = "0.0.0"
dependencies = ["unidiff==0.7.5"]
[project.scripts]
ruff-ecosystem = "ruff_ecosystem.cli:entrypoint"
[tool.ruff]
extend-select = ["I"]
preview = true

View File

@@ -0,0 +1,3 @@
import logging

# Package-wide logger; handler/level configuration is left to the consumer
# (the CLI calls `logging.basicConfig` at startup).
logger = logging.getLogger("ruff-ecosystem")

View File

@@ -0,0 +1,8 @@
"""
Enables usage with `python -m ruff_ecosystem`
"""
import ruff_ecosystem.cli
if __name__ == "__main__":
ruff_ecosystem.cli.entrypoint()

View File

@@ -0,0 +1,565 @@
"""
Execution, comparison, and summary of `ruff check` ecosystem checks.
"""
from __future__ import annotations
import asyncio
import dataclasses
import re
import time
from asyncio import create_subprocess_exec
from collections import Counter
from dataclasses import dataclass, field
from pathlib import Path
from subprocess import PIPE
from typing import TYPE_CHECKING, Iterable, Iterator, Self, Sequence
from ruff_ecosystem import logger
from ruff_ecosystem.markdown import (
markdown_details,
markdown_plus_minus,
markdown_project_section,
)
from ruff_ecosystem.types import (
Comparison,
Diff,
Result,
RuffError,
Serializable,
)
if TYPE_CHECKING:
from ruff_ecosystem.projects import ClonedRepository, Project
# Matches lines that are summaries rather than diagnostics
CHECK_SUMMARY_LINE_RE = re.compile(r"^(Found \d+ error.*)|(.* fixable with .*)$")

# Parses a diagnostic line (in a diff) to retrieve path and line number
CHECK_DIFF_LINE_RE = re.compile(
    r"^(?P<pre>[+-]) (?P<inner>(?P<path>[^:]+):(?P<lnum>\d+):\d+:) (?P<post>.*)$",
)

# Parses a full diagnostic line — optionally prefixed by a diff marker — into
# location, rule code, fix-availability (`[*]`), and message groups
CHECK_DIAGNOSTIC_LINE_RE = re.compile(
    r"^(?P<diff>[+-])? ?(?P<location>.*): (?P<code>[A-Z]{1,4}[0-9]{3,4})(?P<fixable> \[\*\])? (?P<message>.*)"
)

# Suffix ruff appends to diagnostics that have an available fix
CHECK_VIOLATION_FIX_INDICATOR = " [*]"

# Hard limit GitHub places on comment bodies; used to truncate reports
GITHUB_MAX_COMMENT_LENGTH = 65536  # characters
def markdown_check_result(result: Result) -> str:
    """
    Render a `ruff check` ecosystem check result as markdown.

    Produces a one-line summary, a per-project section of (truncated) diff
    lines with permalinks, an error section per failed project, and a summary
    table of changes by rule code. Returns a short success message when no
    changes or errors were detected.
    """
    # Calculate the total number of rule changes
    all_rule_changes = RuleChanges()
    project_diffs = {
        project: CheckDiff.from_simple_diff(comparison.diff)
        for project, comparison in result.completed
    }
    project_rule_changes: dict[Project, RuleChanges] = {}
    for project, diff in project_diffs.items():
        project_rule_changes[project] = changes = RuleChanges.from_diff(diff)
        all_rule_changes.update(changes)

    lines: list[str] = []
    total_removed = all_rule_changes.total_removed_violations()
    total_added = all_rule_changes.total_added_violations()
    total_added_fixes = all_rule_changes.total_added_fixes()
    total_removed_fixes = all_rule_changes.total_removed_fixes()
    total_changes = (
        total_added + total_removed + total_added_fixes + total_removed_fixes
    )
    error_count = len(result.errored)
    total_affected_rules = len(all_rule_changes.rule_codes())
    if total_affected_rules == 0 and error_count == 0:
        return "\u2705 ecosystem check detected no linter changes."

    # Summarize the total changes
    if total_affected_rules == 0:
        # Only errors
        s = "s" if error_count != 1 else ""
        lines.append(
            f"\u2139\ufe0f ecosystem check **encountered linter errors**. (no lint changes; {error_count} project error{s})"
        )
    else:
        change_summary = (
            f"{markdown_plus_minus(total_added, total_removed)} violations, "
            f"{markdown_plus_minus(total_added_fixes, total_removed_fixes)} fixes "
            f"in {len(result.completed)} projects"
        )
        if error_count:
            s = "s" if error_count != 1 else ""
            change_summary += f"; {error_count} project error{s}"
        lines.append(
            f"\u2139\ufe0f ecosystem check **detected linter changes**. ({change_summary})"
        )
    lines.append("")

    # Limit the number of items displayed per rule to between 5 and 50
    max_display_per_rule = max(
        5,
        # Calculate the number of affected rules that we would display to increase
        # the maximum if there are less rules affected
        50 // total_affected_rules,
    )

    # Display per project changes
    for project, comparison in result.completed:
        # TODO: This is not a performant way to check the length but the whole
        # GitHub comment length issue needs to be addressed anyway
        if len(" ".join(lines)) > GITHUB_MAX_COMMENT_LENGTH // 3:
            lines.append("")
            lines.append(
                "_... Truncated remaining completed projected reports due to GitHub comment length restrictions_"
            )
            lines.append("")
            break

        if not comparison.diff:
            continue  # Skip empty diffs

        diff = project_diffs[project]
        rule_changes = project_rule_changes[project]
        project_removed_violations = rule_changes.total_removed_violations()
        project_added_violations = rule_changes.total_added_violations()
        project_added_fixes = rule_changes.total_added_fixes()
        project_removed_fixes = rule_changes.total_removed_fixes()
        project_changes = (
            project_added_violations
            + project_removed_violations
            + project_added_fixes
            + project_removed_fixes
        )

        # Limit the number of items displayed per project to between 10 and 50
        # based on the number of total changes present in this project
        max_display_per_project = max(10, int((project_changes / total_changes) * 50))

        # Display the diff
        displayed_changes_per_rule: Counter = Counter()
        displayed_changes = 0

        # Wrap with `<pre>` for code-styling with support for links
        diff_lines = ["<pre>"]
        for line in diff.parsed_lines:
            rule_code = line.rule_code

            # Limit the number of changes we'll show per rule code
            # NOTE: the `>` comparison means up to `max_display_per_rule + 1`
            # lines are emitted before the omission notice below fires.
            if displayed_changes_per_rule[rule_code] > max_display_per_rule:
                continue

            diff_lines.append(
                add_permalink_to_diagnostic_line(comparison.repo, line.to_string())
            )

            displayed_changes_per_rule[rule_code] += 1
            displayed_changes += 1

            # If we just reached the maximum... display an omission line
            if displayed_changes_per_rule[rule_code] > max_display_per_rule:
                hidden_count = (
                    rule_changes.added_violations[rule_code]
                    + rule_changes.removed_violations[rule_code]
                    + rule_changes.added_fixes[rule_code]
                    + rule_changes.removed_fixes[rule_code]
                    - max_display_per_rule
                )
                diff_lines.append(
                    f"... {hidden_count} additional changes omitted for rule {rule_code}"
                )

            if displayed_changes >= max_display_per_project:
                break

        if project_changes > max_display_per_project:
            hidden_count = project_changes - displayed_changes
            diff_lines.append(
                f"... {hidden_count} additional changes omitted for project"
            )

        diff_lines.append("</pre>")

        title = (
            f"+{project_added_violations} "
            f"-{project_removed_violations} violations, "
            f"+{project_added_fixes} "
            f"-{project_removed_fixes} fixes"
        )

        lines.extend(
            markdown_project_section(
                title=title,
                content=diff_lines,
                options=project.check_options,
                project=project,
            )
        )

    # Render one collapsed section per errored project
    for project, error in result.errored:
        lines.extend(
            markdown_project_section(
                title="error",
                content=f"```\n{error}```",
                options=project.check_options,
                project=project,
            )
        )

    # Display a summary table of changed rules
    if all_rule_changes:
        table_lines = []
        table_lines.append(
            "| code | total | + violation | - violation | + fix | - fix |"
        )
        table_lines.append("| ---- | ------- | --------- | -------- | ----- | ---- |")
        for rule, total in sorted(
            all_rule_changes.total_changes_by_rule(),
            key=lambda item: item[1],  # Sort by the total changes
            reverse=True,
        ):
            added_violations, removed_violations, added_fixes, removed_fixes = (
                all_rule_changes.added_violations[rule],
                all_rule_changes.removed_violations[rule],
                all_rule_changes.added_fixes[rule],
                all_rule_changes.removed_fixes[rule],
            )
            table_lines.append(
                f"| {rule} | {total} | {added_violations} | {removed_violations} "
                f"| {added_fixes} | {removed_fixes} |"
            )

        lines.extend(
            markdown_details(
                summary=f"Changes by rule ({total_affected_rules} rules affected)",
                preface="",
                content=table_lines,
            )
        )

    return "\n".join(lines)
@dataclass(frozen=True)
class RuleChanges:
    """
    The number of additions and removals by rule code.

    While the attributes are frozen to avoid accidentally changing the value of
    an attribute, the counters themselves are mutable: `update` mutates this
    instance in place, while `+` returns a new combined instance.
    """

    # All counters are keyed by rule code (e.g. "F401")
    added_violations: Counter = field(default_factory=Counter)
    removed_violations: Counter = field(default_factory=Counter)
    added_fixes: Counter = field(default_factory=Counter)
    removed_fixes: Counter = field(default_factory=Counter)

    def rule_codes(self) -> set[str]:
        """Return every rule code that has at least one recorded change."""
        return (
            set(self.added_violations.keys())
            .union(self.removed_violations.keys())
            .union(self.added_fixes.keys())
            .union(self.removed_fixes.keys())
        )

    def __add__(self, other: Self) -> Self:
        """Return a new instance combining the counts of both operands."""
        if not isinstance(other, type(self)):
            return NotImplemented
        new = type(self)()
        new.update(self)
        new.update(other)
        return new

    def update(self, other: Self) -> Self:
        """Add the counts of `other` to this instance in place; returns self."""
        self.added_violations.update(other.added_violations)
        self.removed_violations.update(other.removed_violations)
        self.added_fixes.update(other.added_fixes)
        self.removed_fixes.update(other.removed_fixes)
        return self

    def total_added_violations(self) -> int:
        return sum(self.added_violations.values())

    def total_removed_violations(self) -> int:
        return sum(self.removed_violations.values())

    def total_added_fixes(self) -> int:
        return sum(self.added_fixes.values())

    def total_removed_fixes(self) -> int:
        return sum(self.removed_fixes.values())

    def total_changes_by_rule(self) -> Iterator[tuple[str, int]]:
        """
        Yields the sum of changes for each rule
        """
        totals = Counter()
        totals.update(self.added_violations)
        totals.update(self.removed_violations)
        totals.update(self.added_fixes)
        totals.update(self.removed_fixes)
        yield from totals.items()

    @classmethod
    def from_diff(cls: type[Self], diff: CheckDiff) -> Self:
        """
        Parse a diff from `ruff check` to determine the additions and removals for each rule
        """
        rule_changes = cls()
        for line in diff.parsed_lines:
            if line.is_added:
                if line in diff.fix_only_lines:
                    # Only fix availability changed: an added `[*]` line means
                    # the fix became available; an added line without `[*]`
                    # means the fix was removed.
                    if line.fix_available:
                        rule_changes.added_fixes[line.rule_code] += 1
                    else:
                        rule_changes.removed_fixes[line.rule_code] += 1
                else:
                    rule_changes.added_violations[line.rule_code] += 1
            elif line.is_removed:
                if line in diff.fix_only_lines:
                    # Mirror of the added case: a removed `[*]` line means the
                    # fix went away; a removed line without `[*]` means the fix
                    # was added in the comparison run.
                    if line.fix_available:
                        rule_changes.removed_fixes[line.rule_code] += 1
                    else:
                        rule_changes.added_fixes[line.rule_code] += 1
                else:
                    rule_changes.removed_violations[line.rule_code] += 1
        return rule_changes

    def __bool__(self):
        """True if any change has been recorded."""
        return bool(
            self.added_violations
            or self.removed_violations
            or self.added_fixes
            or self.removed_fixes
        )
@dataclass(frozen=True)
class DiagnosticLine:
is_added: bool | None
is_removed: bool | None
fix_available: bool
rule_code: str
location: str
message: str
def to_string(self) -> str:
"""
Construct the line from the components
"""
line = ""
if self.is_added:
line += "+ "
elif self.is_removed:
line += "- "
line += f"{self.location}: {self.rule_code} "
if self.fix_available:
line += "[*] "
line += self.message
return line
def with_fix_available(self) -> DiagnosticLine:
return DiagnosticLine(**{**dataclasses.asdict(self), "fix_available": True})
def without_fix_available(self) -> DiagnosticLine:
return DiagnosticLine(**{**dataclasses.asdict(self), "fix_available": False})
def without_diff(self) -> DiagnosticLine:
return DiagnosticLine(
**{**dataclasses.asdict(self), "is_added": None, "is_removed": None}
)
@classmethod
def try_from_string(cls: type[Self], line: str) -> Self | None:
"""
Parse the rule code from a diagnostic line string
"""
match = CHECK_DIAGNOSTIC_LINE_RE.match(line)
if match is None:
# Handle case where there are no regex match e.g.
# + "?application=AIRFLOW&authenticator=TEST_AUTH&role=TEST_ROLE&warehouse=TEST_WAREHOUSE" # noqa: E501, ERA001
# Which was found in local testing
return None
match_items = match.groupdict()
return cls(
location=match_items["location"],
is_removed=match_items.get("diff") == "-",
is_added=match_items.get("diff") == "+",
fix_available=match_items.get("fixable") is not None,
rule_code=match_items["code"],
message=match_items["message"],
)
class CheckDiff(Diff):
    """
    Extends the normal diff with diagnostic parsing
    """

    def __init__(
        self,
        lines: Iterable[str],
        parsed_lines: list[DiagnosticLine],
        fix_only_lines: set[DiagnosticLine],
    ) -> None:
        # Diagnostic lines successfully parsed from the raw diff lines
        self.parsed_lines = parsed_lines
        # Lines whose only difference is fix availability (the `[*]` marker)
        self.fix_only_lines = fix_only_lines
        super().__init__(lines)

    @classmethod
    def from_simple_diff(cls, diff: Diff) -> CheckDiff:
        """
        Parse a simple diff to include check-specific analyses.
        """
        # Drop unchanged lines
        diff = diff.without_unchanged_lines()

        # Sort without accounting for the leading + / - marker
        sorted_lines = list(sorted(diff, key=lambda line: line[2:]))

        # Parse the lines, drop lines that cannot be parsed
        parsed_lines: list[DiagnosticLine] = list(
            filter(
                None,
                (DiagnosticLine.try_from_string(line) for line in sorted_lines),
            )
        )

        # Calculate which lines only changed fix availability
        fix_only: set[DiagnosticLine] = set()

        # TODO(zanieb): There has to be a cleaner way to express this logic
        # We check if each added line is available in the removed set with fix
        # availability toggled and vice-versa
        for line in parsed_lines:
            # Look in the opposite side of the diff for the toggled counterpart
            other_set = diff.removed if line.is_added else diff.added
            toggled = (
                line.without_fix_available()
                if line.fix_available
                else line.with_fix_available()
            )
            if toggled.without_diff().to_string() in other_set:
                fix_only.add(line)

        return CheckDiff(
            lines=sorted_lines, parsed_lines=parsed_lines, fix_only_lines=fix_only
        )
def add_permalink_to_diagnostic_line(repo: ClonedRepository, line: str) -> str:
    """
    Replace the `path:line:col:` portion of a diff diagnostic line with an HTML
    link into the cloned repository; lines that do not parse are returned
    unchanged.
    """
    parsed = CHECK_DIFF_LINE_RE.match(line)
    if not parsed:
        return line
    groups = parsed.groupdict()
    url = repo.url_for(groups["path"], int(groups["lnum"]))
    return f"{groups['pre']} <a href='{url}'>{groups['inner']}</a> {groups['post']}"
async def compare_check(
    ruff_baseline_executable: Path,
    ruff_comparison_executable: Path,
    options: CheckOptions,
    cloned_repo: ClonedRepository,
) -> Comparison:
    """
    Run `ruff check` over the cloned repository with both executables
    concurrently and return a `Comparison` holding the diff of their outputs.
    """
    # Run both checks in parallel; TaskGroup propagates any failure from either
    async with asyncio.TaskGroup() as tg:
        baseline_task = tg.create_task(
            ruff_check(
                executable=ruff_baseline_executable.resolve(),
                path=cloned_repo.path,
                name=cloned_repo.fullname,
                options=options,
            ),
        )
        comparison_task = tg.create_task(
            ruff_check(
                executable=ruff_comparison_executable.resolve(),
                path=cloned_repo.path,
                name=cloned_repo.fullname,
                options=options,
            ),
        )

    baseline_output, comparison_output = (
        baseline_task.result(),
        comparison_task.result(),
    )

    diff = Diff.from_pair(baseline_output, comparison_output)
    return Comparison(diff=diff, repo=cloned_repo)
async def ruff_check(
    *, executable: Path, path: Path, name: str, options: CheckOptions
) -> Sequence[str]:
    """
    Run the given ruff binary against the specified path.

    Returns the stdout diagnostic lines with summary lines stripped so diffs
    contain only diagnostics. Raises `RuffError` on a non-zero exit status.
    """
    logger.debug(f"Checking {name} with {executable}")
    ruff_args = options.to_cli_args()

    start = time.time()
    proc = await create_subprocess_exec(
        executable.absolute(),
        *ruff_args,
        ".",
        stdout=PIPE,
        stderr=PIPE,
        cwd=path,
    )
    result, err = await proc.communicate()
    end = time.time()

    logger.debug(f"Finished checking {name} with {executable} in {end - start:.2f}s")

    if proc.returncode != 0:
        raise RuffError(err.decode("utf8"))

    # Strip summary lines so the diff is only diagnostic lines
    lines = [
        line
        for line in result.decode("utf8").splitlines()
        if not CHECK_SUMMARY_LINE_RE.match(line)
    ]

    return lines
@dataclass(frozen=True)
class CheckOptions(Serializable):
    """
    Options controlling how `ruff check` is invoked for a project.
    """

    select: str = ""
    ignore: str = ""
    exclude: str = ""

    # Generating fixes is slow and verbose
    show_fixes: bool = False

    # Limit the number of reported lines per rule
    max_lines_per_rule: int | None = 50

    def markdown(self) -> str:
        """Render the options as a short markdown-friendly summary."""
        return f"select {self.select} ignore {self.ignore} exclude {self.exclude}"

    def to_cli_args(self) -> list[str]:
        """Build the argument vector for a `ruff check` invocation."""
        args = ["check", "--no-cache", "--exit-zero"]
        # Append each flag only when its value is non-empty
        for flag, value in (
            ("--select", self.select),
            ("--ignore", self.ignore),
            ("--exclude", self.exclude),
        ):
            if value:
                args.extend([flag, value])
        if self.show_fixes:
            args.extend(["--show-fixes", "--ecosystem-ci"])
        return args

View File

@@ -0,0 +1,166 @@
import argparse
import asyncio
import logging
import os
import shutil
import sys
import sysconfig
import tempfile
from contextlib import nullcontext
from pathlib import Path
from signal import SIGINT, SIGTERM
from ruff_ecosystem import logger
from ruff_ecosystem.defaults import DEFAULT_TARGETS
from ruff_ecosystem.main import OutputFormat, main
from ruff_ecosystem.projects import RuffCommand
def excepthook(exc_type, exc_value, exc_tb):
    """
    A `sys.excepthook` replacement that drops into `pdb` on uncaught errors.

    Falls back to the default hook when running interactively or without a
    tty, where a debugger prompt would be useless. Parameters are renamed from
    `type`/`value` to avoid shadowing builtins; `sys.excepthook` always calls
    positionally, so this is fully compatible.
    """
    if hasattr(sys, "ps1") or not sys.stderr.isatty():
        # we are in interactive mode or we don't have a tty so call the default
        sys.__excepthook__(exc_type, exc_value, exc_tb)
    else:
        import pdb
        import traceback

        traceback.print_exception(exc_type, exc_value, exc_tb)
        print()
        pdb.post_mortem(exc_tb)
def _resolve_executable(given: Path, label: str) -> Path:
    """
    Resolve `given` to a usable executable path.

    Uses the path directly when it exists; otherwise looks it up in the current
    Python environment and `PATH`. Exits the process with an error message on
    stderr when the executable cannot be found.
    """
    if given.exists():
        return given
    resolved = get_executable_path(str(given))
    if not resolved:
        # BUGFIX: previously `print(msg, sys.stderr)` passed the stream object
        # as a second positional argument (printing it to stdout); `file=` is
        # required to actually write to stderr.
        print(f"Could not find ruff {label} executable: {given}", file=sys.stderr)
        exit(1)
    logger.info("Resolved %s executable %s to %s", label, given, resolved)
    return resolved


def entrypoint():
    """
    CLI entrypoint: parse arguments, resolve the baseline and comparison ruff
    executables, and run the ecosystem comparison to completion (cancellable
    via SIGINT/SIGTERM).
    """
    args = parse_args()

    if args.pdb:
        sys.excepthook = excepthook

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    # Use a temporary directory for caching if no cache is specified
    cache_context = (
        tempfile.TemporaryDirectory() if not args.cache else nullcontext(args.cache)
    )

    ruff_baseline = _resolve_executable(args.ruff_baseline, "baseline")
    ruff_comparison = _resolve_executable(args.ruff_comparison, "comparison")

    with cache_context as cache:
        # `asyncio.get_event_loop` is deprecated for implicitly creating loops;
        # create one explicitly and register it so `ensure_future` can find it.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        main_task = asyncio.ensure_future(
            main(
                command=RuffCommand(args.ruff_command),
                ruff_baseline_executable=ruff_baseline,
                ruff_comparison_executable=ruff_comparison,
                targets=DEFAULT_TARGETS,
                format=OutputFormat(args.output_format),
                project_dir=Path(cache),
                raise_on_failure=args.pdb,
            )
        )
        # Cancel the main task on SIGINT/SIGTERM so cleanup still runs.
        # https://stackoverflow.com/a/58840987/3549270
        for sig in (SIGINT, SIGTERM):
            loop.add_signal_handler(sig, main_task.cancel)
        try:
            loop.run_until_complete(main_task)
        finally:
            loop.close()
def parse_args() -> argparse.Namespace:
    """
    Parse command-line arguments for the ecosystem check.

    Positional arguments select the ruff command under test and the two
    executables to compare; options control caching, output format, logging,
    and debugging behavior.
    """
    parser = argparse.ArgumentParser(
        description="Check two versions of ruff against a corpus of open-source code.",
    )

    # TODO: Support non-default `--targets`
    # parser.add_argument(
    #     "--targets",
    #     type=Path,
    #     help=(
    #         "Optional JSON files to use over the default repositories. "
    #         "Supports both github_search_*.jsonl and known-github-tomls.jsonl."
    #     ),
    # )
    parser.add_argument(
        "--cache",
        type=Path,
        help="Location for caching cloned repositories",
    )
    parser.add_argument(
        "--output-format",
        choices=[option.name for option in OutputFormat],
        default="json",
        # BUGFIX: help text was copy-pasted from `--cache`
        help="The format to use for the result output",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Enable debug logging",
    )
    parser.add_argument(
        "--pdb",
        action="store_true",
        help="Enable debugging on failure",
    )
    parser.add_argument(
        "ruff_command",
        choices=[option.name for option in RuffCommand],
        help="The Ruff command to test",
    )
    parser.add_argument(
        "ruff_baseline",
        type=Path,
    )
    parser.add_argument(
        "ruff_comparison",
        type=Path,
    )

    return parser.parse_args()
def get_executable_path(name: str) -> Path | None:
# Add suffix for Windows executables
name += ".exe" if sys.platform == "win32" and not name.endswith(".exe") else ""
path = os.path.join(sysconfig.get_path("scripts"), name)
# The executable in the current interpreter's scripts directory.
if os.path.exists(path):
return Path(path)
# The executable in the global environment.
environment_path = shutil.which(name)
if environment_path:
return Path(environment_path)
return None

View File

@@ -0,0 +1,73 @@
"""
Default projects for ecosystem checks
"""
from ruff_ecosystem.projects import CheckOptions, FormatOptions, Project, Repository
# TODO(zanieb): Consider exporting this as JSON and loading from there instead
# Repositories checked by default, roughly alphabetized by owner. A few projects
# opt into non-default lint rule selection (`select="ALL"` / `select="PYI"`) or
# format excludes, annotated inline below.
DEFAULT_TARGETS = [
    Project(repo=Repository(owner="DisnakeDev", name="disnake", ref="master")),
    Project(repo=Repository(owner="PostHog", name="HouseWatch", ref="main")),
    Project(repo=Repository(owner="RasaHQ", name="rasa", ref="main")),
    Project(repo=Repository(owner="Snowflake-Labs", name="snowcli", ref="main")),
    Project(repo=Repository(owner="aiven", name="aiven-client", ref="main")),
    Project(repo=Repository(owner="alteryx", name="featuretools", ref="main")),
    Project(
        repo=Repository(owner="apache", name="airflow", ref="main"),
        check_options=CheckOptions(select="ALL"),
    ),
    Project(repo=Repository(owner="aws", name="aws-sam-cli", ref="develop")),
    Project(repo=Repository(owner="bloomberg", name="pytest-memray", ref="main")),
    Project(
        repo=Repository(owner="bokeh", name="bokeh", ref="branch-3.3"),
        check_options=CheckOptions(select="ALL"),
    ),
    Project(repo=Repository(owner="commaai", name="openpilot", ref="master")),
    Project(
        repo=Repository(owner="demisto", name="content", ref="master"),
        format_options=FormatOptions(
            # Syntax errors in this file
            exclude="Packs/ThreatQ/Integrations/ThreatQ/ThreatQ.py"
        ),
    ),
    Project(repo=Repository(owner="docker", name="docker-py", ref="main")),
    Project(repo=Repository(owner="freedomofpress", name="securedrop", ref="develop")),
    Project(repo=Repository(owner="fronzbot", name="blinkpy", ref="dev")),
    Project(repo=Repository(owner="ibis-project", name="ibis", ref="master")),
    Project(repo=Repository(owner="ing-bank", name="probatus", ref="main")),
    Project(repo=Repository(owner="jrnl-org", name="jrnl", ref="develop")),
    Project(repo=Repository(owner="latchbio", name="latch", ref="main")),
    Project(repo=Repository(owner="lnbits", name="lnbits", ref="main")),
    Project(repo=Repository(owner="milvus-io", name="pymilvus", ref="master")),
    Project(repo=Repository(owner="mlflow", name="mlflow", ref="master")),
    Project(repo=Repository(owner="model-bakers", name="model_bakery", ref="main")),
    Project(repo=Repository(owner="pandas-dev", name="pandas", ref="main")),
    Project(repo=Repository(owner="prefecthq", name="prefect", ref="main")),
    Project(repo=Repository(owner="pypa", name="build", ref="main")),
    Project(repo=Repository(owner="pypa", name="cibuildwheel", ref="main")),
    Project(repo=Repository(owner="pypa", name="pip", ref="main")),
    Project(repo=Repository(owner="pypa", name="setuptools", ref="main")),
    Project(repo=Repository(owner="python", name="mypy", ref="master")),
    Project(
        repo=Repository(
            owner="python",
            name="typeshed",
            ref="main",
        ),
        check_options=CheckOptions(select="PYI"),
    ),
    Project(repo=Repository(owner="python-poetry", name="poetry", ref="master")),
    Project(repo=Repository(owner="reflex-dev", name="reflex", ref="main")),
    Project(repo=Repository(owner="rotki", name="rotki", ref="develop")),
    Project(repo=Repository(owner="scikit-build", name="scikit-build", ref="main")),
    Project(
        repo=Repository(owner="scikit-build", name="scikit-build-core", ref="main")
    ),
    Project(repo=Repository(owner="sphinx-doc", name="sphinx", ref="master")),
    Project(repo=Repository(owner="spruceid", name="siwe-py", ref="main")),
    Project(repo=Repository(owner="tiangolo", name="fastapi", ref="master")),
    Project(repo=Repository(owner="yandex", name="ch-backup", ref="main")),
    Project(
        repo=Repository(owner="zulip", name="zulip", ref="main"),
        check_options=CheckOptions(select="ALL"),
    ),
]

View File

@@ -0,0 +1,195 @@
"""
Execution, comparison, and summary of `ruff format` ecosystem checks.
"""
from __future__ import annotations
import time
from asyncio import create_subprocess_exec
from dataclasses import dataclass
from pathlib import Path
from subprocess import PIPE
from typing import TYPE_CHECKING, Sequence
from unidiff import PatchSet
from ruff_ecosystem import logger
from ruff_ecosystem.markdown import markdown_project_section
from ruff_ecosystem.types import Comparison, Diff, Result, RuffError
if TYPE_CHECKING:
from ruff_ecosystem.projects import ClonedRepository
def markdown_format_result(result: Result) -> str:
    """
    Render a `ruff format` ecosystem check result as markdown.

    Returns a summary line followed by one collapsible section per project
    with changes, then one section per project that errored.
    """
    lines = []
    total_lines_removed = total_lines_added = 0
    total_files_modified = 0
    error_count = len(result.errored)
    patch_sets = []

    # Tally diff statistics across all completed projects
    for project, comparison in result.completed:
        total_lines_added += comparison.diff.lines_added
        total_lines_removed += comparison.diff.lines_removed
        patch_set = PatchSet("\n".join(comparison.diff.lines))
        patch_sets.append(patch_set)
        total_files_modified += len(patch_set.modified_files)

    if total_lines_removed == 0 and total_lines_added == 0 and error_count == 0:
        return "\u2705 ecosystem check detected no format changes."

    # Summarize the total changes
    # Fix: the second operand previously re-checked `total_lines_added`, so a
    # result with only removed lines was misreported as having no changes
    if total_lines_added == 0 and total_lines_removed == 0:
        # Only errors (non-zero, or we would have returned above)
        s = "s" if error_count != 1 else ""
        lines.append(
            f"\u2139\ufe0f ecosystem check **encountered format errors**. (no format changes; {error_count} project error{s})"
        )
    else:
        s = "s" if total_files_modified != 1 else ""
        changes = f"+{total_lines_added} -{total_lines_removed} lines in {total_files_modified} file{s} in {len(result.completed)} projects"
        if error_count:
            s = "s" if error_count != 1 else ""
            changes += f"; {error_count} project error{s}"
        lines.append(
            f"\u2139\ufe0f ecosystem check **detected format changes**. ({changes})"
        )
    lines.append("")

    # Then per-project changes
    for (project, comparison), patch_set in zip(result.completed, patch_sets):
        if not comparison.diff:
            continue  # Skip empty diffs

        files = len(patch_set.modified_files)
        s = "s" if files != 1 else ""
        title = f"+{comparison.diff.lines_added} -{comparison.diff.lines_removed} lines across {files} file{s}"

        lines.extend(
            markdown_project_section(
                title=title,
                content=format_patchset(patch_set, comparison.repo),
                options=project.format_options,
                project=project,
            )
        )

    for project, error in result.errored:
        lines.extend(
            markdown_project_section(
                title="error",
                content=f"```\n{error}```",
                options=project.format_options,
                project=project,
            )
        )

    return "\n".join(lines)
def format_patchset(patch_set: PatchSet, repo: ClonedRepository) -> str:
    """
    Convert a patchset to markdown, adding permalinks to the start of each hunk.
    """
    rendered: list[str] = []
    for patched_file in patch_set:
        for hunk in patched_file:
            # Note: when used for `format` checks the line number is not exact
            # because we formatted the repository for a baseline; we can't know
            # the exact line number in the original source file.
            permalink = repo.url_for(patched_file.path, hunk.source_start)
            anchor_text = patched_file.path + "~L" + str(hunk.source_start)

            # Add a link before the hunk
            rendered.append(f"<a href='{permalink}'>{anchor_text}</a>")

            # Drop the `@@ ... @@` header and wrap the hunk in a diff code block
            rendered.append("```diff")
            rendered.extend(str(hunk).splitlines()[1:])
            rendered.append("```")

    return "\n".join(rendered)
async def compare_format(
    ruff_baseline_executable: Path,
    ruff_comparison_executable: Path,
    options: FormatOptions,
    cloned_repo: ClonedRepository,
) -> Comparison:
    """
    Format the cloned repository with the baseline executable to normalize it,
    then run the comparison executable with `--diff` and return a `Comparison`
    built from the changes it would make relative to that baseline.
    """
    # Run format without diff to get the baseline
    await ruff_format(
        executable=ruff_baseline_executable.resolve(),
        path=cloned_repo.path,
        name=cloned_repo.fullname,
        options=options,
    )
    # Then get the diff from stdout
    diff = await ruff_format(
        executable=ruff_comparison_executable.resolve(),
        path=cloned_repo.path,
        name=cloned_repo.fullname,
        options=options,
        diff=True,
    )

    return Comparison(diff=Diff(diff), repo=cloned_repo)
async def ruff_format(
    *,
    executable: Path,
    path: Path,
    name: str,
    options: FormatOptions,
    diff: bool = False,
) -> Sequence[str]:
    """Run the given ruff binary against the specified path."""
    logger.debug(f"Formatting {name} with {executable}")

    cli_args = options.to_cli_args()
    if diff:
        cli_args.append("--diff")

    start = time.time()
    process = await create_subprocess_exec(
        executable.absolute(),
        *cli_args,
        ".",
        stdout=PIPE,
        stderr=PIPE,
        cwd=path,
    )
    stdout, stderr = await process.communicate()
    elapsed = time.time() - start
    logger.debug(f"Finished formatting {name} with {executable} in {elapsed:.2f}s")

    # Exit code 1 indicates formatting changes; anything beyond 0/1 is an error
    if process.returncode not in [0, 1]:
        raise RuffError(stderr.decode("utf8"))

    return stdout.decode("utf8").splitlines()
@dataclass(frozen=True)
class FormatOptions:
    """
    Ruff format options.
    """

    # Glob passed to `ruff format --exclude`; empty means no exclusion
    exclude: str = ""

    def to_cli_args(self) -> list[str]:
        """Render these options as arguments for the `ruff` CLI."""
        cli_args = ["format"]
        if self.exclude:
            cli_args += ["--exclude", self.exclude]
        return cli_args

View File

@@ -0,0 +1,144 @@
import asyncio
import dataclasses
import json
from enum import Enum
from pathlib import Path
from typing import Awaitable, TypeVar
from ruff_ecosystem import logger
from ruff_ecosystem.check import compare_check, markdown_check_result
from ruff_ecosystem.format import compare_format, markdown_format_result
from ruff_ecosystem.projects import (
Project,
RuffCommand,
)
from ruff_ecosystem.types import Comparison, Result, Serializable
T = TypeVar("T")
GITHUB_MAX_COMMENT_LENGTH = 65536
class OutputFormat(Enum):
    """Serialization formats for ecosystem check results."""

    markdown = "markdown"
    json = "json"
async def main(
    command: RuffCommand,
    ruff_baseline_executable: Path,
    ruff_comparison_executable: Path,
    targets: list[Project],
    project_dir: Path,
    format: OutputFormat,
    max_parallelism: int = 50,
    raise_on_failure: bool = False,
) -> None:
    """
    Run an ecosystem check comparing two ruff executables across `targets`,
    printing the aggregated result in the requested output `format`.

    At most `max_parallelism` project comparisons run concurrently. With
    `raise_on_failure`, the first exception propagates immediately; otherwise
    per-project exceptions are collected into the result as errors.
    """
    logger.debug("Using command %s", command.value)
    logger.debug("Using baseline executable at %s", ruff_baseline_executable)
    logger.debug("Using comparison executable at %s", ruff_comparison_executable)
    logger.debug("Using checkout_dir directory %s", project_dir)
    logger.debug("Checking %s targets", len(targets))

    # Limit parallelism to avoid high memory consumption
    semaphore = asyncio.Semaphore(max_parallelism)

    async def limited_parallelism(coroutine: Awaitable[T]) -> T:
        # Wrap a coroutine so it only runs while holding the semaphore
        async with semaphore:
            return await coroutine

    # With `return_exceptions`, failed comparisons come back as exception
    # objects in place of results instead of aborting the gather
    comparisons: list[Exception | Comparison] = await asyncio.gather(
        *[
            limited_parallelism(
                clone_and_compare(
                    command,
                    ruff_baseline_executable,
                    ruff_comparison_executable,
                    target,
                    project_dir,
                )
            )
            for target in targets
        ],
        return_exceptions=not raise_on_failure,
    )
    # `gather` preserves input order, so targets and results zip one-to-one
    comparisons_by_target = dict(zip(targets, comparisons, strict=True))

    # Split comparisons into errored / completed
    errored, completed = [], []
    for target, comparison in comparisons_by_target.items():
        if isinstance(comparison, Exception):
            errored.append((target, comparison))
        else:
            completed.append((target, comparison))

    result = Result(completed=completed, errored=errored)

    # Render the result: JSON is command-agnostic; markdown dispatches on the
    # ruff command that was checked
    match format:
        case OutputFormat.json:
            print(json.dumps(result, indent=4, cls=JSONEncoder))
        case OutputFormat.markdown:
            match command:
                case RuffCommand.check:
                    print(markdown_check_result(result))
                case RuffCommand.format:
                    print(markdown_format_result(result))
                case _:
                    raise ValueError(f"Unknown target Ruff command {command}")
        case _:
            raise ValueError(f"Unknown output format {format}")

    return None
async def clone_and_compare(
    command: RuffCommand,
    ruff_baseline_executable: Path,
    ruff_comparison_executable: Path,
    target: Project,
    project_dir: Path,
) -> Comparison:
    """Check a specific repository against two versions of ruff."""
    # The checkout directory name joins owner and name with ":", so neither
    # may contain one
    assert ":" not in target.repo.owner
    assert ":" not in target.repo.name

    # Pick the comparison coroutine and the per-project options it needs
    match command:
        case RuffCommand.check:
            compare, options = (
                compare_check,
                target.check_options,
            )
        case RuffCommand.format:
            compare, options = (
                compare_format,
                target.format_options,
            )
        case _:
            raise ValueError(f"Unknown target Ruff command {command}")

    checkout_dir = project_dir.joinpath(f"{target.repo.owner}:{target.repo.name}")
    cloned_repo = await target.repo.clone(checkout_dir)

    try:
        return await compare(
            ruff_baseline_executable,
            ruff_comparison_executable,
            options,
            cloned_repo,
        )
    except ExceptionGroup as e:
        # NOTE(review): presumably the compare implementations can raise an
        # `ExceptionGroup`; surface the first member — confirm against
        # `compare_check` / `compare_format`
        raise e.exceptions[0] from e
class JSONEncoder(json.JSONEncoder):
    """JSON encoder aware of the ecosystem result types."""

    def default(self, o):
        # Prefer an explicit `jsonable` implementation when one is provided
        if isinstance(o, Serializable):
            return o.jsonable()
        # Guard against dataclass *types*: `is_dataclass` is also true for the
        # class object itself, which `asdict` cannot handle (mirrors the guard
        # in `Serializable.jsonable`)
        if dataclasses.is_dataclass(o) and not isinstance(o, type):
            return dataclasses.asdict(o)
        if isinstance(o, set):
            return tuple(o)
        if isinstance(o, Path):
            return str(o)
        if isinstance(o, Exception):
            return str(o)
        return super().default(o)

View File

@@ -0,0 +1,44 @@
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ruff_ecosystem.projects import Project
def markdown_project_section(
    title: str, content: str | list[str], options: object, project: Project
) -> list[str]:
    """Render one project's results as a collapsible markdown section."""
    repo_link = f'<a href="{project.repo.url}">{project.repo.fullname}</a> ({title})'
    # Show the command used for the check
    command_preface = "<pre>ruff " + " ".join(options.to_cli_args()) + "</pre>"
    return markdown_details(
        summary=repo_link,
        preface=command_preface,
        content=content,
    )
def markdown_plus_minus(added: int, removed: int) -> str:
    """Render a `+N -M` change summary for markdown output."""
    # TODO(zanieb): GitHub does not support coloring with <span> it seems like the only
    #               way is to use LateX `${\text{\color{green}+10 \color{red}-10}}$` but
    #               it renders so ugly it's not worth doing yet
    return "+" + str(added) + " -" + str(removed)
def markdown_details(summary: str, preface: str, content: str | list[str]):
lines = []
lines.append(f"<details><summary>{summary}</summary>")
lines.append("<p>")
lines.append(preface)
lines.append("</p>")
lines.append("<p>")
lines.append("")
if isinstance(content, str):
lines.append(content)
else:
lines.extend(content)
lines.append("")
lines.append("</p>")
lines.append("</details>")
return lines

View File

@@ -0,0 +1,168 @@
"""
Abstractions and utilities for working with projects to run ecosystem checks on.
"""
from __future__ import annotations
from asyncio import create_subprocess_exec
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from subprocess import PIPE
from typing import Self
from ruff_ecosystem import logger
from ruff_ecosystem.check import CheckOptions
from ruff_ecosystem.format import FormatOptions
from ruff_ecosystem.types import Serializable
@dataclass(frozen=True)
class Project(Serializable):
    """
    An ecosystem target
    """

    # Remote repository to clone for the check
    repo: Repository
    # `default_factory` accepts the class directly; wrapping it in a lambda
    # was redundant
    check_options: CheckOptions = field(default_factory=CheckOptions)
    format_options: FormatOptions = field(default_factory=FormatOptions)
class RuffCommand(Enum):
    """The ruff subcommands that can be ecosystem-checked."""

    check = "check"
    format = "format"
class ProjectSetupError(Exception):
    """An error setting up a project."""

    # Raised when a `git checkout` fails or the commit sha cannot be resolved.
@dataclass(frozen=True)
class Repository(Serializable):
    """
    A remote GitHub repository.
    """

    # GitHub organization or user
    owner: str
    # Repository name
    name: str
    # Branch or tag to check out; `None` uses the default branch
    ref: str | None

    @property
    def fullname(self) -> str:
        # e.g. "astral-sh/ruff"
        return f"{self.owner}/{self.name}"

    @property
    def url(self: Self) -> str:
        return f"https://github.com/{self.owner}/{self.name}"

    async def clone(self: Self, checkout_dir: Path) -> ClonedRepository:
        """
        Shallow clone this repository
        """
        # Reuse an existing checkout, forcing it onto the requested ref
        if checkout_dir.exists():
            logger.debug(f"Reusing {self.owner}:{self.name}")

            if self.ref:
                logger.debug(f"Checking out ref {self.ref}")
                process = await create_subprocess_exec(
                    *["git", "checkout", "-f", self.ref],
                    cwd=checkout_dir,
                    env={"GIT_TERMINAL_PROMPT": "0"},
                    stdout=PIPE,
                    stderr=PIPE,
                )
                # NOTE(review): `communicate()` after `wait()` with PIPEd
                # output can deadlock if the child fills a pipe buffer before
                # exiting — confirm git's output here stays small
                if await process.wait() != 0:
                    _, stderr = await process.communicate()
                    raise ProjectSetupError(
                        f"Failed to checkout {self.ref}: {stderr.decode()}"
                    )

            return await ClonedRepository.from_path(checkout_dir, self)

        logger.debug(f"Cloning {self.owner}:{self.name} to {checkout_dir}")
        # Shallow, tag-free clone to keep checkouts small and fast
        command = [
            "git",
            "clone",
            "--config",
            "advice.detachedHead=false",
            "--quiet",
            "--depth",
            "1",
            "--no-tags",
        ]
        if self.ref:
            command.extend(["--branch", self.ref])

        command.extend(
            [
                f"https://github.com/{self.owner}/{self.name}",
                str(checkout_dir),
            ],
        )

        process = await create_subprocess_exec(
            *command, env={"GIT_TERMINAL_PROMPT": "0"}
        )
        status_code = await process.wait()

        # NOTE(review): a non-zero clone status is only logged here; a failed
        # clone surfaces later via `ClonedRepository.from_path` — confirm this
        # is intentional
        logger.debug(
            f"Finished cloning {self.fullname} with status {status_code}",
        )

        return await ClonedRepository.from_path(checkout_dir, self)
@dataclass(frozen=True)
class ClonedRepository(Repository, Serializable):
    """
    A cloned GitHub repository, which includes the hash of the current commit.
    """

    # Commit sha the checkout is pinned to (used for permalink URLs)
    commit_hash: str
    # Local checkout directory
    path: Path

    def url_for(
        self: Self,
        path: str,
        line_number: int | None = None,
        end_line_number: int | None = None,
    ) -> str:
        """
        Return the remote GitHub URL for the given path in this repository.
        """
        url = f"https://github.com/{self.owner}/{self.name}/blob/{self.commit_hash}/{path}"
        # Note: a `line_number` of 0 is falsy and treated the same as `None`
        if line_number:
            url += f"#L{line_number}"
        if end_line_number:
            url += f"-L{end_line_number}"
        return url

    @property
    def url(self: Self) -> str:
        return f"https://github.com/{self.owner}/{self.name}@{self.commit_hash}"

    @classmethod
    async def from_path(cls, path: Path, repo: Repository) -> Self:
        """Construct a `ClonedRepository` for an existing checkout of `repo` at `path`."""
        return cls(
            name=repo.name,
            owner=repo.owner,
            ref=repo.ref,
            path=path,
            commit_hash=await cls._get_head_commit(path),
        )

    @staticmethod
    async def _get_head_commit(checkout_dir: Path) -> str:
        """
        Return the commit sha for the repository in the checkout directory.
        """
        process = await create_subprocess_exec(
            *["git", "rev-parse", "HEAD"],
            cwd=checkout_dir,
            stdout=PIPE,
        )
        stdout, _ = await process.communicate()
        # NOTE(review): `communicate()` already waits for exit, so this second
        # `wait()` looks redundant (though harmless) — confirm
        if await process.wait() != 0:
            raise ProjectSetupError(f"Failed to retrieve commit sha at {checkout_dir}")

        return stdout.decode().strip()

View File

@@ -0,0 +1,93 @@
from __future__ import annotations
import abc
import dataclasses
import difflib
from dataclasses import dataclass, is_dataclass
from typing import TYPE_CHECKING, Any, Generator, Iterable, Sequence
if TYPE_CHECKING:
from ruff_ecosystem.projects import ClonedRepository, Project
class Serializable(abc.ABC):
    """
    Allows serialization of content by casting to a JSON-compatible type.
    """

    def jsonable(self) -> Any:
        """Return a JSON-compatible representation of this object."""
        # Default implementation: dataclass *instances* (not dataclass types)
        # serialize via `asdict`; everything else must override this method
        if not is_dataclass(self) or isinstance(self, type):
            raise NotImplementedError()
        return dataclasses.asdict(self)
class Diff(Serializable):
    """
    A line-based diff, pre-computing its added and removed lines.
    """

    def __init__(self, lines: Iterable[str], leading_spaces: int = 0) -> None:
        self.lines = list(lines)

        # Compute added and removed lines once; `leading_spaces` accommodates
        # diff flavors whose markers are followed by padding (e.g. `ndiff`)
        added_marker = "+" + " " * leading_spaces
        removed_marker = "-" + " " * leading_spaces
        self.added = [
            line[2:] for line in self.lines if line.startswith(added_marker)
        ]
        self.removed = [
            line[2:] for line in self.lines if line.startswith(removed_marker)
        ]

    def __bool__(self) -> bool:
        # Truthy when anything was added or removed
        return bool(self.added) or bool(self.removed)

    def __iter__(self) -> Generator[str, None, None]:
        yield from self.lines

    @property
    def lines_added(self):
        return len(self.added)

    @property
    def lines_removed(self):
        return len(self.removed)

    @classmethod
    def from_pair(cls, baseline: Sequence[str], comparison: Sequence[str]):
        """
        Construct a diff from before and after.
        """
        # `ndiff` marks lines as "+ "/"- ", hence one leading space
        return cls(difflib.ndiff(baseline, comparison), leading_spaces=1)

    def without_unchanged_lines(self) -> Diff:
        """Return a new diff containing only the changed lines."""
        return Diff(line for line in self.lines if line[:1] in ("+", "-"))

    def jsonable(self) -> Any:
        return self.lines
@dataclass(frozen=True)
class Result(Serializable):
    """
    The result of an ecosystem check for a collection of projects.
    """

    # Projects whose comparison raised, paired with the exception
    errored: list[tuple[Project, Exception]]
    # Projects whose comparison finished, paired with the comparison
    completed: list[tuple[Project, Comparison]]
@dataclass(frozen=True)
class Comparison(Serializable):
    """
    The result of a completed ecosystem comparison for a single project.
    """

    # Diff between the baseline and comparison runs
    diff: Diff
    # The checkout the comparison ran against
    repo: ClonedRepository
class RuffError(Exception):
    """An error reported by Ruff."""

    # Raised when a ruff subprocess exits with an unexpected status code.

View File

@@ -1,5 +1,9 @@
#!/usr/bin/env python3
"""Check two versions of ruff against a corpus of open-source code.
"""
**DEPRECATED** This script is being replaced by the ruff-ecosystem package.
Check two versions of ruff against a corpus of open-source code.
Example usage: