Files
ruff/crates/ruff_macros/src/rule_code_prefix.rs
Brent Westbrook 155fd603e8 Document when a rule was added (#21035)
Summary
--

Inspired by #20859, this PR adds the version a rule was added, and the
file and line where it was defined, to `ViolationMetadata`. The file and
line just use the standard `file!` and `line!` macros, while the more
interesting version field uses a new `violation_metadata` attribute
parsed by our `ViolationMetadata` derive macro.

I moved the commit modifying all of the rule files to the end, so it
should be a lot easier to review by omitting that one.

As a curiosity and a bit of a sanity check, I also plotted the rule
numbers over time:

<img width="640" height="480" alt="image"
src="https://github.com/user-attachments/assets/75b0b5cc-3521-4d40-a395-8807e6f4925f"
/>

I think this looks pretty reasonable and avoids some of the artifacts
the earlier versions of the script ran into, such as the `rule`
sub-command not being available or `--explain` requiring a file
argument.

<details><summary>Script and summary data</summary>

```shell
gawk --csv '
NR > 1 {
    split($2, a, ".")
    major = a[1]; minor = a[2]; micro = a[3]
    # sum the number of rules added per minor version
    versions[minor] += 1
}
END {
    tot = 0
    for (i = 0; i <= 14; i++) {
        tot += versions[i]
        print i, tot
    }
}
' ruff_rules_metadata.csv > summary.dat
```

```
0 696
1 768
2 778
3 803
4 822
5 848
6 855
7 865
8 893
9 915
10 916
11 924
12 929
13 932
14 933
```

</details>

Test Plan
--

I built and viewed the documentation locally, and it looks pretty good!

<img width="1466" height="676" alt="image"
src="https://github.com/user-attachments/assets/5e227df4-7294-4d12-bdaa-31cac4e9ad5c"
/>

The spacing seems a bit awkward following the `h1` at the top, so I'm
wondering if this might look nicer as a footer in Ruff. The links work
well too:
- [v0.0.271](https://github.com/astral-sh/ruff/releases/tag/v0.0.271)
- [Related
issues](https://github.com/astral-sh/ruff/issues?q=sort%3Aupdated-desc%20is%3Aissue%20is%3Aopen%20airflow-variable-name-task-id-mismatch)
- [View
source](https://github.com/astral-sh/ruff/blob/main/crates%2Fruff_linter%2Fsrc%2Frules%2Fairflow%2Frules%2Ftask_variable_name.rs#L34)

The last one even works on `main` now since it points to the
`derive(ViolationMetadata)` line.

In terms of binary size, this branch is a bit bigger than main with
38,654,520 bytes compared to 38,635,728 (+18,792 bytes, about 18 KiB). I
guess that's not _too_ much of an increase, but I wanted to check since
we're generating a lot more code with macros.

---------

Co-authored-by: GiGaGon <107241144+MeGaGiGaGon@users.noreply.github.com>
2025-10-23 14:48:41 -04:00

130 lines
3.8 KiB
Rust

use std::collections::{BTreeMap, BTreeSet};
use proc_macro2::Span;
use quote::quote;
use syn::{Attribute, Ident};
/// Generates the prefix enum named `prefix_ident` along with its string conversions.
///
/// `variants` yields `(code, attributes)` pairs. Every non-empty leading substring of every
/// code becomes one enum variant, named via `get_prefix_ident`. The attributes common to all
/// codes that start with a given prefix are emitted in front of that variant and in front of
/// each match arm that mentions it.
///
/// The returned tokens define the enum itself plus `FromStr`, `From<&Enum> for &'static str`
/// and `AsRef<str>` implementations that map between a variant and its prefix string.
pub(crate) fn expand<'a>(
    prefix_ident: &Ident,
    variants: impl Iterator<Item = (&'a str, &'a Vec<Attribute>)>,
) -> proc_macro2::TokenStream {
    // Build up a map from prefix to matching codes, and from each code to its attributes.
    let mut codes_by_prefix: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
    let mut attrs_by_code: BTreeMap<String, &[Attribute]> = BTreeMap::new();

    for (code, code_attrs) in variants {
        // `end` is a byte offset, so each slice below is a leading substring of `code`.
        for end in 1..=code.len() {
            codes_by_prefix
                .entry(code[..end].to_owned())
                .or_default()
                .insert(code.to_owned());
        }
        attrs_by_code.insert(code.to_owned(), code_attrs.as_slice());
    }

    // The three vectors below are all derived from the same ordered map, so their elements
    // line up index-by-index inside the `#(...)*` repetitions.
    let names: Vec<&String> = codes_by_prefix.keys().collect();
    let idents: Vec<Ident> = codes_by_prefix
        .keys()
        .map(|prefix| get_prefix_ident(prefix))
        .collect();
    let shared_attrs: Vec<proc_macro2::TokenStream> = codes_by_prefix
        .values()
        .map(|codes| attributes_for_prefix(codes, &attrs_by_code))
        .collect();

    quote! {
        #[derive(
            ::strum_macros::EnumIter,
            Debug,
            PartialEq,
            Eq,
            PartialOrd,
            Ord,
            Clone,
            Hash,
        )]
        pub enum #prefix_ident {
            #(#shared_attrs #idents,)*
        }

        impl std::str::FromStr for #prefix_ident {
            type Err = crate::registry::FromCodeError;

            fn from_str(code: &str) -> Result<Self, Self::Err> {
                match code {
                    #(#shared_attrs #names => Ok(Self::#idents),)*
                    _ => Err(crate::registry::FromCodeError::Unknown)
                }
            }
        }

        impl From<&#prefix_ident> for &'static str {
            fn from(code: &#prefix_ident) -> Self {
                match code {
                    #(#shared_attrs #prefix_ident::#idents => #names,)*
                }
            }
        }

        impl AsRef<str> for #prefix_ident {
            fn as_ref(&self) -> &str {
                match self {
                    #(#shared_attrs Self::#idents => #names,)*
                }
            }
        }
    }
}
/// Renders the attributes that every code in `codes` has in common.
///
/// Each code is looked up in `attributes` (indexing panics if a code is missing from the
/// map), and only attributes present on all of the codes are kept. The result is those
/// attributes emitted back-to-back, or an empty token stream when there are none.
fn attributes_for_prefix(
    codes: &BTreeSet<String>,
    attributes: &BTreeMap<String, &[Attribute]>,
) -> proc_macro2::TokenStream {
    let per_code = codes.iter().map(|code| attributes[code]);
    let shared = intersection_all(per_code);

    if shared.is_empty() {
        return quote!();
    }
    quote!(#(#shared)*)
}
/// Returns references to the items of the first slice that also occur in every other slice.
///
/// Items keep the order (and any duplicates) they have in the first slice. An iterator that
/// yields no slices produces an empty vector.
pub(crate) fn intersection_all<'a, T: PartialEq>(
    mut slices: impl Iterator<Item = &'a [T]>,
) -> Vec<&'a T> {
    match slices.next() {
        None => Vec::new(),
        Some(first) => {
            // Start with every item of the first slice as a candidate...
            let candidates: Vec<&'a T> = first.iter().collect();
            // ...then let each remaining slice discard the candidates it does not contain.
            slices.fold(candidates, |mut common, other| {
                common.retain(|item| other.contains(item));
                common
            })
        }
    }
}
/// Builds the identifier used for `prefix`.
///
/// A prefix whose first byte is an ASCII digit gets a leading underscore; any other prefix
/// is used unchanged. Indexing the first byte panics if `prefix` is empty.
pub(crate) fn get_prefix_ident(prefix: &str) -> Ident {
    let name = match prefix.as_bytes()[0] {
        // Identifiers in Rust may not start with a number.
        b'0'..=b'9' => format!("_{prefix}"),
        _ => prefix.to_owned(),
    };
    Ident::new(&name, Span::call_site())
}