Files
ruff/crates/ruff_macros/src/map_codes.rs
konstin 763d38cafb Refactor top llvm-lines entry (#5147)
## Summary

This refactors the top entry in terms of llvm lines,
`RuleCodePrefix::iter()`. It's only used for generating the schema and
the clap completion so no effect on performance.

I've confirmed with
```
CARGO_TARGET_DIR=target-llvm-lines RUSTFLAGS="-Csymbol-mangling-version=v0" cargo llvm-lines -p ruff --lib | head -n 20
```
that this indeed removes the method from the list of heaviest symbols in
terms of llvm-lines.

Before:
```
  Lines                  Copies               Function name
  -----                  ------               -------------
  1768469                40538                (TOTAL)
    10391 (0.6%,  0.6%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::codes::RuleCodePrefix>::iter
     8250 (0.5%,  1.1%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::codes::Rule>::noqa_code
     7427 (0.4%,  1.5%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::checkers::ast::Checker as ruff_python_ast[c4c9eadfa5741dd4]::visitor::Visitor>::visit_stmt
     6536 (0.4%,  1.8%)      1 (0.0%,  0.0%)  <<ruff[fa0f2e8ef07114da]::settings::options::Options as serde[1a28808d63625aed]::de::Deserialize>::deserialize::__Visitor as serde[1a28808d63625aed]::de::Visitor>::visit_map::<toml_edit[de4ca26332d39787]::de::spanned::SpannedDeserializer<toml_edit[de4ca26332d39787]::de::value::ValueDeserializer>>
     6536 (0.4%,  2.2%)      1 (0.0%,  0.0%)  <<ruff[fa0f2e8ef07114da]::settings::options::Options as serde[1a28808d63625aed]::de::Deserialize>::deserialize::__Visitor as serde[1a28808d63625aed]::de::Visitor>::visit_map::<toml_edit[de4ca26332d39787]::de::table::TableMapAccess>
     6533 (0.4%,  2.6%)      1 (0.0%,  0.0%)  <<ruff[fa0f2e8ef07114da]::settings::options::Options as serde[1a28808d63625aed]::de::Deserialize>::deserialize::__Visitor as serde[1a28808d63625aed]::de::Visitor>::visit_map::<toml_edit[de4ca26332d39787]::de::datetime::DatetimeDeserializer>
     5727 (0.3%,  2.9%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::checkers::ast::Checker as ruff_python_ast[c4c9eadfa5741dd4]::visitor::Visitor>::visit_expr
     4453 (0.3%,  3.2%)      1 (0.0%,  0.0%)  ruff[fa0f2e8ef07114da]::flake8_to_ruff::converter::convert
     3790 (0.2%,  3.4%)      1 (0.0%,  0.0%)  <&ruff[fa0f2e8ef07114da]::registry::Linter as core[da82827a87f140f9]::iter::traits::collect::IntoIterator>::into_iter
     3416 (0.2%,  3.6%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::registry::Linter>::code_for_rule
     3187 (0.2%,  3.7%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::codes::Rule as core[da82827a87f140f9]::fmt::Debug>::fmt
     3185 (0.2%,  3.9%)      1 (0.0%,  0.0%)  <&str as core[da82827a87f140f9]::convert::From<&ruff[fa0f2e8ef07114da]::codes::Rule>>::from
     3185 (0.2%,  4.1%)      1 (0.0%,  0.0%)  <&str as core[da82827a87f140f9]::convert::From<ruff[fa0f2e8ef07114da]::codes::Rule>>::from
     3185 (0.2%,  4.3%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::codes::Rule as core[da82827a87f140f9]::convert::AsRef<str>>::as_ref
     3183 (0.2%,  4.5%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::codes::RuleIter>::get
     2718 (0.2%,  4.6%)      1 (0.0%,  0.0%)  <<ruff[fa0f2e8ef07114da]::settings::options::Options as serde[1a28808d63625aed]::de::Deserialize>::deserialize::__Visitor as serde[1a28808d63625aed]::de::Visitor>::visit_seq::<toml_edit[de4ca26332d39787]::de::array::ArraySeqAccess>
     2706 (0.2%,  4.8%)      1 (0.0%,  0.0%)  <&ruff[fa0f2e8ef07114da]::codes::Pylint as core[da82827a87f140f9]::iter::traits::collect::IntoIterator>::into_iter
```
After:
```
  Lines                  Copies               Function name
  -----                  ------               -------------
  1763380                40806                (TOTAL)
     8250 (0.5%,  0.5%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::codes::Rule>::noqa_code
     7427 (0.4%,  0.9%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::checkers::ast::Checker as ruff_python_ast[c4c9eadfa5741dd4]::visitor::Visitor>::visit_stmt
     6536 (0.4%,  1.3%)      1 (0.0%,  0.0%)  <<ruff[fa0f2e8ef07114da]::settings::options::Options as serde[1a28808d63625aed]::de::Deserialize>::deserialize::__Visitor as serde[1a28808d63625aed]::de::Visitor>::visit_map::<toml_edit[de4ca26332d39787]::de::spanned::SpannedDeserializer<toml_edit[de4ca26332d39787]::de::value::ValueDeserializer>>
     6536 (0.4%,  1.6%)      1 (0.0%,  0.0%)  <<ruff[fa0f2e8ef07114da]::settings::options::Options as serde[1a28808d63625aed]::de::Deserialize>::deserialize::__Visitor as serde[1a28808d63625aed]::de::Visitor>::visit_map::<toml_edit[de4ca26332d39787]::de::table::TableMapAccess>
     6533 (0.4%,  2.0%)      1 (0.0%,  0.0%)  <<ruff[fa0f2e8ef07114da]::settings::options::Options as serde[1a28808d63625aed]::de::Deserialize>::deserialize::__Visitor as serde[1a28808d63625aed]::de::Visitor>::visit_map::<toml_edit[de4ca26332d39787]::de::datetime::DatetimeDeserializer>
     5727 (0.3%,  2.3%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::checkers::ast::Checker as ruff_python_ast[c4c9eadfa5741dd4]::visitor::Visitor>::visit_expr
     4453 (0.3%,  2.6%)      1 (0.0%,  0.0%)  ruff[fa0f2e8ef07114da]::flake8_to_ruff::converter::convert
     3790 (0.2%,  2.8%)      1 (0.0%,  0.0%)  <&ruff[fa0f2e8ef07114da]::registry::Linter as core[da82827a87f140f9]::iter::traits::collect::IntoIterator>::into_iter
     3416 (0.2%,  3.0%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::registry::Linter>::code_for_rule
     3187 (0.2%,  3.2%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::codes::Rule as core[da82827a87f140f9]::fmt::Debug>::fmt
     3185 (0.2%,  3.3%)      1 (0.0%,  0.0%)  <&str as core[da82827a87f140f9]::convert::From<&ruff[fa0f2e8ef07114da]::codes::Rule>>::from
     3185 (0.2%,  3.5%)      1 (0.0%,  0.0%)  <&str as core[da82827a87f140f9]::convert::From<ruff[fa0f2e8ef07114da]::codes::Rule>>::from
     3185 (0.2%,  3.7%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::codes::Rule as core[da82827a87f140f9]::convert::AsRef<str>>::as_ref
     3183 (0.2%,  3.9%)      1 (0.0%,  0.0%)  <ruff[fa0f2e8ef07114da]::codes::RuleIter>::get
     2718 (0.2%,  4.0%)      1 (0.0%,  0.0%)  <<ruff[fa0f2e8ef07114da]::settings::options::Options as serde[1a28808d63625aed]::de::Deserialize>::deserialize::__Visitor as serde[1a28808d63625aed]::de::Visitor>::visit_seq::<toml_edit[de4ca26332d39787]::de::array::ArraySeqAccess>
     2706 (0.2%,  4.2%)      1 (0.0%,  0.0%)  <&ruff[fa0f2e8ef07114da]::codes::Pylint as core[da82827a87f140f9]::iter::traits::collect::IntoIterator>::into_iter
     2573 (0.1%,  4.3%)      1 (0.0%,  0.0%)  <<ruff[fa0f2e8ef07114da]::rules::isort::settings::Options as serde[1a28808d63625aed]::de::Deserialize>::deserialize::__Visitor as serde[1a28808d63625aed]::de::Visitor>::visit_map::<toml_edit[de4ca26332d39787]::de::spanned::SpannedDeserializer<toml_edit[de4ca26332d39787]::de::value::ValueDeserializer>>
```
I didn't measure the effect on binary size this time.

## Testing

The schema that `cargo test` generates using this code didn't change.
2023-06-18 12:39:06 +02:00

469 lines
16 KiB
Rust

use std::collections::{BTreeMap, HashMap};
use itertools::Itertools;
use proc_macro2::TokenStream;
use quote::{quote, ToTokens};
use syn::{
parenthesized, parse::Parse, spanned::Spanned, Attribute, Error, Expr, ExprCall, ExprMatch,
Ident, ItemFn, LitStr, Pat, Path, Stmt, Token,
};
use crate::rule_code_prefix::{get_prefix_ident, if_all_same, is_nursery};
/// A rule entry in the big match statement, such as
/// `(Pycodestyle, "E112") => (RuleGroup::Nursery, rules::pycodestyle::rules::logical_lines::NoIndentedBlock),`
///
/// Instances are produced by the `Parse` implementation for this type, one per match arm.
#[derive(Clone)]
struct Rule {
/// The actual name of the rule, e.g., `NoIndentedBlock` (the last segment of `path`).
name: Ident,
/// The linter associated with the rule, e.g., `Pycodestyle`.
linter: Ident,
/// The code associated with the rule, e.g., `"E112"`.
code: LitStr,
/// The rule group identifier, e.g., `RuleGroup::Nursery`.
group: Path,
/// The path to the struct implementing the rule, e.g.
/// `rules::pycodestyle::rules::logical_lines::NoIndentedBlock`
path: Path,
/// The outer attributes written on the match arm, e.g. `#[cfg(...)]` feature gates. They are
/// re-emitted on every generated item or match arm that refers to this rule.
attrs: Vec<Attribute>,
}
/// Expands the annotated code-to-rule function into the generated rule tables.
///
/// The body of `func` must end in an expression of the form `Some(match (..) { .. })`. Every
/// match arm up to (but not including) the first wildcard arm is parsed as a [`Rule`], and the
/// rules are grouped by linter and then by code.
///
/// The returned token stream contains, in order:
/// - the `Rule` enum and its helpers (see `register_rules`),
/// - the `RuleCodePrefix` enum with `linter()` and `short_code()`,
/// - per linter: the output of `rule_code_prefix::expand` plus `From<Linter prefix>` impls for
///   `RuleCodePrefix` and `RuleSelector`,
/// - per linter: `impl IntoIterator for &<linter prefix enum>`,
/// - `impl IntoIterator for &RuleCodePrefix` and `RuleCodePrefix::parse`,
/// - the rule-to-code mappings (see `generate_rule_to_code`),
/// - `impl IntoIterator for &Linter` and `RuleCodePrefix::iter()` (see `generate_iter_impl`).
///
/// # Errors
///
/// Returns a [`syn::Error`] if the function body is empty, if its last statement is not a call
/// with a single `match` argument, or if any match arm cannot be parsed as a [`Rule`].
pub(crate) fn map_codes(func: &ItemFn) -> syn::Result<TokenStream> {
    let Some(last_stmt) = func.block.stmts.last() else {
        return Err(Error::new(func.block.span(), "expected body to end in an expression"));
    };
    let Stmt::Expr(Expr::Call(ExprCall { args: some_args, .. }), _) = last_stmt else {
        return Err(Error::new(last_stmt.span(), "expected last expression to be `Some(match (..) { .. })`"));
    };
    // The call must have exactly one argument, and that argument must be a `match`.
    let mut some_args = some_args.into_iter();
    let (Some(Expr::Match(ExprMatch { arms, .. })), None) = (some_args.next(), some_args.next()) else {
        return Err(Error::new(last_stmt.span(), "expected last expression to be `Some(match (..) { .. })`"));
    };

    // Map from: linter (e.g., `Flake8Bugbear`) to rule code (e.g.,`"002"`) to rule data (e.g.,
    // `(Rule::UnaryPrefixIncrement, RuleGroup::Unspecified, vec![])`).
    let mut linter_to_rules: BTreeMap<Ident, BTreeMap<String, Rule>> = BTreeMap::new();

    for arm in arms {
        // The wildcard arm terminates the list of rules; nothing after it is inspected.
        if matches!(arm.pat, Pat::Wild(..)) {
            break;
        }

        let rule = syn::parse::<Rule>(arm.into_token_stream().into())?;
        linter_to_rules
            .entry(rule.linter.clone())
            .or_default()
            .insert(rule.code.value(), rule);
    }

    let linter_idents: Vec<_> = linter_to_rules.keys().collect();

    let all_rules = linter_to_rules.values().flat_map(BTreeMap::values);
    let mut output = register_rules(all_rules);

    output.extend(quote! {
        #[derive(Debug, Clone, PartialEq, Eq, Hash)]
        pub enum RuleCodePrefix {
            #(#linter_idents(#linter_idents),)*
        }

        impl RuleCodePrefix {
            pub fn linter(&self) -> &'static Linter {
                match self {
                    #(Self::#linter_idents(..) => &Linter::#linter_idents,)*
                }
            }

            pub fn short_code(&self) -> &'static str {
                match self {
                    #(Self::#linter_idents(code) => code.into(),)*
                }
            }
        }
    });

    for (linter, rules) in &linter_to_rules {
        output.extend(super::rule_code_prefix::expand(
            linter,
            rules
                .iter()
                .map(|(code, Rule { group, attrs, .. })| (code.as_str(), group, attrs)),
        ));

        output.extend(quote! {
            impl From<#linter> for RuleCodePrefix {
                fn from(linter: #linter) -> Self {
                    Self::#linter(linter)
                }
            }
            impl From<#linter> for crate::rule_selector::RuleSelector {
                fn from(linter: #linter) -> Self {
                    Self::Prefix{prefix: RuleCodePrefix::#linter(linter), redirected_from: None}
                }
            }
        });
    }

    for (linter, rules) in &linter_to_rules {
        let mut prefix_into_iter_match_arms = quote!();

        for (prefix, rules) in rules_by_prefix(rules) {
            let rule_paths = rules.iter().map(|(path, .., attrs)| {
                let rule_name = path.segments.last().unwrap();
                quote!(#(#attrs)* Rule::#rule_name)
            });
            let prefix_ident = get_prefix_ident(&prefix);
            // Only gate the whole arm when every rule under the prefix carries the same
            // attributes; otherwise the arm itself is emitted without attributes.
            let attr = match if_all_same(rules.iter().map(|(.., attrs)| attrs)) {
                Some(attr) => quote!(#(#attr)*),
                None => quote!(),
            };
            prefix_into_iter_match_arms.extend(quote! {
                #attr #linter::#prefix_ident => vec![#(#rule_paths,)*].into_iter(),
            });
        }

        output.extend(quote! {
            impl IntoIterator for &#linter {
                type Item = Rule;
                type IntoIter = ::std::vec::IntoIter<Self::Item>;

                fn into_iter(self) -> Self::IntoIter {
                    match self { #prefix_into_iter_match_arms }
                }
            }
        });
    }

    output.extend(quote! {
        impl IntoIterator for &RuleCodePrefix {
            type Item = Rule;
            type IntoIter = ::std::vec::IntoIter<Self::Item>;

            fn into_iter(self) -> Self::IntoIter {
                match self {
                    #(RuleCodePrefix::#linter_idents(prefix) => prefix.into_iter(),)*
                }
            }
        }
    });

    output.extend(quote! {
        impl RuleCodePrefix {
            pub fn parse(linter: &Linter, code: &str) -> Result<Self, crate::registry::FromCodeError> {
                use std::str::FromStr;

                Ok(match linter {
                    #(Linter::#linter_idents => RuleCodePrefix::#linter_idents(#linter_idents::from_str(code).map_err(|_| crate::registry::FromCodeError::Unknown)?),)*
                })
            }
        }
    });

    let rule_to_code = generate_rule_to_code(&linter_to_rules);
    output.extend(rule_to_code);

    output.extend(generate_iter_impl(&linter_to_rules, &linter_idents));

    Ok(output)
}
/// Group the rules by their common prefixes.
///
/// For every non-nursery rule, each non-empty prefix of its code (including the full code) maps
/// to the `(path, attrs)` of all non-nursery rules whose code starts with that prefix, in code
/// order. A nursery rule is only reachable through its exact code: it gets a single-element
/// entry under its full code, unless that code is also a prefix of some non-nursery rule, in
/// which case the non-nursery entry wins.
fn rules_by_prefix(
    rules: &BTreeMap<String, Rule>,
) -> BTreeMap<String, Vec<(Path, Vec<Attribute>)>> {
    // TODO(charlie): Why do we do this here _and_ in `rule_code_prefix::expand`?
    let mut rules_by_prefix: BTreeMap<String, Vec<(Path, Vec<Attribute>)>> = BTreeMap::new();

    // Nursery rules have to be explicitly selected, so we ignore them when looking at prefixes.
    // Each rule is appended once to the bucket of each of its prefixes, rather than rescanning
    // (and re-cloning) every rule for every prefix. `rules` is iterated in code order, so each
    // bucket ends up in code order.
    for (code, rule) in rules.iter().filter(|(_, rule)| !is_nursery(&rule.group)) {
        // Walk character boundaries so that slicing can never split a multi-byte character.
        for (start, ch) in code.char_indices() {
            let prefix = &code[..start + ch.len_utf8()];
            rules_by_prefix
                .entry(prefix.to_string())
                .or_default()
                .push((rule.path.clone(), rule.attrs.clone()));
        }
    }

    // Nursery rules are selectable by their full code only. This runs after the pass above so
    // that a prefix entry built from non-nursery rules takes precedence over the nursery
    // singleton when both share the same key.
    for (code, rule) in rules.iter().filter(|(_, rule)| is_nursery(&rule.group)) {
        rules_by_prefix
            .entry(code.clone())
            .or_insert_with(|| vec![(rule.path.clone(), rule.attrs.clone())]);
    }

    rules_by_prefix
}
/// Map from rule to codes that can be used to select it.
/// This abstraction exists to support a one-to-many mapping, whereby a single rule could map
/// to multiple codes (e.g., if it existed in multiple linters, like Pylint and Flake8, under
/// different codes). We haven't actually activated this functionality yet, but some work was
/// done to support it, so the logic exists here.
///
/// Generates `Rule::noqa_code`, `Rule::group`, `Rule::is_nursery` and `Linter::code_for_rule`.
/// All match arms are emitted in the (sorted) iteration order of `linter_to_rules`, so the
/// output is identical from one macro invocation to the next.
///
/// # Panics
///
/// Panics if the same rule path is registered under more than one code.
fn generate_rule_to_code(linter_to_rules: &BTreeMap<Ident, BTreeMap<String, Rule>>) -> TokenStream {
    let mut rule_to_codes: HashMap<&Path, Vec<&Rule>> = HashMap::new();
    let mut linter_code_for_rule_match_arms = quote!();

    for (linter, map) in linter_to_rules {
        for (code, rule) in map {
            let Rule {
                path, attrs, name, ..
            } = rule;
            rule_to_codes.entry(path).or_default().push(rule);
            linter_code_for_rule_match_arms.extend(quote! {
                #(#attrs)* (Self::#linter, Rule::#name) => Some(#code),
            });
        }
    }

    let mut rule_noqa_code_match_arms = quote!();
    let mut rule_group_match_arms = quote!();

    // Walk the rules in `BTreeMap` order and take each path's bucket out of the map the first
    // time the path is seen. Iterating the `HashMap` directly would emit the arms in an
    // arbitrary order that changes between runs.
    for first_seen in linter_to_rules.values().flat_map(BTreeMap::values) {
        let Some(codes) = rule_to_codes.remove(&first_seen.path) else {
            // This path was already handled at an earlier occurrence.
            continue;
        };
        let rule_name = first_seen.path.segments.last().unwrap();
        assert_eq!(
            codes.len(),
            1,
            "
{} is mapped to multiple codes.
The mapping of multiple codes to one rule has been disabled due to UX concerns (it would
be confusing if violations were reported under a different code than the code you selected).
We firstly want to allow rules to be selected by their names (and report them by name),
and before we can do that we have to rename all our rules to match our naming convention
(see CONTRIBUTING.md) because after that change every rule rename will be a breaking change.
See also https://github.com/astral-sh/ruff/issues/2186.
",
            rule_name.ident
        );

        // Prefer a non-Pylint code when several exist (`false` sorts before `true`). With the
        // assertion above there is currently always exactly one candidate.
        let Rule {
            linter,
            code,
            group,
            attrs,
            ..
        } = codes
            .iter()
            .sorted_by_key(|data| data.linter == "Pylint")
            .next()
            .unwrap();

        rule_noqa_code_match_arms.extend(quote! {
            #(#attrs)* Rule::#rule_name => NoqaCode(crate::registry::Linter::#linter.common_prefix(), #code),
        });

        rule_group_match_arms.extend(quote! {
            #(#attrs)* Rule::#rule_name => #group,
        });
    }

    quote! {
        impl Rule {
            pub fn noqa_code(&self) -> NoqaCode {
                use crate::registry::RuleNamespace;

                match self {
                    #rule_noqa_code_match_arms
                }
            }

            pub fn group(&self) -> RuleGroup {
                use crate::registry::RuleNamespace;

                match self {
                    #rule_group_match_arms
                }
            }

            pub fn is_nursery(&self) -> bool {
                matches!(self.group(), RuleGroup::Nursery)
            }
        }

        impl Linter {
            pub fn code_for_rule(&self, rule: Rule) -> Option<&'static str> {
                match (self, rule) {
                    #linter_code_for_rule_match_arms
                    _ => None,
                }
            }
        }
    }
}
/// Implement `impl IntoIterator for &Linter` and `RuleCodePrefix::iter()`
fn generate_iter_impl(
linter_to_rules: &BTreeMap<Ident, BTreeMap<String, Rule>>,
linter_idents: &[&Ident],
) -> TokenStream {
let mut linter_into_iter_match_arms = quote!();
for (linter, map) in linter_to_rules {
let rule_paths = map.values().map(|Rule { attrs, path, .. }| {
let rule_name = path.segments.last().unwrap();
quote!(#(#attrs)* Rule::#rule_name)
});
linter_into_iter_match_arms.extend(quote! {
Linter::#linter => vec![#(#rule_paths,)*].into_iter(),
});
}
quote! {
impl IntoIterator for &Linter {
type Item = Rule;
type IntoIter = ::std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
match self {
#linter_into_iter_match_arms
}
}
}
impl RuleCodePrefix {
pub fn iter() -> impl Iterator<Item = RuleCodePrefix> {
use strum::IntoEnumIterator;
std::iter::empty()
#(.chain(#linter_idents::iter().map(|x| Self::#linter_idents(x))))*
}
}
}
}
/// Generate the `Rule` enum
///
/// Besides the enum itself (one variant per input rule), this emits `Rule::message_formats`,
/// `Rule::explanation`, `Rule::autofixable` and the `AsRule` implementation for
/// `ruff_diagnostics::DiagnosticKind`. Each rule's attributes (like `#[cfg(feature = "foo")]`)
/// are repeated on its variant and on every match arm that mentions it.
fn register_rules<'a>(input: impl Iterator<Item = &'a Rule>) -> TokenStream {
    // The iterator can only be walked once, so materialize it before building each fragment.
    let rules: Vec<&Rule> = input.collect();

    let rule_variants: TokenStream = rules
        .iter()
        .map(|Rule { name, attrs, .. }| {
            quote! {
                #(#attrs)*
                #name,
            }
        })
        .collect();

    let rule_message_formats_match_arms: TokenStream = rules
        .iter()
        .map(|Rule { name, attrs, path, .. }| {
            quote! {#(#attrs)* Self::#name => <#path as ruff_diagnostics::Violation>::message_formats(),}
        })
        .collect();

    let rule_autofixable_match_arms: TokenStream = rules
        .iter()
        .map(|Rule { name, attrs, path, .. }| {
            quote! {#(#attrs)* Self::#name => <#path as ruff_diagnostics::Violation>::AUTOFIX,}
        })
        .collect();

    let rule_explanation_match_arms: TokenStream = rules
        .iter()
        .map(|Rule { name, attrs, path, .. }| {
            quote! {#(#attrs)* Self::#name => #path::explanation(),}
        })
        .collect();

    // Enable conversion from `DiagnosticKind` to `Rule`.
    let from_impls_for_diagnostic_kind: TokenStream = rules
        .iter()
        .map(|Rule { name, attrs, .. }| {
            quote! {#(#attrs)* stringify!(#name) => Rule::#name,}
        })
        .collect();

    quote! {
        #[derive(
            EnumIter,
            Debug,
            PartialEq,
            Eq,
            Copy,
            Clone,
            Hash,
            PartialOrd,
            Ord,
            ::ruff_macros::CacheKey,
            AsRefStr,
            ::strum_macros::IntoStaticStr,
        )]
        #[repr(u16)]
        #[strum(serialize_all = "kebab-case")]
        pub enum Rule { #rule_variants }

        impl Rule {
            /// Returns the format strings used to report violations of this rule.
            pub fn message_formats(&self) -> &'static [&'static str] {
                match self { #rule_message_formats_match_arms }
            }

            /// Returns the documentation for this rule.
            pub fn explanation(&self) -> Option<&'static str> {
                match self { #rule_explanation_match_arms }
            }

            /// Returns the autofix status of this rule.
            pub const fn autofixable(&self) -> ruff_diagnostics::AutofixKind {
                match self { #rule_autofixable_match_arms }
            }
        }

        impl AsRule for ruff_diagnostics::DiagnosticKind {
            fn rule(&self) -> Rule {
                match self.name.as_str() {
                    #from_impls_for_diagnostic_kind
                    _ => unreachable!("invalid rule name: {}", self.name),
                }
            }
        }
    }
}
impl Parse for Rule {
    /// Parses a match arm such as `(Pycodestyle, "E112") => (RuleGroup::Nursery, rules::pycodestyle::rules::logical_lines::NoIndentedBlock),`
    ///
    /// Leading outer attributes are collected, and the trailing comma after the arm is required.
    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
        let attrs = input.call(Attribute::parse_outer)?;

        // Pattern side: `(<linter>, "<code>")`.
        let lhs;
        parenthesized!(lhs in input);
        let linter = lhs.parse::<Ident>()?;
        lhs.parse::<Token![,]>()?;
        let code = lhs.parse::<LitStr>()?;

        input.parse::<Token![=>]>()?;

        // Value side: `(<group>, <path to the rule struct>)`.
        let rhs;
        parenthesized!(rhs in input);
        let group = rhs.parse::<Path>()?;
        rhs.parse::<Token![,]>()?;
        let path = rhs.parse::<Path>()?;

        input.parse::<Token![,]>()?;

        // The rule is named after the final segment of the struct path.
        let name = path.segments.last().unwrap().ident.clone();

        Ok(Self {
            name,
            linter,
            code,
            group,
            path,
            attrs,
        })
    }
}