Compare commits

...

3 Commits

Author SHA1 Message Date
Charlie Marsh
cdae6cf1f3 Run first lint rules atop tree-sitter 2022-10-09 21:10:43 -04:00
Charlie Marsh
c8dad90115 Write an extractor to generate AST from tree-sitter 2022-10-09 18:43:56 -04:00
Charlie Marsh
40ab5d353b Try out tree-sitter 2022-10-09 18:42:58 -04:00
11 changed files with 1528 additions and 68 deletions

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "tree-sitter-python"]
path = tree-sitter-python
url = https://github.com/tree-sitter/tree-sitter-python.git

138
Cargo.lock generated
View File

@@ -404,9 +404,9 @@ checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e"
[[package]]
name = "clap"
version = "4.0.9"
version = "4.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30607dd93c420c6f1f80b544be522a0238a7db35e6a12968d28910983fee0df0"
checksum = "4ed45cc2c62a3eff523e718d8576ba762c83a3146151093283ac62ae11933a73"
dependencies = [
"atty",
"bitflags",
@@ -419,9 +419,9 @@ dependencies = [
[[package]]
name = "clap_derive"
version = "4.0.9"
version = "4.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a307492e1a34939f79d3b6b9650bd2b971513cd775436bf2b78defeb5af00b"
checksum = "db342ce9fda24fb191e2ed4e102055a4d381c1086a06630174cd8da8d5d917ce"
dependencies = [
"heck",
"proc-macro-error",
@@ -452,6 +452,16 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "codespan-reporting"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e"
dependencies = [
"termcolor",
"unicode-width",
]
[[package]]
name = "colored"
version = "2.0.0"
@@ -574,6 +584,50 @@ dependencies = [
"syn",
]
[[package]]
name = "cxx"
version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19f39818dcfc97d45b03953c1292efc4e80954e1583c4aa770bac1383e2310a4"
dependencies = [
"cc",
"cxxbridge-flags",
"cxxbridge-macro",
"link-cplusplus",
]
[[package]]
name = "cxx-build"
version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e580d70777c116df50c390d1211993f62d40302881e54d4b79727acb83d0199"
dependencies = [
"cc",
"codespan-reporting",
"once_cell",
"proc-macro2",
"quote",
"scratch",
"syn",
]
[[package]]
name = "cxxbridge-flags"
version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56a46460b88d1cec95112c8c363f0e2c39afdb237f60583b0b36343bf627ea9c"
[[package]]
name = "cxxbridge-macro"
version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "747b608fecf06b0d72d440f27acc99288207324b793be2c17991839f3d4995ea"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "diff"
version = "0.1.13"
@@ -991,17 +1045,28 @@ checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df"
[[package]]
name = "iana-time-zone"
version = "0.1.50"
version = "0.1.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd911b35d940d2bd0bea0f9100068e5b97b51a1cbe13d13382f132e0365257a0"
checksum = "f5a6ef98976b22b3b7f2f3a806f858cb862044cfa66805aa3ad84cb3d3b785ed"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"winapi 0.3.9",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fde6edd6cef363e9359ed3c98ba64590ba9eecba2293eb5a723ab32aee8926aa"
dependencies = [
"cxx",
"cxx-build",
]
[[package]]
name = "idna"
version = "0.3.0"
@@ -1085,9 +1150,9 @@ dependencies = [
[[package]]
name = "itoa"
version = "1.0.3"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
[[package]]
name = "js-sys"
@@ -1221,6 +1286,15 @@ dependencies = [
"syn",
]
[[package]]
name = "link-cplusplus"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369"
dependencies = [
"cc",
]
[[package]]
name = "linked-hash-map"
version = "0.5.6"
@@ -1912,6 +1986,7 @@ dependencies = [
"anyhow",
"bincode",
"cacache",
"cc",
"chrono",
"clap",
"clearscreen",
@@ -1927,6 +2002,7 @@ dependencies = [
"log",
"notify",
"num-bigint",
"num-traits",
"once_cell",
"path-absolutize",
"rayon",
@@ -1939,6 +2015,8 @@ dependencies = [
"strum",
"strum_macros",
"toml",
"tree-sitter",
"tree-sitter-python",
"update-informer",
"walkdir",
]
@@ -2056,6 +2134,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "scratch"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898"
[[package]]
name = "sct"
version = "0.7.0"
@@ -2094,9 +2178,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.85"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44"
checksum = "41feea4228a6f1cd09ec7a3593a682276702cd67b5273544757dae23c096f074"
dependencies = [
"itoa",
"ryu",
@@ -2279,9 +2363,9 @@ dependencies = [
[[package]]
name = "syn"
version = "1.0.101"
version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
checksum = "3fcd952facd492f9be3ef0d0b7032a6e442ee9b361d4acc2b1d0c4aaa5f613a1"
dependencies = [
"proc-macro2",
"quote",
@@ -2409,6 +2493,26 @@ dependencies = [
"serde",
]
[[package]]
name = "tree-sitter"
version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4423c784fe11398ca91e505cdc71356b07b1a924fc8735cfab5333afe3e18bc"
dependencies = [
"cc",
"regex",
]
[[package]]
name = "tree-sitter-python"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dda114f58048f5059dcf158aff691dffb8e113e6d2b50d94263fd68711975287"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "twox-hash"
version = "1.6.3"
@@ -2497,9 +2601,9 @@ checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992"
[[package]]
name = "unicode-ident"
version = "1.0.4"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3"
[[package]]
name = "unicode-normalization"
@@ -2510,6 +2614,12 @@ dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "unicode-xid"
version = "0.2.4"

View File

@@ -6,10 +6,14 @@ edition = "2021"
[lib]
name = "ruff"
[build-dependencies]
cc = "*"
[dependencies]
anyhow = { version = "1.0.60" }
bincode = { version = "1.3.3" }
cacache = { version = "10.0.1" }
cc = "*"
chrono = { version = "0.4.21" }
clap = { version = "4.0.1", features = ["derive"] }
clearscreen = { version = "1.0.10" }
@@ -23,21 +27,24 @@ itertools = { version = "0.10.5" }
libcst = { git = "https://github.com/charliermarsh/LibCST", rev = "32a044c127668df44582f85699358e67803b0d73" }
log = { version = "0.4.17" }
notify = { version = "4.0.17" }
num-bigint = { version = "0.4.3" }
num-traits = { version = "0.2.15" }
once_cell = { version = "1.13.1" }
path-absolutize = { version = "3.0.13", features = ["once_cell_cache"] }
rayon = { version = "1.5.3" }
regex = { version = "1.6.0" }
rustpython-ast = { features = ["unparse"], git = "https://github.com/charliermarsh/RustPython.git", rev = "778ae2aeb521d0438d2a91bd11238bb5c2bf9d4f" }
rustpython-parser = { features = ["lalrpop"], git = "https://github.com/charliermarsh/RustPython.git", rev = "778ae2aeb521d0438d2a91bd11238bb5c2bf9d4f" }
rustpython-common = { git = "https://github.com/charliermarsh/RustPython.git", rev = "778ae2aeb521d0438d2a91bd11238bb5c2bf9d4f" }
rustpython-parser = { features = ["lalrpop"], git = "https://github.com/charliermarsh/RustPython.git", rev = "778ae2aeb521d0438d2a91bd11238bb5c2bf9d4f" }
serde = { version = "1.0.143", features = ["derive"] }
serde_json = { version = "1.0.83" }
strum = { version = "0.24.1", features = ["strum_macros"] }
strum_macros = { version = "0.24.3" }
toml = { version = "0.5.9" }
tree-sitter = { version = "0.20.9" }
tree-sitter-python = { version = "0.20.2" }
update-informer = { version = "0.5.0", default_features = false, features = ["pypi"], optional = true }
walkdir = { version = "2.3.2" }
strum = { version = "0.24.1", features = ["strum_macros"] }
strum_macros = "0.24.3"
num-bigint = "0.4.3"
[dev-dependencies]
insta = { version = "1.19.1", features = ["yaml"] }

28
build.rs Normal file
View File

@@ -0,0 +1,28 @@
use std::path::Path;
extern crate cc;
/// Build script: compiles the tree-sitter Python grammar sources found under
/// `tree-sitter-python/src` into two static libraries, `parser` (C) and
/// `scanner` (C++), and asks Cargo to re-run the script when either source
/// file changes.
///
/// NOTE(review): the manifest also lists the published `tree-sitter-python`
/// crate as a dependency, which presumably builds the same grammar — confirm
/// that both are actually needed.
fn main() {
    let grammar_src = Path::new("tree-sitter-python").join("src");

    // The generated parser is plain C.
    let parser_c = grammar_src.join("parser.c");
    let mut parser_build = cc::Build::new();
    parser_build
        .include(&grammar_src)
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .flag_if_supported("-Wno-trigraphs")
        .file(&parser_c);
    println!("cargo:rerun-if-changed={}", parser_c.display());
    parser_build.compile("parser");

    // The external scanner is C++, so it gets its own C++-mode build.
    let scanner_cc = grammar_src.join("scanner.cc");
    let mut scanner_build = cc::Build::new();
    scanner_build
        .cpp(true)
        .include(&grammar_src)
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .file(&scanner_cc);
    println!("cargo:rerun-if-changed={}", scanner_cc.display());
    scanner_build.compile("scanner");
}

36
examples/parse_python.rs Normal file
View File

@@ -0,0 +1,36 @@
use std::path::PathBuf;
use anyhow::Result;
use clap::Parser as ClapParser;
use tree_sitter::Parser;
use ruff::fs;
use ruff::tree_parser::extract_module;
// Command-line arguments for the `parse_python` example.
// Plain `//` comments are used on purpose: with clap's derive API, `///` doc
// comments are picked up as help text and would change the CLI output.
#[derive(Debug, ClapParser)]
struct Cli {
// Path of the Python source file to parse; the argument is mandatory.
#[arg(required = true)]
file: PathBuf,
}
fn main() -> Result<()> {
let cli = Cli::parse();
let src = fs::read_file(&cli.file)?;
let mut parser = Parser::new();
parser
.set_language(tree_sitter_python::language())
.expect("Error loading Python grammar");
let parse_tree = parser.parse(src.as_bytes(), None);
if let Some(parse_tree) = &parse_tree {
// let _ = extract_module(parse_tree.root_node(), src.as_bytes());
println!(
"{:#?}",
extract_module(parse_tree.root_node(), src.as_bytes())
);
}
Ok(())
}

1
foo.py Normal file
View File

@@ -0,0 +1 @@
x = call(1)

View File

@@ -2,10 +2,9 @@ use std::path::Path;
use anyhow::Result;
use log::debug;
use rustpython_parser::lexer::LexResult;
use crate::autofix::fixer::Mode;
use crate::linter::{check_path, tokenize};
use crate::linter::check_path;
use crate::message::Message;
use crate::settings::{RawSettings, Settings};
@@ -27,6 +26,7 @@ pub mod printer;
pub mod pyproject;
mod python;
pub mod settings;
pub mod tree_parser;
/// Run ruff over Python source code directly.
pub fn check(path: &Path, contents: &str) -> Result<Vec<Message>> {
@@ -44,21 +44,8 @@ pub fn check(path: &Path, contents: &str) -> Result<Vec<Message>> {
let settings = Settings::from_raw(RawSettings::from_pyproject(&pyproject, &project_root)?);
// Tokenize once.
let tokens: Vec<LexResult> = tokenize(contents);
// Determine the noqa line for every line in the source.
let noqa_line_for = noqa::extract_noqa_line_for(&tokens);
// Generate checks.
let checks = check_path(
path,
contents,
tokens,
&noqa_line_for,
&settings,
&Mode::None,
)?;
let checks = check_path(path, contents, &[], &settings, &Mode::None)?;
// Convert to messages.
let messages: Vec<Message> = checks

View File

@@ -5,6 +5,7 @@ use anyhow::Result;
use log::debug;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, parser};
use tree_sitter::Parser;
use crate::ast::types::Range;
use crate::autofix::fixer;
@@ -16,7 +17,8 @@ use crate::code_gen::SourceGenerator;
use crate::message::Message;
use crate::noqa::add_noqa;
use crate::settings::Settings;
use crate::{cache, fs, noqa};
use crate::tree_parser::extract_module;
use crate::{cache, fs};
/// Collect tokens up to and including the first error.
pub(crate) fn tokenize(contents: &str) -> Vec<LexResult> {
@@ -34,7 +36,6 @@ pub(crate) fn tokenize(contents: &str) -> Vec<LexResult> {
pub(crate) fn check_path(
path: &Path,
contents: &str,
tokens: Vec<LexResult>,
noqa_line_for: &[usize],
settings: &Settings,
autofix: &fixer::Mode,
@@ -48,17 +49,25 @@ pub(crate) fn check_path(
.iter()
.any(|check_code| matches!(check_code.lint_source(), LintSource::AST))
{
match parser::parse_program_tokens(tokens, "<filename>") {
let src = contents.as_bytes();
let mut parser = Parser::new();
parser
.set_language(tree_sitter_python::language())
.expect("Error loading Python grammar");
let parse_tree = parser.parse(src, None).unwrap();
match extract_module(parse_tree.root_node(), src) {
Ok(python_ast) => {
checks.extend(check_ast(&python_ast, contents, settings, autofix, path))
}
Err(parse_error) => {
if settings.enabled.contains(&CheckCode::E999) {
checks.push(Check::new(
CheckKind::SyntaxError(parse_error.error.to_string()),
CheckKind::SyntaxError(parse_error.to_string()),
Range {
location: parse_error.location,
end_location: parse_error.location,
location: Default::default(),
end_location: Default::default(),
},
))
}
@@ -100,14 +109,8 @@ pub fn lint_path(
// Read the file from disk.
let contents = fs::read_file(path)?;
// Tokenize once.
let tokens: Vec<LexResult> = tokenize(&contents);
// Determine the noqa line for every line in the source.
let noqa_line_for = noqa::extract_noqa_line_for(&tokens);
// Generate checks.
let mut checks = check_path(path, &contents, tokens, &noqa_line_for, settings, autofix)?;
let mut checks = check_path(path, &contents, &[], settings, autofix)?;
// Apply autofix.
if matches!(autofix, fixer::Mode::Apply) {
@@ -134,23 +137,10 @@ pub fn add_noqa_to_path(path: &Path, settings: &Settings) -> Result<usize> {
// Read the file from disk.
let contents = fs::read_file(path)?;
// Tokenize once.
let tokens: Vec<LexResult> = tokenize(&contents);
// Determine the noqa line for every line in the source.
let noqa_line_for = noqa::extract_noqa_line_for(&tokens);
// Generate checks.
let checks = check_path(
path,
&contents,
tokens,
&noqa_line_for,
settings,
&fixer::Mode::None,
)?;
let checks = check_path(path, &contents, &[], settings, &fixer::Mode::None)?;
add_noqa(&checks, &contents, &noqa_line_for, path)
add_noqa(&checks, &contents, &[], path)
}
pub fn autoformat_path(path: &Path) -> Result<()> {
@@ -175,14 +165,12 @@ mod tests {
use anyhow::Result;
use regex::Regex;
use rustpython_parser::lexer::LexResult;
use crate::autofix::fixer;
use crate::checks::{Check, CheckCode};
use crate::fs;
use crate::linter;
use crate::linter::tokenize;
use crate::settings;
use crate::{fs, noqa};
fn check_path(
path: &Path,
@@ -190,9 +178,7 @@ mod tests {
autofix: &fixer::Mode,
) -> Result<Vec<Check>> {
let contents = fs::read_file(path)?;
let tokens: Vec<LexResult> = tokenize(&contents);
let noqa_line_for = noqa::extract_noqa_line_for(&tokens);
linter::check_path(path, &contents, tokens, &noqa_line_for, settings, autofix)
linter::check_path(path, &contents, &[], settings, autofix)
}
#[test]

View File

@@ -44,6 +44,7 @@ pub fn extract_noqa_directive(line: &str) -> Directive {
}
}
#[allow(dead_code)]
pub fn extract_noqa_line_for(lxr: &[LexResult]) -> Vec<usize> {
let mut noqa_line_for: Vec<usize> = vec![];

1300
src/tree_parser.rs Normal file

File diff suppressed because it is too large Load Diff

1
tree-sitter-python Submodule

Submodule tree-sitter-python added at de221eccf9