Compare commits

...

2 Commits

Author SHA1 Message Date
Micha Reiser
7f16544cc6 fields only version 2025-06-05 17:25:02 +02:00
Micha Reiser
ff6f0b6ab8 Prototype of file inclusion and exclusion 2025-06-04 17:05:24 +02:00
14 changed files with 848 additions and 41 deletions

4
Cargo.lock generated
View File

@@ -3933,12 +3933,16 @@ name = "ty_project"
version = "0.0.0"
dependencies = [
"anyhow",
"camino",
"crossbeam",
"glob",
"globset",
"ignore",
"insta",
"notify",
"pep440_rs",
"rayon",
"regex-automata 0.4.9",
"ruff_cache",
"ruff_db",
"ruff_macros",

View File

@@ -126,6 +126,7 @@ quote = { version = "1.0.23" }
rand = { version = "0.9.0" }
rayon = { version = "1.10.0" }
regex = { version = "1.10.2" }
regex-automata = { version = "0.4.9" }
rustc-hash = { version = "2.0.0" }
rustc-stable-hash = { version = "0.1.2" }
# When updating salsa, make sure to also update the revision in `fuzz/Cargo.toml`
@@ -165,7 +166,7 @@ tracing-subscriber = { version = "0.3.18", default-features = false, features =
"env-filter",
"fmt",
"ansi",
"smallvec"
"smallvec",
] }
tryfn = { version = "0.2.1" }
typed-arena = { version = "2.0.2" }
@@ -175,11 +176,7 @@ unicode-width = { version = "0.2.0" }
unicode_names2 = { version = "1.2.2" }
unicode-normalization = { version = "0.1.23" }
url = { version = "2.5.0" }
uuid = { version = "1.6.1", features = [
"v4",
"fast-rng",
"macro-diagnostics",
] }
uuid = { version = "1.6.1", features = ["v4", "fast-rng", "macro-diagnostics"] }
walkdir = { version = "2.3.2" }
wasm-bindgen = { version = "0.2.92" }
wasm-bindgen-test = { version = "0.3.42" }
@@ -214,8 +211,8 @@ must_use_candidate = "allow"
similar_names = "allow"
single_match_else = "allow"
too_many_lines = "allow"
needless_continue = "allow" # An explicit continue can be more readable, especially if the alternative is an empty block.
unnecessary_debug_formatting = "allow" # too many instances, the display also doesn't quote the path which is often desired in logs where we use them the most often.
needless_continue = "allow" # An explicit continue can be more readable, especially if the alternative is an empty block.
unnecessary_debug_formatting = "allow" # too many instances, the display also doesn't quote the path which is often desired in logs where we use them the most often.
# Without the hashes we run into a `rustfmt` bug in some snapshot tests, see #13250
needless_raw_string_hashes = "allow"
# Disallowed restriction lints

View File

@@ -652,6 +652,8 @@ pub enum DiagnosticId {
/// Some I/O operation failed
Io,
NoFiles,
/// Some code contains a syntax error
InvalidSyntax,
@@ -699,6 +701,7 @@ impl DiagnosticId {
DiagnosticId::Lint(name) => name.as_str(),
DiagnosticId::RevealedType => "revealed-type",
DiagnosticId::UnknownRule => "unknown-rule",
DiagnosticId::NoFiles => "no-files",
}
}

View File

@@ -369,6 +369,30 @@ impl SystemPath {
Some(SystemPath::new(Utf8Path::from_path(path)?))
}
/// Returns `true` if the `SystemPath` is absolute, i.e., if it is independent of
/// the current directory.
///
/// * On Unix, a path is absolute if it starts with the root, so
/// `is_absolute` and [`has_root`] are equivalent.
///
/// * On Windows, a path is absolute if it has a prefix and starts with the
/// root: `c:\windows` is absolute, while `c:temp` and `\temp` are not.
///
/// # Examples
///
/// ```
/// use ruff_db::system::SystemPath;
///
/// assert!(!SystemPath::new("foo.txt").is_absolute());
/// ```
///
/// [`has_root`]: Utf8Path::has_root
#[inline]
#[must_use]
pub fn is_absolute(&self) -> bool {
// Forwards to the wrapped path's own `is_absolute`.
self.0.is_absolute()
}
/// Makes a path absolute and normalizes it without accessing the file system.
///
/// Adapted from [cargo](https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61)
@@ -534,6 +558,10 @@ impl SystemPathBuf {
self.0
}
/// Consumes the `SystemPathBuf` and returns the `String` produced by the
/// wrapped path buffer's `into_string`.
pub fn into_string(self) -> String {
self.0.into_string()
}
pub fn into_std_path_buf(self) -> PathBuf {
self.0.into_std_path_buf()
}
@@ -822,7 +850,7 @@ impl ruff_cache::CacheKey for SystemVirtualPathBuf {
///
/// # Examples
/// ```rust
/// use ruff_db::system::{SystemPath, deduplicate_nested_paths};///
/// use ruff_db::system::{SystemPath, deduplicate_nested_paths};
///
/// let paths = vec![SystemPath::new("/a/b/c"), SystemPath::new("/a/b"), SystemPath::new("/a/beta"), SystemPath::new("/a/b/c")];
/// assert_eq!(deduplicate_nested_paths(paths).collect::<Vec<_>>(), &[SystemPath::new("/a/b"), SystemPath::new("/a/beta")]);

View File

@@ -143,6 +143,23 @@ typeshed = "/path/to/custom/typeshed"
## `src`
#### `files`
TODO
**Default value**: `null`
**Type**: `list[pattern]`
**Example usage** (`pyproject.toml`):
```toml
[tool.ty.src]
files = ["./app", "!app/build"]
```
---
#### `respect-ignore-files`
Whether to automatically exclude files that are ignored by `.ignore`,

View File

@@ -19,7 +19,7 @@ use colored::Colorize;
use crossbeam::channel as crossbeam_channel;
use rayon::ThreadPoolBuilder;
use ruff_db::Upcast;
use ruff_db::diagnostic::{Diagnostic, DisplayDiagnosticConfig, Severity};
use ruff_db::diagnostic::{Diagnostic, DiagnosticId, DisplayDiagnosticConfig, Severity};
use ruff_db::max_parallelism;
use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf};
use salsa::plumbing::ZalsaDatabase;
@@ -276,7 +276,7 @@ impl MainLoop {
}
MainLoopMessage::CheckCompleted {
result,
mut result,
revision: check_revision,
} => {
let terminal_settings = db.project().settings(db).terminal();
@@ -286,7 +286,11 @@ impl MainLoop {
if check_revision == revision {
if db.project().files(db).is_empty() {
tracing::warn!("No python files found under the given path(s)");
result.push(Diagnostic::new(
DiagnosticId::NoFiles,
Severity::Warning,
"No python files found under the given path(s)",
));
}
let mut stdout = stdout().lock();

View File

@@ -24,8 +24,11 @@ ty_python_semantic = { workspace = true, features = ["serde"] }
ty_vendored = { workspace = true }
anyhow = { workspace = true }
camino = { workspace = true }
crossbeam = { workspace = true }
ignore = { workspace = true }
glob = { workspace = true }
globset = { workspace = true }
notify = { workspace = true }
pep440_rs = { workspace = true, features = ["version-ranges"] }
rayon = { workspace = true }
@@ -35,6 +38,7 @@ schemars = { workspace = true, optional = true }
serde = { workspace = true }
thiserror = { workspace = true }
toml = { workspace = true }
regex-automata = { workspace = true }
tracing = { workspace = true }
[dev-dependencies]

View File

@@ -7,7 +7,7 @@ use std::collections::BTreeSet;
use crate::walk::ProjectFilesWalker;
use ruff_db::Db as _;
use ruff_db::files::{File, Files};
use ruff_db::system::SystemPath;
use ruff_db::system::{FileType, SystemPath};
use rustc_hash::FxHashSet;
use ty_python_semantic::Program;
@@ -113,8 +113,16 @@ impl ProjectDatabase {
// should be included in the project. We can skip this check for
// paths that aren't part of the project or shouldn't be included
// when checking the project.
if project.is_path_included(self, &path) {
if self.system().is_file(&path) {
let metadata = self
.system()
.path_metadata(&path)
.map(|metadata| metadata.file_type());
if project.is_path_included(
self,
&path,
matches!(metadata, Ok(FileType::Directory)),
) {
if matches!(metadata, Ok(FileType::File)) {
// Add the parent directory because `walkdir` always visits explicitly passed files
// even if they match an exclude filter.
added_paths.insert(path.parent().unwrap().to_path_buf());
@@ -153,7 +161,7 @@ impl ProjectDatabase {
result.custom_stdlib_changed = true;
}
if project.is_path_included(self, &path) || path == project_root {
if project.is_path_included(self, &path, true) || path == project_root {
// TODO: Shouldn't it be enough to simply traverse the project files and remove all
// that start with the given path?
tracing::debug!(

View File

@@ -127,7 +127,7 @@ impl Reporter for DummyReporter {
#[salsa::tracked]
impl Project {
pub fn from_metadata(db: &dyn Db, metadata: ProjectMetadata) -> Self {
let (settings, settings_diagnostics) = metadata.options().to_settings(db);
let (settings, settings_diagnostics) = metadata.options().to_settings(db, metadata.root());
Project::builder(metadata, settings, settings_diagnostics)
.durability(Durability::MEDIUM)
@@ -160,8 +160,8 @@ impl Project {
/// the project's include and exclude settings as well as the paths that were passed to `ty check <paths>`.
/// This means, that this method is an over-approximation of `Self::files` and may return `true` for paths
/// that won't be included when checking the project because they're ignored in a `.gitignore` file.
pub fn is_path_included(self, db: &dyn Db, path: &SystemPath) -> bool {
ProjectFilesFilter::from_project(db, self).is_included(path)
pub fn is_path_included(self, db: &dyn Db, path: &SystemPath, is_directory: bool) -> bool {
ProjectFilesFilter::from_project(db, self).is_included(path, is_directory)
}
pub fn reload(self, db: &mut dyn Db, metadata: ProjectMetadata) {
@@ -169,7 +169,8 @@ impl Project {
assert_eq!(self.root(db), metadata.root());
if &metadata != self.metadata(db) {
let (settings, settings_diagnostics) = metadata.options().to_settings(db);
let (settings, settings_diagnostics) =
metadata.options().to_settings(db, metadata.root());
if self.settings(db) != &settings {
self.set_settings(db).to(settings);

View File

@@ -1,5 +1,9 @@
use crate::Db;
use crate::metadata::value::{RangedValue, RelativePathBuf, ValueSource, ValueSourceGuard};
use crate::metadata::settings::SrcSettings;
use crate::metadata::value::{
RangedValue, RelativePathBuf, RelativePathPattern, ValueSource, ValueSourceGuard,
};
use crate::walk::FilePatternsBuilder;
use ruff_db::diagnostic::{Annotation, Diagnostic, DiagnosticFormat, DiagnosticId, Severity, Span};
use ruff_db::files::system_path_to_file;
use ruff_db::system::{System, SystemPath, SystemPathBuf};
@@ -199,10 +203,20 @@ impl Options {
}
#[must_use]
pub(crate) fn to_settings(&self, db: &dyn Db) -> (Settings, Vec<OptionDiagnostic>) {
pub(crate) fn to_settings(
&self,
db: &dyn Db,
project_root: &SystemPath,
) -> (Settings, Vec<OptionDiagnostic>) {
let (rules, diagnostics) = self.to_rule_selection(db);
let mut settings = Settings::new(rules, self.src.as_ref());
let mut settings = Settings::new(rules);
if let Some(src) = self.src.as_ref() {
tracing::debug!("found src options: {src:?}");
// TODO: Error handling
settings.set_src(src.to_settings(db.system(), project_root).unwrap());
}
if let Some(terminal) = self.terminal.as_ref() {
settings.set_terminal(TerminalSettings {
@@ -408,6 +422,17 @@ pub struct SrcOptions {
)]
pub root: Option<RelativePathBuf>,
/// TODO
#[serde(skip_serializing_if = "Option::is_none")]
#[option(
default = r#"null"#,
value_type = "list[pattern]",
example = r#"
files = ["./app", "!app/build"]
"#
)]
pub files: Option<Vec<RelativePathPattern>>,
/// Whether to automatically exclude files that are ignored by `.ignore`,
/// `.gitignore`, `.git/info/exclude`, and global `gitignore` files.
/// Enabled by default.
@@ -422,6 +447,31 @@ pub struct SrcOptions {
pub respect_ignore_files: Option<bool>,
}
impl SrcOptions {
fn to_settings(
&self,
system: &dyn System,
project_root: &SystemPath,
// diagnostics: &mut Vec<OptionDiagnostic>,
) -> Result<SrcSettings, ()> {
// TODO: Error handling, default exclusions
let mut files = FilePatternsBuilder::new();
for pattern in self.files.iter().flatten() {
files.add(&pattern.absolute(project_root, system)).unwrap();
}
let src = SrcSettings {
respect_ignore_files: self.respect_ignore_files.unwrap_or(true),
files: files.build().unwrap(),
};
tracing::debug!("Resolved src settings: {src:?}");
Ok(src)
}
}
#[derive(Debug, Default, Clone, Eq, PartialEq, Combine, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", transparent)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]

View File

@@ -1,9 +1,9 @@
use std::sync::Arc;
use crate::metadata::options::SrcOptions;
use ruff_db::diagnostic::DiagnosticFormat;
use std::sync::Arc;
use ty_python_semantic::lint::RuleSelection;
use crate::walk::FilePatterns;
/// The resolved [`super::Options`] for the project.
///
/// Unlike [`super::Options`], the struct has default values filled in and
@@ -23,19 +23,15 @@ pub struct Settings {
terminal: TerminalSettings,
respect_ignore_files: bool,
src: SrcSettings,
}
impl Settings {
pub fn new(rules: RuleSelection, src_options: Option<&SrcOptions>) -> Self {
let respect_ignore_files = src_options
.and_then(|src| src.respect_ignore_files)
.unwrap_or(true);
pub fn new(rules: RuleSelection) -> Self {
Self {
rules: Arc::new(rules),
terminal: TerminalSettings::default(),
respect_ignore_files,
src: SrcSettings::default(),
}
}
@@ -43,8 +39,12 @@ impl Settings {
&self.rules
}
pub fn respect_ignore_files(&self) -> bool {
self.respect_ignore_files
pub fn src(&self) -> &SrcSettings {
&self.src
}
pub fn set_src(&mut self, src: SrcSettings) {
self.src = src;
}
pub fn to_rules(&self) -> Arc<RuleSelection> {
@@ -65,3 +65,20 @@ pub struct TerminalSettings {
pub output_format: DiagnosticFormat,
pub error_on_warning: bool,
}
/// The resolved settings of the `src` configuration section.
///
/// Built from `SrcOptions`, with defaults filled in for unset options.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SrcSettings {
/// Whether the project walker enables its standard (ignore file) filters.
/// `true` unless the `respect-ignore-files` option says otherwise.
pub respect_ignore_files: bool,
/// The compiled include and exclude patterns of the `files` option.
pub files: FilePatterns,
}
impl Default for SrcSettings {
/// Returns the settings that apply when no `src` options are configured:
/// ignore files are respected and the pattern set is empty.
fn default() -> Self {
Self {
respect_ignore_files: true,
// TODO: This should include all files by default
// An empty pattern set reports `PatternMatch::None` for every path.
files: FilePatterns::empty(),
}
}
}

View File

@@ -344,3 +344,56 @@ impl RelativePathBuf {
SystemPath::absolute(&self.0, relative_to)
}
}
/// A relative path pattern that allows for negative patterns (git ignore style).
///
/// A leading `!` marks the pattern as negated. The wrapped `RangedValue`
/// carries the pattern text together with the `ValueSource` it came from,
/// which decides what the pattern is resolved against in `absolute`.
#[derive(
Debug,
Clone,
serde::Serialize,
serde::Deserialize,
PartialEq,
Eq,
PartialOrd,
Ord,
Hash,
Combine,
)]
#[serde(transparent)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct RelativePathPattern(RangedValue<String>);
impl RelativePathPattern {
    /// Creates a pattern from its text and the source it was read from.
    pub fn new(pattern: String, source: ValueSource) -> Self {
        Self(RangedValue::new(pattern, source))
    }

    /// Creates a pattern whose source is the command line.
    pub fn cli(pattern: String) -> Self {
        Self::new(pattern, ValueSource::Cli)
    }

    /// Returns the pattern text as it was provided (including a leading `!`, if any).
    pub fn pattern(&self) -> &str {
        &self.0
    }

    /// Resolves the relative pattern to an absolute pattern.
    ///
    /// Patterns read from a file are resolved against `project_root`, patterns
    /// from the CLI against the system's current directory. A leading `!` is
    /// kept in front of the resolved path. Patterns whose glob starts with `**`
    /// are returned unchanged.
    pub fn absolute(&self, project_root: &SystemPath, system: &dyn System) -> String {
        let base = match &self.0.source {
            ValueSource::File(_) => project_root,
            ValueSource::Cli => system.current_directory(),
        };

        // Split the optional negation marker from the glob itself.
        let (is_negated, glob) = match self.0.strip_prefix('!') {
            Some(rest) => (true, rest),
            None => (false, self.0.as_str()),
        };

        // Patterns starting with `**` don't need to be anchored.
        if glob.starts_with("**") {
            return self.0.to_string();
        }

        let anchored = SystemPath::absolute(glob, base);

        if is_negated {
            format!("!{}", anchored)
        } else {
            anchored.into_string()
        }
    }
}

View File

@@ -1,10 +1,14 @@
use crate::{Db, IOErrorDiagnostic, IOErrorKind, Project};
use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
use regex_automata::util::pool::Pool;
use ruff_db::files::{File, system_path_to_file};
use ruff_db::system::walk_directory::{ErrorKind, WalkDirectoryBuilder, WalkState};
use ruff_db::system::{FileType, SystemPath, SystemPathBuf};
use ruff_python_ast::PySourceType;
use rustc_hash::{FxBuildHasher, FxHashSet};
use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet};
use std::borrow::Cow;
use std::path::PathBuf;
use std::sync::Arc;
use thiserror::Error;
/// Filter that decides which files are included in the project.
@@ -13,11 +17,15 @@ use thiserror::Error;
///
/// This struct mainly exists because `dyn Db` isn't `Send` or `Sync`, making it impossible
/// to access fields from within the walker.
#[derive(Default, Debug)]
#[derive(Debug)]
pub(crate) struct ProjectFilesFilter<'a> {
/// The same as [`Project::included_paths_or_root`].
included_paths: &'a [SystemPathBuf],
files_patterns: &'a FilePatterns,
project_root: &'a SystemPath,
/// The filter skips checking if the path is in `included_paths` if set to `true`.
///
/// Skipping this check is useful when the walker only walks over `included_paths`.
@@ -28,6 +36,8 @@ impl<'a> ProjectFilesFilter<'a> {
pub(crate) fn from_project(db: &'a dyn Db, project: Project) -> Self {
Self {
included_paths: project.included_paths_or_root(db),
project_root: project.root(db),
files_patterns: &project.settings(db).src().files,
skip_included_paths: false,
}
}
@@ -45,7 +55,7 @@ impl<'a> ProjectFilesFilter<'a> {
/// This method may return `true` for files that don't end up being included when walking the
/// project tree because it doesn't consider `.gitignore` and other ignore files when deciding
/// if a file's included.
pub(crate) fn is_included(&self, path: &SystemPath) -> bool {
pub(crate) fn is_included(&self, path: &SystemPath, is_directory: bool) -> bool {
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum CheckPathMatch {
/// The path is a partial match of the checked path (it's a sub path)
@@ -78,8 +88,26 @@ impl<'a> ProjectFilesFilter<'a> {
match m {
None => false,
Some(CheckPathMatch::Partial) => {
if path == self.project_root {
return true;
}
// TODO: Do we need to use `matched_path_or_any_parents` when not walking?
let matched = self.files_patterns.matches_path(path, is_directory);
tracing::debug!("path `{path} matches {matched:?}");
// TODO: For partial matches, only include the file if it is included by the project's include/exclude settings.
true
match matched {
// We need to traverse directories that don't match because `a` doesn't match the pattern `a/b/c/d.py`
// but we need to traverse the directory to successfully match `a/b/c/d.py`.
// This is very unfortunate because it means ty traverses all directories when e.g. using `files = ["src"]`.
// TODO(micha): 04.06.2025: It would be nice if we could avoid traversing directories
// that are known to never match because they don't share a common prefix with any of the globs.
// But we'd need to be careful in the presence of `**/test` patterns because they can match any path.
PatternMatch::None => true,
PatternMatch::Exclude(_) => false,
PatternMatch::Include => true,
}
}
Some(CheckPathMatch::Full) => true,
}
@@ -132,7 +160,7 @@ impl<'a> ProjectFilesWalker<'a> {
let mut walker = db
.system()
.walk_directory(paths.next()?.as_ref())
.standard_filters(db.project().settings(db).respect_ignore_files())
.standard_filters(db.project().settings(db).src().respect_ignore_files)
.ignore_hidden(false);
for path in paths {
@@ -152,7 +180,10 @@ impl<'a> ProjectFilesWalker<'a> {
Box::new(|entry| {
match entry {
Ok(entry) => {
if !self.filter.is_included(entry.path()) {
if !self
.filter
.is_included(entry.path(), entry.file_type().is_directory())
{
tracing::debug!("Ignoring not-included path: {}", entry.path());
return WalkState::Skip;
}
@@ -258,3 +289,583 @@ pub(crate) enum WalkError {
#[error("`{path}` is not a valid UTF-8 path")]
NonUtf8Path { path: PathBuf },
}
/// A compiled, ordered set of include and exclude (negated) glob patterns.
///
/// Instances are created with `FilePatterns::empty` or `FilePatternsBuilder::build`.
#[derive(Clone)]
pub struct FilePatterns {
/// The compiled globs. The glob at index `i` belongs to `patterns[i]`.
set: GlobSet,
/// Per-glob metadata, in the order in which the globs were added.
patterns: Box<[FilePattern]>,
/// Pool of reusable buffers that receive the indices of the matching globs.
/// `empty` sets this to `None`; `FilePatternsBuilder::build` always sets it.
matches: Option<Arc<Pool<Vec<usize>>>>,
/// Tree of the literal leading path components of the include patterns.
/// Used to skip directories that can't contain an included path.
static_prefixes: IncludedPrefixes,
/// The number of non-negated patterns that were added.
num_positive: usize,
}
impl FilePatterns {
    /// Creates a pattern set that contains no patterns.
    ///
    /// [`Self::matches_path`] returns [`PatternMatch::None`] for every path.
    pub(crate) fn empty() -> Self {
        Self {
            set: GlobSet::empty(),
            patterns: Box::default(),
            matches: None,
            static_prefixes: IncludedPrefixes::new(),
            num_positive: 0,
        }
    }

    /// Tests `path` against the patterns, treating it as a file.
    pub(crate) fn matches(&self, path: impl AsRef<SystemPath>) -> PatternMatch {
        self.matches_path(path.as_ref(), false)
    }

    /// Tests `path` against the patterns, treating it as a directory.
    pub(crate) fn matches_directory(&self, path: impl AsRef<SystemPath>) -> PatternMatch {
        self.matches_path(path.as_ref(), true)
    }

    /// Tests the absolute `path` against the patterns.
    ///
    /// The matching pattern that was added last decides the outcome. Patterns
    /// that only apply to directories are skipped unless `is_directory` is `true`.
    ///
    /// If no pattern applies but the set contains at least one include pattern:
    ///
    /// * files are excluded with [`ExcludeReason::NoIncludePattern`],
    /// * directories are excluded with the same reason only if the static
    ///   prefixes prove that no include pattern can match below them.
    ///
    /// In all other cases the result is [`PatternMatch::None`].
    pub(crate) fn matches_path(&self, path: &SystemPath, is_directory: bool) -> PatternMatch {
        debug_assert!(path.is_absolute(), "Path {path} isn't absolute");

        if self.patterns.is_empty() {
            return PatternMatch::None;
        }

        let candidate = Candidate::new(path);
        let mut matches = self
            .matches
            .as_ref()
            .expect("a non-empty pattern set to always have a match buffer pool")
            .get();
        self.set.matches_candidate_into(&candidate, &mut *matches);

        // Walk the matching globs from the last added to the first so that
        // later patterns take precedence over earlier ones.
        for &i in matches.iter().rev() {
            let pattern = &self.patterns[i];

            if pattern.is_only_directory && !is_directory {
                continue;
            }

            return if pattern.negated {
                PatternMatch::Exclude(ExcludeReason::Match)
            } else {
                PatternMatch::Include
            };
        }

        if self.num_positive > 0 {
            if is_directory {
                // Skip directories for which we know that no glob has a shared prefix with.
                // E.g. if `files = ["src"]`, skip `tests`
                if self.static_prefixes.is_statically_excluded(path) {
                    return PatternMatch::Exclude(ExcludeReason::NoIncludePattern);
                }
            } else {
                // If this is a file and there's at least one include pattern but the file doesn't match it,
                // then the file is excluded. If there are only exclude patterns, then the file should be included.
                return PatternMatch::Exclude(ExcludeReason::NoIncludePattern);
            }
        }

        PatternMatch::None
    }

    /// Tests a single path without relying on a directory walk.
    ///
    /// Every ancestor of `path` is tested as a directory first. The first
    /// ancestor that is excluded decides the result. Otherwise, `path` itself
    /// is tested as a file.
    pub(crate) fn match_once(&self, path: &SystemPath) -> PatternMatch {
        for parent in path.ancestors().skip(1) {
            match self.matches_directory(parent) {
                PatternMatch::Include | PatternMatch::None => {
                    continue;
                }
                PatternMatch::Exclude(exclude_reason) => {
                    return PatternMatch::Exclude(exclude_reason);
                }
            }
        }

        // At this point it is known that no parent path is excluded.
        // TODO: This could be a directory too
        self.matches(path)
    }
}
// Equality only looks at the `patterns` list; the compiled glob set, the buffer
// pool, the static prefixes and the include count are not compared.
impl PartialEq for FilePatterns {
fn eq(&self, other: &Self) -> bool {
self.patterns == other.patterns
}
}
impl Eq for FilePatterns {}
// Hand-written `Debug` that only prints the `patterns` field and omits all
// other fields of the struct.
impl std::fmt::Debug for FilePatterns {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("FilePatterns")
.field("patterns", &self.patterns)
.finish()
}
}
/// Incrementally collects patterns and compiles them into a `FilePatterns` set.
#[derive(Debug)]
pub(crate) struct FilePatternsBuilder {
/// Collects the globs that get compiled in `build`.
set: GlobSetBuilder,
/// Metadata for every glob added to `set`, in the same order.
patterns: Vec<FilePattern>,
/// Literal leading path components of the include patterns added so far.
static_prefixes: IncludedPrefixes,
/// The number of non-negated patterns added so far.
num_positive: usize,
}
impl FilePatternsBuilder {
    /// Creates a builder without any patterns.
    pub(crate) fn new() -> Self {
        Self {
            set: GlobSetBuilder::new(),
            patterns: Vec::new(),
            static_prefixes: IncludedPrefixes::new(),
            num_positive: 0,
        }
    }

    /// Adds a pattern to the set.
    ///
    /// * A leading `!` negates the pattern (the pattern excludes paths).
    /// * A trailing `/` restricts the pattern to directories.
    /// * The remaining glob must be anchored (absolute) or start with `**`.
    ///
    /// Patterns added later take precedence over patterns added earlier.
    ///
    /// # Errors
    ///
    /// Returns the `globset` error if the pattern isn't a valid glob.
    pub(crate) fn add(&mut self, input: &str) -> Result<&mut Self, globset::Error> {
        let mut pattern = FilePattern {
            negated: false,
            is_only_directory: false,
            original: input.to_string(),
        };

        let mut glob = input;

        if let Some(after) = glob.strip_prefix('!') {
            pattern.negated = true;
            glob = after;
        }

        // `RelativePathPattern::absolute` intentionally leaves patterns starting with `**`
        // unanchored, so they must be accepted here as well.
        debug_assert!(
            glob.starts_with("**") || SystemPath::new(glob).is_absolute(),
            "The glob {input} isn't anchored"
        );

        // A pattern ending with a `/` should only match directories. E.g. `src/` only matches directories
        // whereas `src` matches both files and directories.
        // We need to remove the `/` to ensure that a path missing the trailing `/` matches.
        if let Some(before) = glob.strip_suffix('/') {
            pattern.is_only_directory = true;
            glob = before;

            // If the slash was escaped, then remove the escape.
            // See: https://github.com/BurntSushi/ripgrep/issues/2236
            //
            // Only the run of backslashes directly in front of the slash matters: an odd count
            // means that the last backslash escaped the slash. Remove only that single
            // backslash, the ones before it form escaped (literal) backslashes.
            let trailing_backslashes = glob.chars().rev().take_while(|c| *c == '\\').count();
            if trailing_backslashes % 2 == 1 {
                glob = &glob[..glob.len() - 1];
            }
        }

        // Non-negated patterns that don't already end with `**` also match everything
        // below them. This turns `src` into `src` plus `src/**`.
        // NOTE(review): the `pattern_with_extension` test and the unused `is_implicit_glob`
        // helper suggest that this was meant to apply only if the last component contains
        // no wildcard or extension. Confirm the intended behavior.
        // TODO: Should we also enable this behavior for `is_only_directory` patterns?
        if !glob.ends_with("**") && !pattern.negated {
            let parsed = GlobBuilder::new(&format!("{glob}/**"))
                .literal_separator(true)
                .backslash_escape(true)
                // TODO: Map the error to the pattern the user provided.
                .build()?;

            self.set.add(parsed);
            // The content of a directory-only pattern isn't restricted to directories.
            self.patterns.push(FilePattern {
                is_only_directory: false,
                ..pattern.clone()
            });
        }

        let mut actual = Cow::Borrowed(glob);

        // If the glob ends with `/**`, then we should only match everything
        // inside a directory, but not the directory itself. Standard globs
        // will match the directory. So we add `/*` to force the issue.
        if actual.ends_with("/**") {
            actual = Cow::Owned(format!("{}/*", actual));
        }

        // Unlike gitignore, anchor paths (don't insert a `**` prefix).
        let parsed = GlobBuilder::new(&*actual)
            .literal_separator(true)
            .backslash_escape(true)
            // TODO: Map the error to the pattern the user provided.
            .build()?;

        if !pattern.negated {
            self.num_positive += 1;

            let mut parent = self.static_prefixes.root_mut();

            // Do a best effort at extracting a static prefix from a positive include match.
            // This allows short-circuiting traversal of folders that are known to not overlap with any positive
            // match. However, we have to be careful. Any path starting with a `**` requires visiting all folders.
            for component in SystemPath::new(glob).components() {
                let name = component.as_str();

                // `glob::Pattern::escape` only knows about `?`, `*`, `[` and `]`. `globset` also
                // supports `{a,b}` alternations, which must not be treated as a literal
                // directory name or directories matching an alternative get pruned.
                let is_dynamic =
                    glob::Pattern::escape(name) != name || name.contains(['{', '}']);

                if is_dynamic {
                    *parent = IncludedPrefix::Dynamic;
                    break;
                }

                let static_parent = match parent {
                    IncludedPrefix::Dynamic => {
                        break;
                    }
                    IncludedPrefix::Static(static_prefix) => static_prefix,
                };

                parent = static_parent
                    .0
                    .entry(component.to_string())
                    .or_insert_with(|| IncludedPrefix::Static(StaticPrefix::default()));
            }
        }

        self.set.add(parsed);
        self.patterns.push(pattern);

        Ok(self)
    }

    /// Compiles the collected patterns into a [`FilePatterns`] set.
    ///
    /// # Errors
    ///
    /// Returns the `globset` error if the glob set can't be built.
    pub(crate) fn build(self) -> Result<FilePatterns, globset::Error> {
        Ok(FilePatterns {
            set: self.set.build()?,
            patterns: self.patterns.into(),
            matches: Some(Arc::new(Pool::new(|| vec![]))),
            static_prefixes: self.static_prefixes,
            num_positive: self.num_positive,
        })
    }
}
/// The outcome of testing a path against a `FilePatterns` set.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum PatternMatch {
/// The highest precedence pattern is an include pattern.
Include,
/// The highest precedence pattern is a negated pattern (the file should not be included).
/// Also returned when no include pattern can apply to the path; the
/// `ExcludeReason` tells the two cases apart.
Exclude(ExcludeReason),
/// No pattern matched the path.
None,
}
/// Why a path was reported as `PatternMatch::Exclude`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum ExcludeReason {
/// The path is excluded because it matches a negative pattern.
Match,
/// It's a path that doesn't match any include pattern.
/// Only reported when the set contains at least one include pattern.
NoIncludePattern,
}
/// Metadata about a single glob of a `FilePatterns` set.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct FilePattern {
/// The pattern as specified by the user.
/// This is the full input passed to `FilePatternsBuilder::add`, including a leading `!`.
original: String,
/// Whether the glob should only match directories (`src/` matches only directories).
is_only_directory: bool,
/// Whether this pattern was negated.
negated: bool,
}
/// Returns `true` if the last component of `pattern` contains none of `.`, `*` or `?`.
///
/// Returns `false` for a pattern without any component.
fn is_implicit_glob(pattern: &str) -> bool {
    match SystemPath::new(pattern).components().last() {
        Some(last) => !last.as_str().contains(['.', '*', '?']),
        None => false,
    }
}
/// Tree of the literal (wildcard free) leading path components of the include patterns.
///
/// `FilePatterns` consults it to decide whether a directory can be skipped.
#[derive(Clone, Debug)]
struct IncludedPrefixes {
/// The node from which every lookup starts.
root: IncludedPrefix,
}
impl IncludedPrefixes {
fn new() -> Self {
Self {
root: IncludedPrefix::Static(StaticPrefix(FxHashMap::default())),
}
}
fn root_mut(&mut self) -> &mut IncludedPrefix {
&mut self.root
}
fn is_statically_excluded(&self, path: &SystemPath) -> bool {
let mut current = &self.root;
for component in path.components() {
match current {
IncludedPrefix::Dynamic => return false,
IncludedPrefix::Static(static_prefix) => {
match static_prefix.0.get(component.as_str()) {
Some(parent) => {
current = parent;
}
None => {
return true;
}
}
}
}
}
false
}
}
/// A node in the `IncludedPrefixes` tree.
#[derive(Clone, Debug)]
enum IncludedPrefix {
/// The path contains at least one dynamic child pattern. E.g. if `a/*/b` and `a/c`, then `a` is dynamic because of the first pattern.
Dynamic,
/// All its children are fixed.
Static(StaticPrefix),
}
/// The children of a static node, keyed by the literal name of the path component.
#[derive(Clone, Debug, Default)]
struct StaticPrefix(FxHashMap<String, IncludedPrefix>);
#[cfg(test)]
mod tests {
use crate::walk::{ExcludeReason, FilePatterns, FilePatternsBuilder, PatternMatch};
fn create_patterns(patterns: impl IntoIterator<Item = &'static str>) -> FilePatterns {
let mut builder = FilePatternsBuilder::new();
for pattern in patterns {
builder.add(pattern).unwrap_or_else(|err| {
panic!("Invalid pattern '{pattern}`: {err}");
});
}
builder.build().unwrap()
}
/// The pattern set matching `**` always returns `Include`
#[test]
fn all() {
let patterns = create_patterns(["/**"]);
assert_eq!(patterns.matches_directory("/src"), PatternMatch::Include);
assert_eq!(patterns.matches_directory("/src/"), PatternMatch::Include);
assert_eq!(patterns.matches_directory("/"), PatternMatch::Include);
assert_eq!(patterns.matches("/test.py"), PatternMatch::Include);
}
/// The empty pattern set always returns `None`.
#[test]
fn empty() {
let patterns = create_patterns([]);
assert_eq!(patterns.matches("/a.foo"), PatternMatch::None);
assert_eq!(patterns.matches("/a"), PatternMatch::None);
assert_eq!(patterns.matches("/"), PatternMatch::None);
}
#[test]
fn simple() {
let patterns = create_patterns(["/*.foo", "!/*.bar"]);
assert_eq!(patterns.matches("/a.foo"), PatternMatch::Include);
assert_eq!(patterns.matches_directory("/a.foo"), PatternMatch::Include);
assert_eq!(
patterns.matches("/a.rs"),
PatternMatch::Exclude(ExcludeReason::NoIncludePattern)
);
assert_eq!(patterns.matches_directory("/a.rs"), PatternMatch::None);
assert_eq!(
patterns.matches("/a.bar"),
PatternMatch::Exclude(ExcludeReason::Match)
);
assert_eq!(
patterns.matches_directory("/a.bar"),
PatternMatch::Exclude(ExcludeReason::Match)
);
}
#[test]
fn only_excludes() {
let patterns = create_patterns(["!/*.bar"]);
assert_eq!(patterns.matches("/a.rs"), PatternMatch::None);
assert_eq!(patterns.matches_directory("/a.rs"), PatternMatch::None);
assert_eq!(
patterns.matches("/a.bar"),
PatternMatch::Exclude(ExcludeReason::Match)
);
assert_eq!(
patterns.matches_directory("/a.bar"),
PatternMatch::Exclude(ExcludeReason::Match)
);
}
#[test]
fn precedence() {
let patterns = create_patterns(["/*.foo", "!/*.bar.foo"]);
assert_eq!(patterns.matches("/a.foo"), PatternMatch::Include);
assert_eq!(
patterns.matches("/a.baz"),
PatternMatch::Exclude(ExcludeReason::NoIncludePattern)
);
assert_eq!(
patterns.matches("/a.bar.foo"),
PatternMatch::Exclude(ExcludeReason::Match)
);
}
/// `directory/` matches the directory `directory` and its contents. It doesn't match files.
#[test]
fn implicit_directory_pattern() {
let patterns = create_patterns(["/src/"]);
assert_eq!(patterns.matches_directory("/src"), PatternMatch::Include);
assert_eq!(patterns.matches_directory("/src/"), PatternMatch::Include);
// Don't include files, because the pattern ends with `/`
assert_eq!(
patterns.matches("/src"),
PatternMatch::Exclude(ExcludeReason::NoIncludePattern)
);
// But include the content of src
assert_eq!(patterns.matches("/src/test.py"), PatternMatch::Include);
// Deep nesting
assert_eq!(
patterns.matches("/src/glob/builder.py"),
PatternMatch::Include
);
// Or a file with the same name
assert_eq!(patterns.matches("/src/src"), PatternMatch::Include);
// Or a directory with the same name
assert_eq!(
patterns.matches_directory("/src/src"),
PatternMatch::Include
);
}
#[test]
fn implicit_pattern() {
// Patterns ending without a slash include both files and directories.
// It includes all files in said directory
let patterns = create_patterns(["/src"]);
assert_eq!(patterns.matches_directory("/src"), PatternMatch::Include);
assert_eq!(patterns.matches("/src/"), PatternMatch::Include);
// Also include files
assert_eq!(patterns.matches("/src"), PatternMatch::Include);
assert_eq!(patterns.matches("/src/test.py"), PatternMatch::Include);
// Deep nesting
assert_eq!(
patterns.matches("/src/glob/builder.py"),
PatternMatch::Include
);
// Or a file with the same name
assert_eq!(patterns.matches("/src/src"), PatternMatch::Include);
// Or a directory with the same name
assert_eq!(
patterns.matches_directory("/src/src"),
PatternMatch::Include
);
}
/// Patterns where the last part has an extension match files or directories (without their content).
#[test]
fn pattern_with_extension() {
    let patterns = create_patterns(["/test.py"]);

    // The pattern matches a file with that name...
    assert_eq!(patterns.matches("/test.py"), PatternMatch::Include);

    // ...as well as a directory with that name.
    assert_eq!(
        patterns.matches_directory("/test.py"),
        PatternMatch::Include
    );

    // But it doesn't match the content of such a directory.
    assert_eq!(
        patterns.matches("/test.py/abcd"),
        PatternMatch::Exclude(ExcludeReason::NoIncludePattern)
    );

    assert_eq!(
        patterns.matches_directory("/test.py/abcd"),
        PatternMatch::Exclude(ExcludeReason::NoIncludePattern)
    );
}
/// Tests that `matches` returns `Exclude` if:
///
/// * There's at least one include
/// * The parent component of `paths` are known to not overlap with any include pattern
///
/// This allows to avoid traversing directories for which it is known that no file will match
/// any include pattern. For example, we want to avoid traversing `tests` if the pattern is `["src"]`.
#[test]
fn directory_pruning() {
let patterns = create_patterns(["/a/b/test-*/d", "/a/b/c/e", "/b/c"]);
// Paths that can be statically pruned because they match no known prefix
assert_eq!(
patterns.matches_directory("/a/x"),
PatternMatch::Exclude(ExcludeReason::NoIncludePattern),
);
assert_eq!(
patterns.matches_directory("/x"),
PatternMatch::Exclude(ExcludeReason::NoIncludePattern),
);
// Paths that are known to be included
assert_eq!(patterns.matches_directory("/b/c"), PatternMatch::Include);
assert_eq!(
patterns.matches_directory("/a/b/test-x/d"),
PatternMatch::Include
);
assert_eq!(
patterns.matches_directory("/a/b/c/e"),
PatternMatch::Include
);
// Path's that can't be pruned because they could match the `test-*` wildcard pattern
assert_eq!(patterns.matches_directory("/a/b/b/d"), PatternMatch::None);
// Path's that can't be pruned because they match a known prefix (in this case `/b/c`) but they don't
// match a pattern themselves
assert_eq!(patterns.matches_directory("/b"), PatternMatch::None)
}
#[test]
fn prefix_wildcard_include() {
let patterns = create_patterns(["/**/test/**", "/a/b/c/e", "/b/c"]);
assert_eq!(
patterns.matches_directory("/src/test/"),
PatternMatch::Include
);
assert_eq!(
patterns.matches_directory("/a/b/c/e"),
PatternMatch::Include
);
assert_eq!(patterns.matches_directory("/b/c"), PatternMatch::Include);
// We can't skip over the following directories because of the `**` wildcard
assert_eq!(
patterns.matches_directory("/not_included/a/b"),
PatternMatch::None
);
}
#[test]
fn nested_prefix_wildcard_include() {
let patterns = create_patterns(["/src/**/test", "/a/b", "/src/abcd/main.py"]);
assert_eq!(patterns.matches_directory("/a/b"), PatternMatch::Include);
assert_eq!(
patterns.matches_directory("/src/test"),
PatternMatch::Include
);
// We can't skip over the following directories because of the `**` wildcard
assert_eq!(
patterns.matches_directory("/src/not_included/a/b"),
PatternMatch::None
);
}
}

10
ty.schema.json generated
View File

@@ -851,6 +851,16 @@
"SrcOptions": {
"type": "object",
"properties": {
"files": {
"description": "TODO",
"type": [
"array",
"null"
],
"items": {
"type": "string"
}
},
"respect-ignore-files": {
"description": "Whether to automatically exclude files that are ignored by `.ignore`, `.gitignore`, `.git/info/exclude`, and global `gitignore` files. Enabled by default.",
"type": [