Implement an iterator for universal newlines (#3454)
# Summary

We need to support CR line endings (as opposed to LF and CRLF line endings, which are already supported). They're rare, but they do appear in Python code, and we tend to panic on any file that uses them.

Our `Locator` abstraction now supports CR line endings. However, Rust's `str::lines` implementation does _not_. This PR adds a `UniversalNewlineIterator` implementation that respects all of CR, LF, and CRLF line endings, and plugs it into most of the `.lines()` call sites.

As an alternative design, it would be nice if we could leverage `Locator` for this: we've already computed all of the line endings, so we could probably iterate much more efficiently.

# Test Plan

Largely relying on automated testing; however, I also ran over some known failure cases, like #3404.
This commit is contained in:
@@ -56,10 +56,18 @@ impl<'a> Locator<'a> {
|
||||
self.contents
|
||||
}
|
||||
|
||||
/// Return the number of lines in the source code.
///
/// Obtains the index via `get_or_init_index` and returns whatever that
/// index's own `count_lines` reports.
/// NOTE(review): the name suggests the index is built on first use —
/// confirm against `get_or_init_index`.
|
||||
pub fn count_lines(&self) -> usize {
|
||||
let index = self.get_or_init_index();
|
||||
index.count_lines()
|
||||
}
|
||||
|
||||
/// Return the number of bytes in the source code.
///
/// Forwards to `len()` on `self.contents`.
|
||||
pub const fn len(&self) -> usize {
|
||||
self.contents.len()
|
||||
}
|
||||
|
||||
/// Return `true` if the source code is empty.
///
/// Forwards to `is_empty()` on `self.contents`; the counterpart of `len`.
|
||||
pub const fn is_empty(&self) -> bool {
|
||||
self.contents.is_empty()
|
||||
}
|
||||
@@ -83,6 +91,14 @@ impl Index {
|
||||
Index::Utf8(utf8) => utf8.byte_offset(location, contents),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the number of lines in the source code.
///
/// Both the `Ascii` and `Utf8` variants answer with the length of their
/// `line_start_byte_offsets` collection.
/// NOTE(review): presumably that collection holds one entry per line
/// start — confirm where the index is built.
|
||||
fn count_lines(&self) -> usize {
|
||||
match self {
|
||||
Index::Ascii(ascii) => ascii.line_start_byte_offsets.len(),
|
||||
Index::Utf8(utf8) => utf8.line_start_byte_offsets.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&str> for Index {
|
||||
|
||||
Reference in New Issue
Block a user