// re-export our public interface use crate::text_size::{TextLen, TextSize}; use memchr::memrchr2; pub use crate::source_location::{ newlines::{find_newline, UniversalNewlineIterator}, LineIndex, OneIndexed, SourceCode, SourceLocation, }; pub type LineNumber = OneIndexed; #[derive(Debug, Copy, Clone, Default)] pub struct SourceRange { pub start: SourceLocation, pub end: Option, } impl SourceRange { pub fn new(start: SourceLocation, end: SourceLocation) -> Self { Self { start, end: Some(end), } } pub fn unwrap_end(&self) -> SourceLocation { self.end.unwrap() } } impl From> for SourceRange { fn from(value: std::ops::Range) -> Self { Self { start: value.start, end: Some(value.end), } } } /// Converts source code byte-offset to Python convention line and column numbers. pub struct RandomLocator<'a> { pub source: &'a str, index: LineIndex, } impl<'a> RandomLocator<'a> { #[inline] pub fn new(source: &'a str) -> Self { let index = LineIndex::from_source_text(source); Self { source, index } } pub fn to_source_code(&self) -> SourceCode { SourceCode::new(self.source, &self.index) } pub fn locate(&mut self, offset: crate::text_size::TextSize) -> SourceLocation { let offset = offset.to_u32().into(); self.to_source_code().source_location(offset) } pub fn locate_error(&mut self, base: crate::error::BaseError) -> LocatedError where T: Into, { let location = self.locate(base.offset); LocatedError { error: base.error.into(), location: Some(location), source_path: base.source_path, } } } /// Converts source code byte-offset to Python convention line and column numbers. pub struct LinearLocator<'a> { pub source: &'a str, state: LinearLocatorState, #[cfg(debug_assertions)] index: LineIndex, } struct LinearLocatorState { line_start: TextSize, line_end: Option, line_number: OneIndexed, cursor: TextSize, is_ascii: bool, } impl LinearLocatorState { fn init(source: &str) -> Self { let mut line_start = TextSize::default(); if source.starts_with('\u{feff}') { line_start += '\u{feff}'.text_len(); } let (line_end, is_ascii) = if let Some((position, line_ending)) = find_newline(source) { let is_ascii = source[..position].is_ascii(); ( Some(TextSize::new(position as u32 + line_ending.len() as u32)), is_ascii, ) } else { (None, source.is_ascii()) }; let line_number = OneIndexed::MIN; Self { line_start, line_end, line_number, cursor: line_start, is_ascii, } } fn new_line_start(&self, next_offset: TextSize) -> Option { if let Some(new_line_start) = self.line_end { if new_line_start <= next_offset { return Some(new_line_start); } } None } } impl<'a> LinearLocator<'a> { // nl = newline #[inline] pub fn new(source: &'a str) -> Self { let state = LinearLocatorState::init(source); Self { source, state, #[cfg(debug_assertions)] index: LineIndex::from_source_text(source), } } pub fn locate(&mut self, offset: crate::text_size::TextSize) -> SourceLocation { debug_assert!( self.state.cursor <= offset, "{:?} -> {:?} {}", self.state.cursor, offset, &self.source[offset.to_usize()..self.state.cursor.to_usize()] ); let (column, new_state) = self.locate_inner(offset); if let Some(state) = new_state { self.state = state; } else { self.state.cursor = offset; } SourceLocation { row: self.state.line_number, column, } } pub fn locate_only(&mut self, offset: crate::text_size::TextSize) -> SourceLocation { let (column, new_state) = self.locate_inner(offset); let state = new_state.as_ref().unwrap_or(&self.state); SourceLocation { row: state.line_number, column, } } fn locate_inner( &mut self, offset: crate::text_size::TextSize, ) -> (OneIndexed, Option) { let (column, new_state) = if let Some(new_line_start) = self.state.new_line_start(offset) { // not fit in current line let focused = &self.source[new_line_start.to_usize()..offset.to_usize()]; let (lines, line_start, column) = if let Some(last_newline) = memrchr2(b'\r', b'\n', focused.as_bytes()) { let last_newline = new_line_start.to_usize() + last_newline; let lines = UniversalNewlineIterator::from( &self.source[self.state.cursor.to_usize()..last_newline + 1], ) .count(); let line_start = last_newline as u32 + 1; let column = offset.to_u32() - line_start; (lines as u32, line_start, column) } else { let column = (offset - new_line_start).to_u32(); (1, new_line_start.to_u32(), column) }; let line_number = self.state.line_number.saturating_add(lines); let (line_end, is_ascii) = if let Some((newline, line_ending)) = find_newline(&self.source[line_start as usize..]) { let newline = line_start as usize + newline; let is_ascii = self.source[line_start as usize..newline].is_ascii(); ( Some(TextSize::new(newline as u32 + line_ending.len() as u32)), is_ascii, ) } else { let is_ascii = self.source[line_start as usize..].is_ascii(); (None, is_ascii) }; let line_start = TextSize::new(line_start); let state = LinearLocatorState { line_start, line_end, line_number, cursor: offset, is_ascii, }; (column, Some(state)) } else { let column = (offset - self.state.line_start).to_u32(); (column, None) }; let state = new_state.as_ref().unwrap_or(&self.state); let column = if state.is_ascii { column } else { self.source[state.line_start.to_usize()..][..column as usize] .chars() .count() as u32 }; let column = OneIndexed::from_zero_indexed(column); #[cfg(debug_assertions)] { let location = SourceLocation { row: state.line_number, column, }; let source_code = SourceCode::new(self.source, &self.index); assert_eq!( location, source_code.source_location(offset), "input: {} -> {} {}", self.state.cursor.to_usize(), offset.to_usize(), &self.source[self.state.cursor.to_usize()..offset.to_usize()] ); } (column, new_state) } pub fn locate_error(&mut self, base: crate::error::BaseError) -> LocatedError where T: Into, { let location = self.locate(base.offset); LocatedError { error: base.error.into(), location: Some(location), source_path: base.source_path, } } } #[derive(Debug, PartialEq, Eq)] pub struct LocatedError { pub error: T, pub location: Option, pub source_path: String, } impl LocatedError { pub fn error(self) -> T { self.error } pub fn from(obj: LocatedError) -> Self where U: Into, { Self { error: obj.error.into(), location: obj.location, source_path: obj.source_path, } } pub fn into(self) -> LocatedError where T: Into, { LocatedError::from(self) } pub fn python_location(&self) -> (usize, usize) { if let Some(location) = self.location { (location.row.to_usize(), location.column.to_usize()) } else { (0, 0) } } } impl std::fmt::Display for LocatedError where T: std::fmt::Display, { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let (row, column) = self .location .map_or((0, 0), |l| (l.row.to_usize(), l.column.to_usize())); write!(f, "{} at row {} col {}", &self.error, row, column,) } } impl std::error::Error for LocatedError where T: std::error::Error + 'static, { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { Some(&self.error) } } #[test] fn test_linear_locator() { let source = r#" 123456789 abcdefghi 유니코드 "# .strip_prefix(char::is_whitespace) .unwrap(); let mut locator = LinearLocator::new(source); let mut random_locator = RandomLocator::new(source); let mut test = |(row, col), offset| { let input = TextSize::from(offset); let expected: SourceLocation = SourceLocation { row: OneIndexed::new(row).unwrap(), column: OneIndexed::new(col).unwrap(), }; let actual = locator.locate(input); let actual2 = random_locator.locate(input); assert_eq!(expected, actual); assert_eq!(expected, actual2); }; test((1, 1), 0); test((1, 6), 5); test((1, 9), 8); test((2, 1), 10); test((4, 1), 21); test((4, 3), 27); }