From b2a8c42b513ac041bbb77b0b39011ed4b447f2c1 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Thu, 18 Dec 2025 12:38:21 +0100 Subject: [PATCH] [ty] Correctly encode multiline tokens for clients not supporting multiline tokens (#22033) --- crates/ruff_source_file/src/line_index.rs | 53 ++++--- .../src/server/api/semantic_tokens.rs | 147 +++++++++++------- crates/ty_server/tests/e2e/main.rs | 35 ++++- crates/ty_server/tests/e2e/semantic_tokens.rs | 72 +++++++++ ...lient_not_supporting_multiline_tokens.snap | 88 +++++++++++ ...en_client_supporting_multiline_tokens.snap | 43 +++++ 6 files changed, 350 insertions(+), 88 deletions(-) create mode 100644 crates/ty_server/tests/e2e/semantic_tokens.rs create mode 100644 crates/ty_server/tests/e2e/snapshots/e2e__semantic_tokens__multiline_token_client_not_supporting_multiline_tokens.snap create mode 100644 crates/ty_server/tests/e2e/snapshots/e2e__semantic_tokens__multiline_token_client_supporting_multiline_tokens.snap diff --git a/crates/ruff_source_file/src/line_index.rs b/crates/ruff_source_file/src/line_index.rs index c1d0769e83..e083c11dda 100644 --- a/crates/ruff_source_file/src/line_index.rs +++ b/crates/ruff_source_file/src/line_index.rs @@ -179,42 +179,45 @@ impl LineIndex { let line = self.line_index(offset); let line_start = self.line_start(line, text); + let character_offset = + self.characters_between(TextRange::new(line_start, offset), text, encoding); + + SourceLocation { + line, + character_offset: OneIndexed::from_zero_indexed(character_offset), + } + } + + fn characters_between( + &self, + range: TextRange, + text: &str, + encoding: PositionEncoding, + ) -> usize { if self.is_ascii() { - return SourceLocation { - line, - character_offset: OneIndexed::from_zero_indexed((offset - line_start).to_usize()), - }; + return (range.end() - range.start()).to_usize(); } match encoding { - PositionEncoding::Utf8 => { - let character_offset = offset - line_start; - SourceLocation { - line, - character_offset: OneIndexed::from_zero_indexed(character_offset.to_usize()), - } - } + PositionEncoding::Utf8 => (range.end() - range.start()).to_usize(), PositionEncoding::Utf16 => { - let up_to_character = &text[TextRange::new(line_start, offset)]; - let character = up_to_character.encode_utf16().count(); - - SourceLocation { - line, - character_offset: OneIndexed::from_zero_indexed(character), - } + let up_to_character = &text[range]; + up_to_character.encode_utf16().count() } PositionEncoding::Utf32 => { - let up_to_character = &text[TextRange::new(line_start, offset)]; - let character = up_to_character.chars().count(); - - SourceLocation { - line, - character_offset: OneIndexed::from_zero_indexed(character), - } + let up_to_character = &text[range]; + up_to_character.chars().count() } } } + /// Returns the length of the line in characters, respecting the given encoding + pub fn line_len(&self, line: OneIndexed, text: &str, encoding: PositionEncoding) -> usize { + let line_range = self.line_range(line, text); + + self.characters_between(line_range, text, encoding) + } + /// Return the number of lines in the source code. pub fn line_count(&self) -> usize { self.line_starts().len() diff --git a/crates/ty_server/src/server/api/semantic_tokens.rs b/crates/ty_server/src/server/api/semantic_tokens.rs index a6208087c0..07e4d5432b 100644 --- a/crates/ty_server/src/server/api/semantic_tokens.rs +++ b/crates/ty_server/src/server/api/semantic_tokens.rs @@ -1,7 +1,8 @@ use lsp_types::SemanticToken; -use ruff_db::source::source_text; +use ruff_db::source::{line_index, source_text}; +use ruff_source_file::OneIndexed; use ruff_text_size::{Ranged, TextRange}; -use ty_ide::semantic_tokens; +use ty_ide::{SemanticTokenModifier, SemanticTokenType, semantic_tokens}; use ty_project::ProjectDatabase; use crate::document::{PositionEncoding, ToRangeExt}; @@ -16,12 +17,14 @@ pub(crate) fn generate_semantic_tokens( multiline_token_support: bool, ) -> Vec { let source = source_text(db, file); + let line_index = line_index(db, file); let semantic_token_data = semantic_tokens(db, file, range); - // Convert semantic tokens to LSP format - let mut lsp_tokens = Vec::new(); - let mut prev_line = 0u32; - let mut prev_start = 0u32; + let mut encoder = Encoder { + tokens: Vec::with_capacity(semantic_token_data.len()), + prev_line: 0, + prev_start: 0, + }; for token in &*semantic_token_data { let Some(lsp_range) = token @@ -32,62 +35,92 @@ pub(crate) fn generate_semantic_tokens( continue; }; - let line = lsp_range.start.line; - let character = lsp_range.start.character; + if lsp_range.start.line == lsp_range.end.line { + let len = lsp_range.end.character - lsp_range.start.character; + encoder.push_token_at(lsp_range.start, len, token.token_type, token.modifiers); + } else if multiline_token_support { + // If the client supports multiline-tokens, + // compute the length of the entire range. + let mut len = 0; - // Calculate length in the negotiated encoding - let length = if !multiline_token_support && lsp_range.start.line != lsp_range.end.line { - // Token spans multiple lines but client doesn't support it - // Clamp to the end of the current line - if let Some(line_text) = source.lines().nth(lsp_range.start.line as usize) { - let line_length_in_encoding = match encoding { - PositionEncoding::UTF8 => line_text.len().try_into().unwrap_or(u32::MAX), - PositionEncoding::UTF16 => line_text - .encode_utf16() - .count() - .try_into() - .unwrap_or(u32::MAX), - PositionEncoding::UTF32 => { - line_text.chars().count().try_into().unwrap_or(u32::MAX) - } + for line in lsp_range.start.line..lsp_range.end.line { + let line_len = line_index.line_len( + OneIndexed::from_zero_indexed(line as usize), + &source, + encoding.into(), + ); + + len += u32::try_from(line_len).unwrap(); + } + + // Subtract the first line because we added the length from the beginning. + len -= lsp_range.start.character; + // We didn't compute the length of the last line, add it now. + len += lsp_range.end.character; + + encoder.push_token_at(lsp_range.start, len, token.token_type, token.modifiers); + } else { + // Multiline token but the client only supports single line tokens + // Push a token for each line. + for line in lsp_range.start.line..=lsp_range.end.line { + let start_character = if line == lsp_range.start.line { + lsp_range.start.character + } else { + 0 }; - line_length_in_encoding.saturating_sub(lsp_range.start.character) - } else { - 0 - } - } else { - // Either client supports multiline tokens or this is a single-line token - // Use the difference between start and end character positions - if lsp_range.start.line == lsp_range.end.line { - lsp_range.end.character - lsp_range.start.character - } else { - // Multiline token and client supports it - calculate full token length - let token_text = &source[token.range()]; - match encoding { - PositionEncoding::UTF8 => token_text.len().try_into().unwrap_or(u32::MAX), - PositionEncoding::UTF16 => token_text - .encode_utf16() - .count() - .try_into() - .unwrap_or(u32::MAX), - PositionEncoding::UTF32 => { - token_text.chars().count().try_into().unwrap_or(u32::MAX) - } - } - } - }; - let token_type = token.token_type as u32; - let token_modifiers = token.modifiers.bits(); + let start = lsp_types::Position { + line, + character: start_character, + }; + + let end = if line == lsp_range.end.line { + lsp_range.end.character + } else { + let line_len = line_index.line_len( + OneIndexed::from_zero_indexed(line as usize), + &source, + encoding.into(), + ); + u32::try_from(line_len).unwrap() + }; + + let len = end - start.character; + + encoder.push_token_at(start, len, token.token_type, token.modifiers); + } + } + } + + encoder.tokens +} + +struct Encoder { + tokens: Vec, + prev_line: u32, + prev_start: u32, +} + +impl Encoder { + fn push_token_at( + &mut self, + start: lsp_types::Position, + length: u32, + ty: SemanticTokenType, + modifiers: SemanticTokenModifier, + ) { // LSP semantic tokens are encoded as deltas - let delta_line = line - prev_line; + let delta_line = start.line - self.prev_line; let delta_start = if delta_line == 0 { - character - prev_start + start.character - self.prev_start } else { - character + start.character }; - lsp_tokens.push(SemanticToken { + let token_type = ty as u32; + let token_modifiers = modifiers.bits(); + + self.tokens.push(SemanticToken { delta_line, delta_start, length, @@ -95,9 +128,7 @@ pub(crate) fn generate_semantic_tokens( token_modifiers_bitset: token_modifiers, }); - prev_line = line; - prev_start = character; + self.prev_line = start.line; + self.prev_start = start.character; } - - lsp_tokens } diff --git a/crates/ty_server/tests/e2e/main.rs b/crates/ty_server/tests/e2e/main.rs index 8c6d378857..f939bd96b4 100644 --- a/crates/ty_server/tests/e2e/main.rs +++ b/crates/ty_server/tests/e2e/main.rs @@ -36,6 +36,7 @@ mod notebook; mod publish_diagnostics; mod pull_diagnostics; mod rename; +mod semantic_tokens; mod signature_help; use std::collections::{BTreeMap, HashMap, VecDeque}; @@ -66,11 +67,12 @@ use lsp_types::{ DocumentDiagnosticParams, DocumentDiagnosticReportResult, FileEvent, Hover, HoverParams, InitializeParams, InitializeResult, InitializedParams, InlayHint, InlayHintClientCapabilities, InlayHintParams, NumberOrString, PartialResultParams, Position, PreviousResultId, - PublishDiagnosticsClientCapabilities, Range, SignatureHelp, SignatureHelpParams, - SignatureHelpTriggerKind, TextDocumentClientCapabilities, TextDocumentContentChangeEvent, - TextDocumentIdentifier, TextDocumentItem, TextDocumentPositionParams, Url, - VersionedTextDocumentIdentifier, WorkDoneProgressParams, WorkspaceClientCapabilities, - WorkspaceDiagnosticParams, WorkspaceDiagnosticReportResult, WorkspaceEdit, WorkspaceFolder, + PublishDiagnosticsClientCapabilities, Range, SemanticTokensResult, SignatureHelp, + SignatureHelpParams, SignatureHelpTriggerKind, TextDocumentClientCapabilities, + TextDocumentContentChangeEvent, TextDocumentIdentifier, TextDocumentItem, + TextDocumentPositionParams, Url, VersionedTextDocumentIdentifier, WorkDoneProgressParams, + WorkspaceClientCapabilities, WorkspaceDiagnosticParams, WorkspaceDiagnosticReportResult, + WorkspaceEdit, WorkspaceFolder, }; use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf, TestSystem}; use rustc_hash::FxHashMap; @@ -964,6 +966,19 @@ impl TestServer { }); self.await_response::(&signature_help_id) } + + pub(crate) fn semantic_tokens_full_request( + &mut self, + uri: &Url, + ) -> Option { + self.send_request_await::( + lsp_types::SemanticTokensParams { + text_document: TextDocumentIdentifier { uri: uri.clone() }, + work_done_progress_params: lsp_types::WorkDoneProgressParams::default(), + partial_result_params: PartialResultParams::default(), + }, + ) + } } impl fmt::Debug for TestServer { @@ -1194,6 +1209,16 @@ impl TestServerBuilder { self } + pub(crate) fn enable_multiline_token_support(mut self, enabled: bool) -> Self { + self.client_capabilities + .text_document + .get_or_insert_default() + .semantic_tokens + .get_or_insert_default() + .multiline_token_support = Some(enabled); + self + } + /// Set custom client capabilities (overrides any previously set capabilities) #[expect(dead_code)] pub(crate) fn with_client_capabilities(mut self, capabilities: ClientCapabilities) -> Self { diff --git a/crates/ty_server/tests/e2e/semantic_tokens.rs b/crates/ty_server/tests/e2e/semantic_tokens.rs new file mode 100644 index 0000000000..5eb5957666 --- /dev/null +++ b/crates/ty_server/tests/e2e/semantic_tokens.rs @@ -0,0 +1,72 @@ +use anyhow::Result; +use ruff_db::system::SystemPath; + +use crate::TestServerBuilder; + +#[test] +fn multiline_token_client_not_supporting_multiline_tokens() -> Result<()> { + let workspace_root = SystemPath::new("src"); + let foo = SystemPath::new("src/foo.py"); + let foo_content = r#"def my_function(param1: int, param2: str) -> bool: + """Example function with PEP 484 type annotations. + + Args: + param1: The first parameter. + param2: The second parameter. + + Returns: + The return value. True for success, False otherwise. + + """ +"#; + + let mut server = TestServerBuilder::new()? + .enable_pull_diagnostics(true) + .enable_multiline_token_support(false) + .with_workspace(workspace_root, None)? + .with_file(foo, foo_content)? + .build() + .wait_until_workspaces_are_initialized(); + + server.open_text_document(foo, foo_content, 1); + + let tokens = server.semantic_tokens_full_request(&server.file_uri(foo)); + + insta::assert_json_snapshot!(tokens); + + Ok(()) +} + +#[test] +fn multiline_token_client_supporting_multiline_tokens() -> Result<()> { + let workspace_root = SystemPath::new("src"); + let foo = SystemPath::new("src/foo.py"); + let foo_content = r#"def my_function(param1: int, param2: str) -> bool: + """Example function with PEP 484 type annotations. + + Args: + param1: The first parameter. + param2: The second parameter. + + Returns: + The return value. True for success, False otherwise. + + """ +"#; + + let mut server = TestServerBuilder::new()? + .enable_pull_diagnostics(true) + .enable_multiline_token_support(true) + .with_workspace(workspace_root, None)? + .with_file(foo, foo_content)? + .build() + .wait_until_workspaces_are_initialized(); + + server.open_text_document(foo, foo_content, 1); + + let tokens = server.semantic_tokens_full_request(&server.file_uri(foo)); + + insta::assert_json_snapshot!(tokens); + + Ok(()) +} diff --git a/crates/ty_server/tests/e2e/snapshots/e2e__semantic_tokens__multiline_token_client_not_supporting_multiline_tokens.snap b/crates/ty_server/tests/e2e/snapshots/e2e__semantic_tokens__multiline_token_client_not_supporting_multiline_tokens.snap new file mode 100644 index 0000000000..3b473eb203 --- /dev/null +++ b/crates/ty_server/tests/e2e/snapshots/e2e__semantic_tokens__multiline_token_client_not_supporting_multiline_tokens.snap @@ -0,0 +1,88 @@ +--- +source: crates/ty_server/tests/e2e/semantic_tokens.rs +expression: tokens +--- +{ + "data": [ + 0, + 4, + 11, + 7, + 1, + 0, + 12, + 6, + 2, + 1, + 0, + 8, + 3, + 1, + 0, + 0, + 5, + 6, + 2, + 1, + 0, + 8, + 3, + 1, + 0, + 0, + 8, + 4, + 1, + 0, + 1, + 4, + 51, + 10, + 0, + 1, + 0, + 1, + 10, + 0, + 1, + 0, + 10, + 10, + 0, + 1, + 0, + 37, + 10, + 0, + 1, + 0, + 38, + 10, + 0, + 1, + 0, + 1, + 10, + 0, + 1, + 0, + 13, + 10, + 0, + 1, + 0, + 61, + 10, + 0, + 1, + 0, + 1, + 10, + 0, + 1, + 0, + 7, + 10, + 0 + ] +} diff --git a/crates/ty_server/tests/e2e/snapshots/e2e__semantic_tokens__multiline_token_client_supporting_multiline_tokens.snap b/crates/ty_server/tests/e2e/snapshots/e2e__semantic_tokens__multiline_token_client_supporting_multiline_tokens.snap new file mode 100644 index 0000000000..20d2130006 --- /dev/null +++ b/crates/ty_server/tests/e2e/snapshots/e2e__semantic_tokens__multiline_token_client_supporting_multiline_tokens.snap @@ -0,0 +1,43 @@ +--- +source: crates/ty_server/tests/e2e/semantic_tokens.rs +expression: tokens +--- +{ + "data": [ + 0, + 4, + 11, + 7, + 1, + 0, + 12, + 6, + 2, + 1, + 0, + 8, + 3, + 1, + 0, + 0, + 5, + 6, + 2, + 1, + 0, + 8, + 3, + 1, + 0, + 0, + 8, + 4, + 1, + 0, + 1, + 4, + 220, + 10, + 0 + ] +}