[ty] Correctly encode multiline tokens for clients not supporting multiline tokens (#22033)

This commit is contained in:
Micha Reiser
2025-12-18 12:38:21 +01:00
committed by GitHub
parent 7bb5dd87ff
commit b2a8c42b51
6 changed files with 350 additions and 88 deletions

View File

@@ -179,42 +179,45 @@ impl LineIndex {
let line = self.line_index(offset);
let line_start = self.line_start(line, text);
let character_offset =
self.characters_between(TextRange::new(line_start, offset), text, encoding);
SourceLocation {
line,
character_offset: OneIndexed::from_zero_indexed(character_offset),
}
}
/// Counts the characters covered by `range` in `text`, measured in the units of `encoding`.
///
/// When `self.is_ascii()` holds, or when the encoding is UTF-8, the result is the byte length
/// of `range`. For UTF-16 it is the number of UTF-16 code units in `&text[range]`, and for
/// UTF-32 it is the number of `char`s in `&text[range]`.
fn characters_between(
&self,
range: TextRange,
text: &str,
encoding: PositionEncoding,
) -> usize {
if self.is_ascii() {
return SourceLocation {
line,
character_offset: OneIndexed::from_zero_indexed((offset - line_start).to_usize()),
};
return (range.end() - range.start()).to_usize();
}
match encoding {
PositionEncoding::Utf8 => {
let character_offset = offset - line_start;
SourceLocation {
line,
character_offset: OneIndexed::from_zero_indexed(character_offset.to_usize()),
}
}
PositionEncoding::Utf8 => (range.end() - range.start()).to_usize(),
PositionEncoding::Utf16 => {
let up_to_character = &text[TextRange::new(line_start, offset)];
let character = up_to_character.encode_utf16().count();
SourceLocation {
line,
character_offset: OneIndexed::from_zero_indexed(character),
}
let up_to_character = &text[range];
up_to_character.encode_utf16().count()
}
PositionEncoding::Utf32 => {
let up_to_character = &text[TextRange::new(line_start, offset)];
let character = up_to_character.chars().count();
SourceLocation {
line,
character_offset: OneIndexed::from_zero_indexed(character),
}
let up_to_character = &text[range];
up_to_character.chars().count()
}
}
}
/// Computes how many characters `line` spans in `text`, counted in the units of `encoding`.
///
/// The measured span is exactly the range that [`Self::line_range`] reports for `line`.
/// NOTE(review): whether that range includes the line terminator depends on `line_range`;
/// confirm before relying on this as a "visible width".
pub fn line_len(&self, line: OneIndexed, text: &str, encoding: PositionEncoding) -> usize {
    self.characters_between(self.line_range(line, text), text, encoding)
}
/// Return the number of lines in the source code.
pub fn line_count(&self) -> usize {
self.line_starts().len()

View File

@@ -1,7 +1,8 @@
use lsp_types::SemanticToken;
use ruff_db::source::source_text;
use ruff_db::source::{line_index, source_text};
use ruff_source_file::OneIndexed;
use ruff_text_size::{Ranged, TextRange};
use ty_ide::semantic_tokens;
use ty_ide::{SemanticTokenModifier, SemanticTokenType, semantic_tokens};
use ty_project::ProjectDatabase;
use crate::document::{PositionEncoding, ToRangeExt};
@@ -16,12 +17,14 @@ pub(crate) fn generate_semantic_tokens(
multiline_token_support: bool,
) -> Vec<SemanticToken> {
let source = source_text(db, file);
let line_index = line_index(db, file);
let semantic_token_data = semantic_tokens(db, file, range);
// Convert semantic tokens to LSP format
let mut lsp_tokens = Vec::new();
let mut prev_line = 0u32;
let mut prev_start = 0u32;
let mut encoder = Encoder {
tokens: Vec::with_capacity(semantic_token_data.len()),
prev_line: 0,
prev_start: 0,
};
for token in &*semantic_token_data {
let Some(lsp_range) = token
@@ -32,62 +35,92 @@ pub(crate) fn generate_semantic_tokens(
continue;
};
let line = lsp_range.start.line;
let character = lsp_range.start.character;
if lsp_range.start.line == lsp_range.end.line {
let len = lsp_range.end.character - lsp_range.start.character;
encoder.push_token_at(lsp_range.start, len, token.token_type, token.modifiers);
} else if multiline_token_support {
// If the client supports multiline-tokens,
// compute the length of the entire range.
let mut len = 0;
// Calculate length in the negotiated encoding
let length = if !multiline_token_support && lsp_range.start.line != lsp_range.end.line {
// Token spans multiple lines but client doesn't support it
// Clamp to the end of the current line
if let Some(line_text) = source.lines().nth(lsp_range.start.line as usize) {
let line_length_in_encoding = match encoding {
PositionEncoding::UTF8 => line_text.len().try_into().unwrap_or(u32::MAX),
PositionEncoding::UTF16 => line_text
.encode_utf16()
.count()
.try_into()
.unwrap_or(u32::MAX),
PositionEncoding::UTF32 => {
line_text.chars().count().try_into().unwrap_or(u32::MAX)
}
for line in lsp_range.start.line..lsp_range.end.line {
let line_len = line_index.line_len(
OneIndexed::from_zero_indexed(line as usize),
&source,
encoding.into(),
);
len += u32::try_from(line_len).unwrap();
}
// Subtract the first line because we added the length from the beginning.
len -= lsp_range.start.character;
// We didn't compute the length of the last line, add it now.
len += lsp_range.end.character;
encoder.push_token_at(lsp_range.start, len, token.token_type, token.modifiers);
} else {
// Multiline token but the client only supports single line tokens
// Push a token for each line.
for line in lsp_range.start.line..=lsp_range.end.line {
let start_character = if line == lsp_range.start.line {
lsp_range.start.character
} else {
0
};
line_length_in_encoding.saturating_sub(lsp_range.start.character)
} else {
0
}
} else {
// Either client supports multiline tokens or this is a single-line token
// Use the difference between start and end character positions
if lsp_range.start.line == lsp_range.end.line {
lsp_range.end.character - lsp_range.start.character
} else {
// Multiline token and client supports it - calculate full token length
let token_text = &source[token.range()];
match encoding {
PositionEncoding::UTF8 => token_text.len().try_into().unwrap_or(u32::MAX),
PositionEncoding::UTF16 => token_text
.encode_utf16()
.count()
.try_into()
.unwrap_or(u32::MAX),
PositionEncoding::UTF32 => {
token_text.chars().count().try_into().unwrap_or(u32::MAX)
}
}
}
};
let token_type = token.token_type as u32;
let token_modifiers = token.modifiers.bits();
let start = lsp_types::Position {
line,
character: start_character,
};
let end = if line == lsp_range.end.line {
lsp_range.end.character
} else {
let line_len = line_index.line_len(
OneIndexed::from_zero_indexed(line as usize),
&source,
encoding.into(),
);
u32::try_from(line_len).unwrap()
};
let len = end - start.character;
encoder.push_token_at(start, len, token.token_type, token.modifiers);
}
}
}
encoder.tokens
}
/// Accumulates semantic tokens, encoding each token's position as a delta relative to the
/// previously pushed token.
struct Encoder {
/// Tokens encoded so far, in push order.
tokens: Vec<SemanticToken>,
/// Start line of the most recently pushed token.
prev_line: u32,
/// Start character of the most recently pushed token.
prev_start: u32,
}
impl Encoder {
/// Appends a token that starts at `start` and spans `length` characters.
///
/// The position is stored relative to the previously pushed token: `delta_line` is the line
/// difference, and `delta_start` is relative to the previous start character only when both
/// tokens are on the same line. Afterwards `start` becomes the new reference position.
fn push_token_at(
&mut self,
start: lsp_types::Position,
length: u32,
ty: SemanticTokenType,
modifiers: SemanticTokenModifier,
) {
// LSP semantic tokens are encoded as deltas
let delta_line = line - prev_line;
let delta_line = start.line - self.prev_line;
let delta_start = if delta_line == 0 {
character - prev_start
start.character - self.prev_start
} else {
character
start.character
};
lsp_tokens.push(SemanticToken {
let token_type = ty as u32;
let token_modifiers = modifiers.bits();
self.tokens.push(SemanticToken {
delta_line,
delta_start,
length,
@@ -95,9 +128,7 @@ pub(crate) fn generate_semantic_tokens(
token_modifiers_bitset: token_modifiers,
});
prev_line = line;
prev_start = character;
self.prev_line = start.line;
self.prev_start = start.character;
}
lsp_tokens
}

View File

@@ -36,6 +36,7 @@ mod notebook;
mod publish_diagnostics;
mod pull_diagnostics;
mod rename;
mod semantic_tokens;
mod signature_help;
use std::collections::{BTreeMap, HashMap, VecDeque};
@@ -66,11 +67,12 @@ use lsp_types::{
DocumentDiagnosticParams, DocumentDiagnosticReportResult, FileEvent, Hover, HoverParams,
InitializeParams, InitializeResult, InitializedParams, InlayHint, InlayHintClientCapabilities,
InlayHintParams, NumberOrString, PartialResultParams, Position, PreviousResultId,
PublishDiagnosticsClientCapabilities, Range, SignatureHelp, SignatureHelpParams,
SignatureHelpTriggerKind, TextDocumentClientCapabilities, TextDocumentContentChangeEvent,
TextDocumentIdentifier, TextDocumentItem, TextDocumentPositionParams, Url,
VersionedTextDocumentIdentifier, WorkDoneProgressParams, WorkspaceClientCapabilities,
WorkspaceDiagnosticParams, WorkspaceDiagnosticReportResult, WorkspaceEdit, WorkspaceFolder,
PublishDiagnosticsClientCapabilities, Range, SemanticTokensResult, SignatureHelp,
SignatureHelpParams, SignatureHelpTriggerKind, TextDocumentClientCapabilities,
TextDocumentContentChangeEvent, TextDocumentIdentifier, TextDocumentItem,
TextDocumentPositionParams, Url, VersionedTextDocumentIdentifier, WorkDoneProgressParams,
WorkspaceClientCapabilities, WorkspaceDiagnosticParams, WorkspaceDiagnosticReportResult,
WorkspaceEdit, WorkspaceFolder,
};
use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf, TestSystem};
use rustc_hash::FxHashMap;
@@ -964,6 +966,19 @@ impl TestServer {
});
self.await_response::<SignatureHelpRequest>(&signature_help_id)
}
pub(crate) fn semantic_tokens_full_request(
&mut self,
uri: &Url,
) -> Option<SemanticTokensResult> {
self.send_request_await::<lsp_types::request::SemanticTokensFullRequest>(
lsp_types::SemanticTokensParams {
text_document: TextDocumentIdentifier { uri: uri.clone() },
work_done_progress_params: lsp_types::WorkDoneProgressParams::default(),
partial_result_params: PartialResultParams::default(),
},
)
}
}
impl fmt::Debug for TestServer {
@@ -1194,6 +1209,16 @@ impl TestServerBuilder {
self
}
/// Sets the `multiline_token_support` flag in the client's semantic-token capabilities,
/// creating the intermediate `text_document` and `semantic_tokens` capability structs
/// with default values if they are not set yet.
pub(crate) fn enable_multiline_token_support(mut self, enabled: bool) -> Self {
    let text_document = self
        .client_capabilities
        .text_document
        .get_or_insert_default();
    let semantic_tokens = text_document.semantic_tokens.get_or_insert_default();
    semantic_tokens.multiline_token_support = Some(enabled);

    self
}
/// Set custom client capabilities (overrides any previously set capabilities)
#[expect(dead_code)]
pub(crate) fn with_client_capabilities(mut self, capabilities: ClientCapabilities) -> Self {

View File

@@ -0,0 +1,72 @@
use anyhow::Result;
use ruff_db::system::SystemPath;
use crate::TestServerBuilder;
/// Requests full semantic tokens for a Python file whose function body is a multiline
/// docstring, with the client's multiline token support explicitly disabled, and snapshots
/// the server's response.
#[test]
fn multiline_token_client_not_supporting_multiline_tokens() -> Result<()> {
let workspace_root = SystemPath::new("src");
let foo = SystemPath::new("src/foo.py");
// The docstring spans several lines, so its token cannot be expressed as a single
// single-line token.
let foo_content = r#"def my_function(param1: int, param2: str) -> bool:
"""Example function with PEP 484 type annotations.
Args:
param1: The first parameter.
param2: The second parameter.
Returns:
The return value. True for success, False otherwise.
"""
"#;
// Client under test: multiline semantic tokens are NOT supported.
let mut server = TestServerBuilder::new()?
.enable_pull_diagnostics(true)
.enable_multiline_token_support(false)
.with_workspace(workspace_root, None)?
.with_file(foo, foo_content)?
.build()
.wait_until_workspaces_are_initialized();
server.open_text_document(foo, foo_content, 1);
let tokens = server.semantic_tokens_full_request(&server.file_uri(foo));
insta::assert_json_snapshot!(tokens);
Ok(())
}
/// Requests full semantic tokens for a Python file whose function body is a multiline
/// docstring, with the client's multiline token support explicitly enabled, and snapshots
/// the server's response.
#[test]
fn multiline_token_client_supporting_multiline_tokens() -> Result<()> {
let workspace_root = SystemPath::new("src");
let foo = SystemPath::new("src/foo.py");
// Same source as the "not supporting" test so the two snapshots differ only by the
// client capability.
let foo_content = r#"def my_function(param1: int, param2: str) -> bool:
"""Example function with PEP 484 type annotations.
Args:
param1: The first parameter.
param2: The second parameter.
Returns:
The return value. True for success, False otherwise.
"""
"#;
// Client under test: multiline semantic tokens ARE supported.
let mut server = TestServerBuilder::new()?
.enable_pull_diagnostics(true)
.enable_multiline_token_support(true)
.with_workspace(workspace_root, None)?
.with_file(foo, foo_content)?
.build()
.wait_until_workspaces_are_initialized();
server.open_text_document(foo, foo_content, 1);
let tokens = server.semantic_tokens_full_request(&server.file_uri(foo));
insta::assert_json_snapshot!(tokens);
Ok(())
}

View File

@@ -0,0 +1,88 @@
---
source: crates/ty_server/tests/e2e/semantic_tokens.rs
expression: tokens
---
{
"data": [
0,
4,
11,
7,
1,
0,
12,
6,
2,
1,
0,
8,
3,
1,
0,
0,
5,
6,
2,
1,
0,
8,
3,
1,
0,
0,
8,
4,
1,
0,
1,
4,
51,
10,
0,
1,
0,
1,
10,
0,
1,
0,
10,
10,
0,
1,
0,
37,
10,
0,
1,
0,
38,
10,
0,
1,
0,
1,
10,
0,
1,
0,
13,
10,
0,
1,
0,
61,
10,
0,
1,
0,
1,
10,
0,
1,
0,
7,
10,
0
]
}

View File

@@ -0,0 +1,43 @@
---
source: crates/ty_server/tests/e2e/semantic_tokens.rs
expression: tokens
---
{
"data": [
0,
4,
11,
7,
1,
0,
12,
6,
2,
1,
0,
8,
3,
1,
0,
0,
5,
6,
2,
1,
0,
8,
3,
1,
0,
0,
8,
4,
1,
0,
1,
4,
220,
10,
0
]
}