[ty] Correctly encode multiline tokens for clients not supporting multiline tokens (#22033)
This commit is contained in:
@@ -179,42 +179,45 @@ impl LineIndex {
|
||||
let line = self.line_index(offset);
|
||||
let line_start = self.line_start(line, text);
|
||||
|
||||
let character_offset =
|
||||
self.characters_between(TextRange::new(line_start, offset), text, encoding);
|
||||
|
||||
SourceLocation {
|
||||
line,
|
||||
character_offset: OneIndexed::from_zero_indexed(character_offset),
|
||||
}
|
||||
}
|
||||
|
||||
fn characters_between(
|
||||
&self,
|
||||
range: TextRange,
|
||||
text: &str,
|
||||
encoding: PositionEncoding,
|
||||
) -> usize {
|
||||
if self.is_ascii() {
|
||||
return SourceLocation {
|
||||
line,
|
||||
character_offset: OneIndexed::from_zero_indexed((offset - line_start).to_usize()),
|
||||
};
|
||||
return (range.end() - range.start()).to_usize();
|
||||
}
|
||||
|
||||
match encoding {
|
||||
PositionEncoding::Utf8 => {
|
||||
let character_offset = offset - line_start;
|
||||
SourceLocation {
|
||||
line,
|
||||
character_offset: OneIndexed::from_zero_indexed(character_offset.to_usize()),
|
||||
}
|
||||
}
|
||||
PositionEncoding::Utf8 => (range.end() - range.start()).to_usize(),
|
||||
PositionEncoding::Utf16 => {
|
||||
let up_to_character = &text[TextRange::new(line_start, offset)];
|
||||
let character = up_to_character.encode_utf16().count();
|
||||
|
||||
SourceLocation {
|
||||
line,
|
||||
character_offset: OneIndexed::from_zero_indexed(character),
|
||||
}
|
||||
let up_to_character = &text[range];
|
||||
up_to_character.encode_utf16().count()
|
||||
}
|
||||
PositionEncoding::Utf32 => {
|
||||
let up_to_character = &text[TextRange::new(line_start, offset)];
|
||||
let character = up_to_character.chars().count();
|
||||
|
||||
SourceLocation {
|
||||
line,
|
||||
character_offset: OneIndexed::from_zero_indexed(character),
|
||||
}
|
||||
let up_to_character = &text[range];
|
||||
up_to_character.chars().count()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the length of the line in characters, respecting the given encoding
|
||||
pub fn line_len(&self, line: OneIndexed, text: &str, encoding: PositionEncoding) -> usize {
|
||||
let line_range = self.line_range(line, text);
|
||||
|
||||
self.characters_between(line_range, text, encoding)
|
||||
}
|
||||
|
||||
/// Return the number of lines in the source code.
|
||||
pub fn line_count(&self) -> usize {
|
||||
self.line_starts().len()
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use lsp_types::SemanticToken;
|
||||
use ruff_db::source::source_text;
|
||||
use ruff_db::source::{line_index, source_text};
|
||||
use ruff_source_file::OneIndexed;
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
use ty_ide::semantic_tokens;
|
||||
use ty_ide::{SemanticTokenModifier, SemanticTokenType, semantic_tokens};
|
||||
use ty_project::ProjectDatabase;
|
||||
|
||||
use crate::document::{PositionEncoding, ToRangeExt};
|
||||
@@ -16,12 +17,14 @@ pub(crate) fn generate_semantic_tokens(
|
||||
multiline_token_support: bool,
|
||||
) -> Vec<SemanticToken> {
|
||||
let source = source_text(db, file);
|
||||
let line_index = line_index(db, file);
|
||||
let semantic_token_data = semantic_tokens(db, file, range);
|
||||
|
||||
// Convert semantic tokens to LSP format
|
||||
let mut lsp_tokens = Vec::new();
|
||||
let mut prev_line = 0u32;
|
||||
let mut prev_start = 0u32;
|
||||
let mut encoder = Encoder {
|
||||
tokens: Vec::with_capacity(semantic_token_data.len()),
|
||||
prev_line: 0,
|
||||
prev_start: 0,
|
||||
};
|
||||
|
||||
for token in &*semantic_token_data {
|
||||
let Some(lsp_range) = token
|
||||
@@ -32,62 +35,92 @@ pub(crate) fn generate_semantic_tokens(
|
||||
continue;
|
||||
};
|
||||
|
||||
let line = lsp_range.start.line;
|
||||
let character = lsp_range.start.character;
|
||||
if lsp_range.start.line == lsp_range.end.line {
|
||||
let len = lsp_range.end.character - lsp_range.start.character;
|
||||
encoder.push_token_at(lsp_range.start, len, token.token_type, token.modifiers);
|
||||
} else if multiline_token_support {
|
||||
// If the client supports multiline-tokens,
|
||||
// compute the length of the entire range.
|
||||
let mut len = 0;
|
||||
|
||||
// Calculate length in the negotiated encoding
|
||||
let length = if !multiline_token_support && lsp_range.start.line != lsp_range.end.line {
|
||||
// Token spans multiple lines but client doesn't support it
|
||||
// Clamp to the end of the current line
|
||||
if let Some(line_text) = source.lines().nth(lsp_range.start.line as usize) {
|
||||
let line_length_in_encoding = match encoding {
|
||||
PositionEncoding::UTF8 => line_text.len().try_into().unwrap_or(u32::MAX),
|
||||
PositionEncoding::UTF16 => line_text
|
||||
.encode_utf16()
|
||||
.count()
|
||||
.try_into()
|
||||
.unwrap_or(u32::MAX),
|
||||
PositionEncoding::UTF32 => {
|
||||
line_text.chars().count().try_into().unwrap_or(u32::MAX)
|
||||
}
|
||||
for line in lsp_range.start.line..lsp_range.end.line {
|
||||
let line_len = line_index.line_len(
|
||||
OneIndexed::from_zero_indexed(line as usize),
|
||||
&source,
|
||||
encoding.into(),
|
||||
);
|
||||
|
||||
len += u32::try_from(line_len).unwrap();
|
||||
}
|
||||
|
||||
// Subtract the first line because we added the length from the beginning.
|
||||
len -= lsp_range.start.character;
|
||||
// We didn't compute the length of the last line, add it now.
|
||||
len += lsp_range.end.character;
|
||||
|
||||
encoder.push_token_at(lsp_range.start, len, token.token_type, token.modifiers);
|
||||
} else {
|
||||
// Multiline token but the client only supports single line tokens
|
||||
// Push a token for each line.
|
||||
for line in lsp_range.start.line..=lsp_range.end.line {
|
||||
let start_character = if line == lsp_range.start.line {
|
||||
lsp_range.start.character
|
||||
} else {
|
||||
0
|
||||
};
|
||||
line_length_in_encoding.saturating_sub(lsp_range.start.character)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
} else {
|
||||
// Either client supports multiline tokens or this is a single-line token
|
||||
// Use the difference between start and end character positions
|
||||
if lsp_range.start.line == lsp_range.end.line {
|
||||
lsp_range.end.character - lsp_range.start.character
|
||||
} else {
|
||||
// Multiline token and client supports it - calculate full token length
|
||||
let token_text = &source[token.range()];
|
||||
match encoding {
|
||||
PositionEncoding::UTF8 => token_text.len().try_into().unwrap_or(u32::MAX),
|
||||
PositionEncoding::UTF16 => token_text
|
||||
.encode_utf16()
|
||||
.count()
|
||||
.try_into()
|
||||
.unwrap_or(u32::MAX),
|
||||
PositionEncoding::UTF32 => {
|
||||
token_text.chars().count().try_into().unwrap_or(u32::MAX)
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
let token_type = token.token_type as u32;
|
||||
let token_modifiers = token.modifiers.bits();
|
||||
|
||||
let start = lsp_types::Position {
|
||||
line,
|
||||
character: start_character,
|
||||
};
|
||||
|
||||
let end = if line == lsp_range.end.line {
|
||||
lsp_range.end.character
|
||||
} else {
|
||||
let line_len = line_index.line_len(
|
||||
OneIndexed::from_zero_indexed(line as usize),
|
||||
&source,
|
||||
encoding.into(),
|
||||
);
|
||||
u32::try_from(line_len).unwrap()
|
||||
};
|
||||
|
||||
let len = end - start.character;
|
||||
|
||||
encoder.push_token_at(start, len, token.token_type, token.modifiers);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
encoder.tokens
|
||||
}
|
||||
|
||||
struct Encoder {
|
||||
tokens: Vec<SemanticToken>,
|
||||
prev_line: u32,
|
||||
prev_start: u32,
|
||||
}
|
||||
|
||||
impl Encoder {
|
||||
fn push_token_at(
|
||||
&mut self,
|
||||
start: lsp_types::Position,
|
||||
length: u32,
|
||||
ty: SemanticTokenType,
|
||||
modifiers: SemanticTokenModifier,
|
||||
) {
|
||||
// LSP semantic tokens are encoded as deltas
|
||||
let delta_line = line - prev_line;
|
||||
let delta_line = start.line - self.prev_line;
|
||||
let delta_start = if delta_line == 0 {
|
||||
character - prev_start
|
||||
start.character - self.prev_start
|
||||
} else {
|
||||
character
|
||||
start.character
|
||||
};
|
||||
|
||||
lsp_tokens.push(SemanticToken {
|
||||
let token_type = ty as u32;
|
||||
let token_modifiers = modifiers.bits();
|
||||
|
||||
self.tokens.push(SemanticToken {
|
||||
delta_line,
|
||||
delta_start,
|
||||
length,
|
||||
@@ -95,9 +128,7 @@ pub(crate) fn generate_semantic_tokens(
|
||||
token_modifiers_bitset: token_modifiers,
|
||||
});
|
||||
|
||||
prev_line = line;
|
||||
prev_start = character;
|
||||
self.prev_line = start.line;
|
||||
self.prev_start = start.character;
|
||||
}
|
||||
|
||||
lsp_tokens
|
||||
}
|
||||
|
||||
@@ -36,6 +36,7 @@ mod notebook;
|
||||
mod publish_diagnostics;
|
||||
mod pull_diagnostics;
|
||||
mod rename;
|
||||
mod semantic_tokens;
|
||||
mod signature_help;
|
||||
|
||||
use std::collections::{BTreeMap, HashMap, VecDeque};
|
||||
@@ -66,11 +67,12 @@ use lsp_types::{
|
||||
DocumentDiagnosticParams, DocumentDiagnosticReportResult, FileEvent, Hover, HoverParams,
|
||||
InitializeParams, InitializeResult, InitializedParams, InlayHint, InlayHintClientCapabilities,
|
||||
InlayHintParams, NumberOrString, PartialResultParams, Position, PreviousResultId,
|
||||
PublishDiagnosticsClientCapabilities, Range, SignatureHelp, SignatureHelpParams,
|
||||
SignatureHelpTriggerKind, TextDocumentClientCapabilities, TextDocumentContentChangeEvent,
|
||||
TextDocumentIdentifier, TextDocumentItem, TextDocumentPositionParams, Url,
|
||||
VersionedTextDocumentIdentifier, WorkDoneProgressParams, WorkspaceClientCapabilities,
|
||||
WorkspaceDiagnosticParams, WorkspaceDiagnosticReportResult, WorkspaceEdit, WorkspaceFolder,
|
||||
PublishDiagnosticsClientCapabilities, Range, SemanticTokensResult, SignatureHelp,
|
||||
SignatureHelpParams, SignatureHelpTriggerKind, TextDocumentClientCapabilities,
|
||||
TextDocumentContentChangeEvent, TextDocumentIdentifier, TextDocumentItem,
|
||||
TextDocumentPositionParams, Url, VersionedTextDocumentIdentifier, WorkDoneProgressParams,
|
||||
WorkspaceClientCapabilities, WorkspaceDiagnosticParams, WorkspaceDiagnosticReportResult,
|
||||
WorkspaceEdit, WorkspaceFolder,
|
||||
};
|
||||
use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf, TestSystem};
|
||||
use rustc_hash::FxHashMap;
|
||||
@@ -964,6 +966,19 @@ impl TestServer {
|
||||
});
|
||||
self.await_response::<SignatureHelpRequest>(&signature_help_id)
|
||||
}
|
||||
|
||||
pub(crate) fn semantic_tokens_full_request(
|
||||
&mut self,
|
||||
uri: &Url,
|
||||
) -> Option<SemanticTokensResult> {
|
||||
self.send_request_await::<lsp_types::request::SemanticTokensFullRequest>(
|
||||
lsp_types::SemanticTokensParams {
|
||||
text_document: TextDocumentIdentifier { uri: uri.clone() },
|
||||
work_done_progress_params: lsp_types::WorkDoneProgressParams::default(),
|
||||
partial_result_params: PartialResultParams::default(),
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for TestServer {
|
||||
@@ -1194,6 +1209,16 @@ impl TestServerBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the `multiline_token_support` flag of the client's semantic-token
/// capabilities to `Some(enabled)`.
///
/// The intermediate `text_document` and `semantic_tokens` capability structs
/// are created with their default values if they have not been set yet.
pub(crate) fn enable_multiline_token_support(mut self, enabled: bool) -> Self {
    self.client_capabilities
        .text_document
        .get_or_insert_default()
        .semantic_tokens
        .get_or_insert_default()
        .multiline_token_support = Some(enabled);
    self
}
|
||||
|
||||
/// Set custom client capabilities (overrides any previously set capabilities)
|
||||
#[expect(dead_code)]
|
||||
pub(crate) fn with_client_capabilities(mut self, capabilities: ClientCapabilities) -> Self {
|
||||
|
||||
72
crates/ty_server/tests/e2e/semantic_tokens.rs
Normal file
72
crates/ty_server/tests/e2e/semantic_tokens.rs
Normal file
@@ -0,0 +1,72 @@
|
||||
use anyhow::Result;
|
||||
use ruff_db::system::SystemPath;
|
||||
|
||||
use crate::TestServerBuilder;
|
||||
|
||||
/// Requests full semantic tokens for a function with a multi-line docstring
/// from a server whose client does NOT advertise multiline token support, and
/// snapshots the result.
#[test]
fn multiline_token_client_not_supporting_multiline_tokens() -> Result<()> {
    let workspace_root = SystemPath::new("src");
    let foo = SystemPath::new("src/foo.py");
    // The docstring spans several lines, including blank ones, so it produces
    // a token that crosses line boundaries.
    let foo_content = r#"def my_function(param1: int, param2: str) -> bool:
    """Example function with PEP 484 type annotations.

    Args:
        param1: The first parameter.
        param2: The second parameter.

    Returns:
        The return value. True for success, False otherwise.

    """
"#;

    let mut server = TestServerBuilder::new()?
        .enable_pull_diagnostics(true)
        .enable_multiline_token_support(false)
        .with_workspace(workspace_root, None)?
        .with_file(foo, foo_content)?
        .build()
        .wait_until_workspaces_are_initialized();

    server.open_text_document(foo, foo_content, 1);

    let tokens = server.semantic_tokens_full_request(&server.file_uri(foo));

    insta::assert_json_snapshot!(tokens);

    Ok(())
}
|
||||
|
||||
/// Requests full semantic tokens for a function with a multi-line docstring
/// from a server whose client advertises multiline token support, and
/// snapshots the result.
#[test]
fn multiline_token_client_supporting_multiline_tokens() -> Result<()> {
    let workspace_root = SystemPath::new("src");
    let foo = SystemPath::new("src/foo.py");
    // The docstring spans several lines, including blank ones, so it produces
    // a token that crosses line boundaries.
    let foo_content = r#"def my_function(param1: int, param2: str) -> bool:
    """Example function with PEP 484 type annotations.

    Args:
        param1: The first parameter.
        param2: The second parameter.

    Returns:
        The return value. True for success, False otherwise.

    """
"#;

    let mut server = TestServerBuilder::new()?
        .enable_pull_diagnostics(true)
        .enable_multiline_token_support(true)
        .with_workspace(workspace_root, None)?
        .with_file(foo, foo_content)?
        .build()
        .wait_until_workspaces_are_initialized();

    server.open_text_document(foo, foo_content, 1);

    let tokens = server.semantic_tokens_full_request(&server.file_uri(foo));

    insta::assert_json_snapshot!(tokens);

    Ok(())
}
|
||||
@@ -0,0 +1,88 @@
|
||||
---
|
||||
source: crates/ty_server/tests/e2e/semantic_tokens.rs
|
||||
expression: tokens
|
||||
---
|
||||
{
|
||||
"data": [
|
||||
0,
|
||||
4,
|
||||
11,
|
||||
7,
|
||||
1,
|
||||
0,
|
||||
12,
|
||||
6,
|
||||
2,
|
||||
1,
|
||||
0,
|
||||
8,
|
||||
3,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
5,
|
||||
6,
|
||||
2,
|
||||
1,
|
||||
0,
|
||||
8,
|
||||
3,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
8,
|
||||
4,
|
||||
1,
|
||||
0,
|
||||
1,
|
||||
4,
|
||||
51,
|
||||
10,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
1,
|
||||
10,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
10,
|
||||
10,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
37,
|
||||
10,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
38,
|
||||
10,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
1,
|
||||
10,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
13,
|
||||
10,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
61,
|
||||
10,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
1,
|
||||
10,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
7,
|
||||
10,
|
||||
0
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
---
|
||||
source: crates/ty_server/tests/e2e/semantic_tokens.rs
|
||||
expression: tokens
|
||||
---
|
||||
{
|
||||
"data": [
|
||||
0,
|
||||
4,
|
||||
11,
|
||||
7,
|
||||
1,
|
||||
0,
|
||||
12,
|
||||
6,
|
||||
2,
|
||||
1,
|
||||
0,
|
||||
8,
|
||||
3,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
5,
|
||||
6,
|
||||
2,
|
||||
1,
|
||||
0,
|
||||
8,
|
||||
3,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
8,
|
||||
4,
|
||||
1,
|
||||
0,
|
||||
1,
|
||||
4,
|
||||
220,
|
||||
10,
|
||||
0
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user