[ty] Correctly encode multiline tokens for clients not supporting multiline tokens (#22033)

2025-12-18 12:38:21 +01:00
parent 7bb5dd87ff
commit b2a8c42b51
6 changed files with 350 additions and 88 deletions
--- a/crates/ruff_source_file/src/line_index.rs
+++ b/crates/ruff_source_file/src/line_index.rs
@@ -179,42 +179,45 @@ impl LineIndex {
        let line = self.line_index(offset);
        let line_start = self.line_start(line, text);

+        let character_offset =
+            self.characters_between(TextRange::new(line_start, offset), text, encoding);
+
+        SourceLocation {
+            line,
+            character_offset: OneIndexed::from_zero_indexed(character_offset),
+        }
+    }
+
+    fn characters_between(
+        &self,
+        range: TextRange,
+        text: &str,
+        encoding: PositionEncoding,
+    ) -> usize {
        if self.is_ascii() {
-            return SourceLocation {
-                line,
-                character_offset: OneIndexed::from_zero_indexed((offset - line_start).to_usize()),
-            };
+            return (range.end() - range.start()).to_usize();
        }

        match encoding {
-            PositionEncoding::Utf8 => {
-                let character_offset = offset - line_start;
-                SourceLocation {
-                    line,
-                    character_offset: OneIndexed::from_zero_indexed(character_offset.to_usize()),
-                }
-            }
+            PositionEncoding::Utf8 => (range.end() - range.start()).to_usize(),
            PositionEncoding::Utf16 => {
-                let up_to_character = &text[TextRange::new(line_start, offset)];
-                let character = up_to_character.encode_utf16().count();
-
-                SourceLocation {
-                    line,
-                    character_offset: OneIndexed::from_zero_indexed(character),
-                }
+                let up_to_character = &text[range];
+                up_to_character.encode_utf16().count()
            }
            PositionEncoding::Utf32 => {
-                let up_to_character = &text[TextRange::new(line_start, offset)];
-                let character = up_to_character.chars().count();
-
-                SourceLocation {
-                    line,
-                    character_offset: OneIndexed::from_zero_indexed(character),
-                }
+                let up_to_character = &text[range];
+                up_to_character.chars().count()
            }
        }
    }

+    /// Returns the length of `line`, measured in code units of the given `encoding`.
+    pub fn line_len(&self, line: OneIndexed, text: &str, encoding: PositionEncoding) -> usize {
+        let line_range = self.line_range(line, text);
+
+        self.characters_between(line_range, text, encoding)
+    }
+
    /// Return the number of lines in the source code.
    pub fn line_count(&self) -> usize {
        self.line_starts().len()
--- a/crates/ty_server/src/server/api/semantic_tokens.rs
+++ b/crates/ty_server/src/server/api/semantic_tokens.rs
@@ -1,7 +1,8 @@
 use lsp_types::SemanticToken;
-use ruff_db::source::source_text;
+use ruff_db::source::{line_index, source_text};
+use ruff_source_file::OneIndexed;
 use ruff_text_size::{Ranged, TextRange};
-use ty_ide::semantic_tokens;
+use ty_ide::{SemanticTokenModifier, SemanticTokenType, semantic_tokens};
 use ty_project::ProjectDatabase;

 use crate::document::{PositionEncoding, ToRangeExt};
@@ -16,12 +17,14 @@ pub(crate) fn generate_semantic_tokens(
    multiline_token_support: bool,
 ) -> Vec<SemanticToken> {
    let source = source_text(db, file);
+    let line_index = line_index(db, file);
    let semantic_token_data = semantic_tokens(db, file, range);

-    // Convert semantic tokens to LSP format
-    let mut lsp_tokens = Vec::new();
-    let mut prev_line = 0u32;
-    let mut prev_start = 0u32;
+    let mut encoder = Encoder {
+        tokens: Vec::with_capacity(semantic_token_data.len()),
+        prev_line: 0,
+        prev_start: 0,
+    };

    for token in &*semantic_token_data {
        let Some(lsp_range) = token
@@ -32,62 +35,92 @@ pub(crate) fn generate_semantic_tokens(
            continue;
        };

-        let line = lsp_range.start.line;
-        let character = lsp_range.start.character;
+        if lsp_range.start.line == lsp_range.end.line {
+            let len = lsp_range.end.character - lsp_range.start.character;
+            encoder.push_token_at(lsp_range.start, len, token.token_type, token.modifiers);
+        } else if multiline_token_support {
+            // The client supports multiline tokens, so emit a single
+            // token whose length spans the entire range.
+            let mut len = 0;

-        // Calculate length in the negotiated encoding
-        let length = if !multiline_token_support && lsp_range.start.line != lsp_range.end.line {
-            // Token spans multiple lines but client doesn't support it
-            // Clamp to the end of the current line
-            if let Some(line_text) = source.lines().nth(lsp_range.start.line as usize) {
-                let line_length_in_encoding = match encoding {
-                    PositionEncoding::UTF8 => line_text.len().try_into().unwrap_or(u32::MAX),
-                    PositionEncoding::UTF16 => line_text
-                        .encode_utf16()
-                        .count()
-                        .try_into()
-                        .unwrap_or(u32::MAX),
-                    PositionEncoding::UTF32 => {
-                        line_text.chars().count().try_into().unwrap_or(u32::MAX)
-                    }
+            for line in lsp_range.start.line..lsp_range.end.line {
+                let line_len = line_index.line_len(
+                    OneIndexed::from_zero_indexed(line as usize),
+                    &source,
+                    encoding.into(),
+                );
+
+                len += u32::try_from(line_len).unwrap();
+            }
+
+            // The loop counted the first line from column 0; drop the part before the token.
+            len -= lsp_range.start.character;
+            // The loop stopped before the last line; add the part of it the token covers.
+            len += lsp_range.end.character;
+
+            encoder.push_token_at(lsp_range.start, len, token.token_type, token.modifiers);
+        } else {
+            // Multiline token, but the client only supports single-line tokens:
+            // push a separate token for each line the range covers.
+            for line in lsp_range.start.line..=lsp_range.end.line {
+                let start_character = if line == lsp_range.start.line {
+                    lsp_range.start.character
+                } else {
+                    0
                };
-                line_length_in_encoding.saturating_sub(lsp_range.start.character)
-            } else {
-                0
-            }
-        } else {
-            // Either client supports multiline tokens or this is a single-line token
-            // Use the difference between start and end character positions
-            if lsp_range.start.line == lsp_range.end.line {
-                lsp_range.end.character - lsp_range.start.character
-            } else {
-                // Multiline token and client supports it - calculate full token length
-                let token_text = &source[token.range()];
-                match encoding {
-                    PositionEncoding::UTF8 => token_text.len().try_into().unwrap_or(u32::MAX),
-                    PositionEncoding::UTF16 => token_text
-                        .encode_utf16()
-                        .count()
-                        .try_into()
-                        .unwrap_or(u32::MAX),
-                    PositionEncoding::UTF32 => {
-                        token_text.chars().count().try_into().unwrap_or(u32::MAX)
-                    }
-                }
-            }
-        };
-        let token_type = token.token_type as u32;
-        let token_modifiers = token.modifiers.bits();

+                let start = lsp_types::Position {
+                    line,
+                    character: start_character,
+                };
+
+                let end = if line == lsp_range.end.line {
+                    lsp_range.end.character
+                } else {
+                    let line_len = line_index.line_len(
+                        OneIndexed::from_zero_indexed(line as usize),
+                        &source,
+                        encoding.into(),
+                    );
+                    u32::try_from(line_len).unwrap()
+                };
+
+                let len = end - start.character;
+
+                encoder.push_token_at(start, len, token.token_type, token.modifiers);
+            }
+        }
+    }
+
+    encoder.tokens
+}
+
+struct Encoder {
+    tokens: Vec<SemanticToken>,
+    prev_line: u32,
+    prev_start: u32,
+}
+
+impl Encoder {
+    fn push_token_at(
+        &mut self,
+        start: lsp_types::Position,
+        length: u32,
+        ty: SemanticTokenType,
+        modifiers: SemanticTokenModifier,
+    ) {
        // LSP semantic tokens are encoded as deltas
-        let delta_line = line - prev_line;
+        let delta_line = start.line - self.prev_line;
        let delta_start = if delta_line == 0 {
-            character - prev_start
+            start.character - self.prev_start
        } else {
-            character
+            start.character
        };

-        lsp_tokens.push(SemanticToken {
+        let token_type = ty as u32;
+        let token_modifiers = modifiers.bits();
+
+        self.tokens.push(SemanticToken {
            delta_line,
            delta_start,
            length,
@@ -95,9 +128,7 @@ pub(crate) fn generate_semantic_tokens(
            token_modifiers_bitset: token_modifiers,
        });

-        prev_line = line;
-        prev_start = character;
+        self.prev_line = start.line;
+        self.prev_start = start.character;
    }
-
-    lsp_tokens
 }
--- a/crates/ty_server/tests/e2e/main.rs
+++ b/crates/ty_server/tests/e2e/main.rs
@@ -36,6 +36,7 @@ mod notebook;
 mod publish_diagnostics;
 mod pull_diagnostics;
 mod rename;
+mod semantic_tokens;
 mod signature_help;

 use std::collections::{BTreeMap, HashMap, VecDeque};
@@ -66,11 +67,12 @@ use lsp_types::{
    DocumentDiagnosticParams, DocumentDiagnosticReportResult, FileEvent, Hover, HoverParams,
    InitializeParams, InitializeResult, InitializedParams, InlayHint, InlayHintClientCapabilities,
    InlayHintParams, NumberOrString, PartialResultParams, Position, PreviousResultId,
-    PublishDiagnosticsClientCapabilities, Range, SignatureHelp, SignatureHelpParams,
-    SignatureHelpTriggerKind, TextDocumentClientCapabilities, TextDocumentContentChangeEvent,
-    TextDocumentIdentifier, TextDocumentItem, TextDocumentPositionParams, Url,
-    VersionedTextDocumentIdentifier, WorkDoneProgressParams, WorkspaceClientCapabilities,
-    WorkspaceDiagnosticParams, WorkspaceDiagnosticReportResult, WorkspaceEdit, WorkspaceFolder,
+    PublishDiagnosticsClientCapabilities, Range, SemanticTokensResult, SignatureHelp,
+    SignatureHelpParams, SignatureHelpTriggerKind, TextDocumentClientCapabilities,
+    TextDocumentContentChangeEvent, TextDocumentIdentifier, TextDocumentItem,
+    TextDocumentPositionParams, Url, VersionedTextDocumentIdentifier, WorkDoneProgressParams,
+    WorkspaceClientCapabilities, WorkspaceDiagnosticParams, WorkspaceDiagnosticReportResult,
+    WorkspaceEdit, WorkspaceFolder,
 };
 use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf, TestSystem};
 use rustc_hash::FxHashMap;
@@ -964,6 +966,19 @@ impl TestServer {
        });
        self.await_response::<SignatureHelpRequest>(&signature_help_id)
    }
+
+    pub(crate) fn semantic_tokens_full_request(
+        &mut self,
+        uri: &Url,
+    ) -> Option<SemanticTokensResult> {
+        self.send_request_await::<lsp_types::request::SemanticTokensFullRequest>(
+            lsp_types::SemanticTokensParams {
+                text_document: TextDocumentIdentifier { uri: uri.clone() },
+                work_done_progress_params: lsp_types::WorkDoneProgressParams::default(),
+                partial_result_params: PartialResultParams::default(),
+            },
+        )
+    }
 }

 impl fmt::Debug for TestServer {
@@ -1194,6 +1209,16 @@ impl TestServerBuilder {
        self
    }

+    pub(crate) fn enable_multiline_token_support(mut self, enabled: bool) -> Self {
+        self.client_capabilities
+            .text_document
+            .get_or_insert_default()
+            .semantic_tokens
+            .get_or_insert_default()
+            .multiline_token_support = Some(enabled);
+        self
+    }
+
    /// Set custom client capabilities (overrides any previously set capabilities)
    #[expect(dead_code)]
    pub(crate) fn with_client_capabilities(mut self, capabilities: ClientCapabilities) -> Self {
--- a/crates/ty_server/tests/e2e/semantic_tokens.rs
+++ b/crates/ty_server/tests/e2e/semantic_tokens.rs
@@ -0,0 +1,72 @@
+use anyhow::Result;
+use ruff_db::system::SystemPath;
+
+use crate::TestServerBuilder;
+
+#[test]
+fn multiline_token_client_not_supporting_multiline_tokens() -> Result<()> {
+    let workspace_root = SystemPath::new("src");
+    let foo = SystemPath::new("src/foo.py");
+    let foo_content = r#"def my_function(param1: int, param2: str) -> bool:
+    """Example function with PEP 484 type annotations.
+
+    Args:
+        param1: The first parameter.
+        param2: The second parameter.
+
+    Returns:
+        The return value. True for success, False otherwise.
+
+    """
+"#;
+
+    let mut server = TestServerBuilder::new()?
+        .enable_pull_diagnostics(true)
+        .enable_multiline_token_support(false)
+        .with_workspace(workspace_root, None)?
+        .with_file(foo, foo_content)?
+        .build()
+        .wait_until_workspaces_are_initialized();
+
+    server.open_text_document(foo, foo_content, 1);
+
+    let tokens = server.semantic_tokens_full_request(&server.file_uri(foo));
+
+    insta::assert_json_snapshot!(tokens);
+
+    Ok(())
+}
+
+#[test]
+fn multiline_token_client_supporting_multiline_tokens() -> Result<()> {
+    let workspace_root = SystemPath::new("src");
+    let foo = SystemPath::new("src/foo.py");
+    let foo_content = r#"def my_function(param1: int, param2: str) -> bool:
+    """Example function with PEP 484 type annotations.
+
+    Args:
+        param1: The first parameter.
+        param2: The second parameter.
+
+    Returns:
+        The return value. True for success, False otherwise.
+
+    """
+"#;
+
+    let mut server = TestServerBuilder::new()?
+        .enable_pull_diagnostics(true)
+        .enable_multiline_token_support(true)
+        .with_workspace(workspace_root, None)?
+        .with_file(foo, foo_content)?
+        .build()
+        .wait_until_workspaces_are_initialized();
+
+    server.open_text_document(foo, foo_content, 1);
+
+    let tokens = server.semantic_tokens_full_request(&server.file_uri(foo));
+
+    insta::assert_json_snapshot!(tokens);
+
+    Ok(())
+}
--- a/crates/ty_server/tests/e2e/snapshots/e2e__semantic_tokens__multiline_token_client_not_supporting_multiline_tokens.snap
+++ b/crates/ty_server/tests/e2e/snapshots/e2e__semantic_tokens__multiline_token_client_not_supporting_multiline_tokens.snap
@@ -0,0 +1,88 @@
+---
+source: crates/ty_server/tests/e2e/semantic_tokens.rs
+expression: tokens
+---
+{
+  "data": [
+    0,
+    4,
+    11,
+    7,
+    1,
+    0,
+    12,
+    6,
+    2,
+    1,
+    0,
+    8,
+    3,
+    1,
+    0,
+    0,
+    5,
+    6,
+    2,
+    1,
+    0,
+    8,
+    3,
+    1,
+    0,
+    0,
+    8,
+    4,
+    1,
+    0,
+    1,
+    4,
+    51,
+    10,
+    0,
+    1,
+    0,
+    1,
+    10,
+    0,
+    1,
+    0,
+    10,
+    10,
+    0,
+    1,
+    0,
+    37,
+    10,
+    0,
+    1,
+    0,
+    38,
+    10,
+    0,
+    1,
+    0,
+    1,
+    10,
+    0,
+    1,
+    0,
+    13,
+    10,
+    0,
+    1,
+    0,
+    61,
+    10,
+    0,
+    1,
+    0,
+    1,
+    10,
+    0,
+    1,
+    0,
+    7,
+    10,
+    0
+  ]
+}
--- a/crates/ty_server/tests/e2e/snapshots/e2e__semantic_tokens__multiline_token_client_supporting_multiline_tokens.snap
+++ b/crates/ty_server/tests/e2e/snapshots/e2e__semantic_tokens__multiline_token_client_supporting_multiline_tokens.snap
@@ -0,0 +1,43 @@
+---
+source: crates/ty_server/tests/e2e/semantic_tokens.rs
+expression: tokens
+---
+{
+  "data": [
+    0,
+    4,
+    11,
+    7,
+    1,
+    0,
+    12,
+    6,
+    2,
+    1,
+    0,
+    8,
+    3,
+    1,
+    0,
+    0,
+    5,
+    6,
+    2,
+    1,
+    0,
+    8,
+    3,
+    1,
+    0,
+    0,
+    8,
+    4,
+    1,
+    0,
+    1,
+    4,
+    220,
+    10,
+    0
+  ]
+}