Import Tint changes from Dawn
Changes:
- c34fb14c0c04ba9e6dc2f8295615db310ac2bb51 [tintd] Implement TextDocumentCompletionRequest by Ben Clayton <bclayton@google.com>
- 5d717c97b175e994e6bb582757ba9fb392d49ca1 [tintd] Improve signature help by Ben Clayton <bclayton@google.com>
- 84b6c63433307fc1ec43102a8a4cf25acf4722d2 [tintd] Handle unicode by Ben Clayton <bclayton@google.com>
- 7d00535a8ebd2cdc9778dc25e6d03e5dd319e5a4 [tint][utils] Expand unicode support by Ben Clayton <bclayton@google.com>
GitOrigin-RevId: c34fb14c0c04ba9e6dc2f8295615db310ac2bb51
Change-Id: If506b8aabdc62fcca3e8ac31e825f42968aa58bf
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/183168
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: dan sinclair <dsinclair@chromium.org>
Reviewed-by: dan sinclair <dsinclair@chromium.org>
diff --git a/src/tint/lang/wgsl/ls/BUILD.bazel b/src/tint/lang/wgsl/ls/BUILD.bazel
index 8a94377..52c6e48 100644
--- a/src/tint/lang/wgsl/ls/BUILD.bazel
+++ b/src/tint/lang/wgsl/ls/BUILD.bazel
@@ -42,6 +42,7 @@
"cancel_request.cc",
"change_configuration.cc",
"change_watched_files.cc",
+ "completions.cc",
"definition.cc",
"diagnostics.cc",
"document.cc",
@@ -57,6 +58,7 @@
"set_trace.cc",
"signature_help.cc",
"symbols.cc",
+ "utils.cc",
],
hdrs = [
"file.h",
@@ -111,6 +113,7 @@
name = "test",
alwayslink = True,
srcs = [
+ "completions_test.cc",
"definition_test.cc",
"diagnostics_test.cc",
"helpers_test.cc",
diff --git a/src/tint/lang/wgsl/ls/BUILD.cmake b/src/tint/lang/wgsl/ls/BUILD.cmake
index f5b6632..b6335b6 100644
--- a/src/tint/lang/wgsl/ls/BUILD.cmake
+++ b/src/tint/lang/wgsl/ls/BUILD.cmake
@@ -44,6 +44,7 @@
lang/wgsl/ls/cancel_request.cc
lang/wgsl/ls/change_configuration.cc
lang/wgsl/ls/change_watched_files.cc
+ lang/wgsl/ls/completions.cc
lang/wgsl/ls/definition.cc
lang/wgsl/ls/diagnostics.cc
lang/wgsl/ls/document.cc
@@ -63,6 +64,7 @@
lang/wgsl/ls/set_trace.cc
lang/wgsl/ls/signature_help.cc
lang/wgsl/ls/symbols.cc
+ lang/wgsl/ls/utils.cc
lang/wgsl/ls/utils.h
)
@@ -119,6 +121,7 @@
# Condition: TINT_BUILD_TINTD AND TINT_BUILD_WGSL_READER
################################################################################
tint_add_target(tint_lang_wgsl_ls_test test
+ lang/wgsl/ls/completions_test.cc
lang/wgsl/ls/definition_test.cc
lang/wgsl/ls/diagnostics_test.cc
lang/wgsl/ls/helpers_test.cc
diff --git a/src/tint/lang/wgsl/ls/BUILD.gn b/src/tint/lang/wgsl/ls/BUILD.gn
index d3d285e..4289044 100644
--- a/src/tint/lang/wgsl/ls/BUILD.gn
+++ b/src/tint/lang/wgsl/ls/BUILD.gn
@@ -47,6 +47,7 @@
"cancel_request.cc",
"change_configuration.cc",
"change_watched_files.cc",
+ "completions.cc",
"definition.cc",
"diagnostics.cc",
"document.cc",
@@ -66,6 +67,7 @@
"set_trace.cc",
"signature_help.cc",
"symbols.cc",
+ "utils.cc",
"utils.h",
]
deps = [
@@ -111,6 +113,7 @@
if (tint_build_tintd && tint_build_wgsl_reader) {
tint_unittests_source_set("unittests") {
sources = [
+ "completions_test.cc",
"definition_test.cc",
"diagnostics_test.cc",
"helpers_test.cc",
diff --git a/src/tint/lang/wgsl/ls/completions.cc b/src/tint/lang/wgsl/ls/completions.cc
new file mode 100644
index 0000000..8102fd1
--- /dev/null
+++ b/src/tint/lang/wgsl/ls/completions.cc
@@ -0,0 +1,127 @@
+// Copyright 2024 The Dawn & Tint Authors
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "src/tint/lang/wgsl/ls/server.h"
+
+#include "src/tint/lang/wgsl/ast/identifier.h"
+#include "src/tint/lang/wgsl/ast/module.h"
+#include "src/tint/lang/wgsl/ast/type_decl.h"
+#include "src/tint/lang/wgsl/builtin_fn.h"
+#include "src/tint/lang/wgsl/ls/utils.h"
+#include "src/tint/lang/wgsl/sem/block_statement.h"
+#include "src/tint/lang/wgsl/sem/function.h"
+#include "src/tint/utils/rtti/switch.h"
+
+namespace lsp = langsvr::lsp;
+
+namespace tint::wgsl::ls {
+
+/// Handler for langsvr::lsp::TextDocumentCompletionRequest.
+///
+/// Collects completion candidates, in order, from: the declarations of the block statements
+/// around the request position (innermost first), the parameters of the containing function,
+/// the module-scope type declarations, functions and variables, and finally the builtin
+/// functions. Each name is reported at most once; the first source that provides it wins.
+///
+/// @param r the completion request
+/// @returns lsp::Null if no file is found for the document URI of @p r, otherwise the list of
+/// completion items
+typename lsp::TextDocumentCompletionRequest::ResultType  //
+Server::Handle(const lsp::TextDocumentCompletionRequest& r) {
+    auto file = files_.Get(r.text_document.uri);
+    if (!file) {
+        return lsp::Null{};
+    }
+
+    // TODO(bclayton): This is very much a first-pass effort.
+    // To handle completions properly, the resolver will need to parse ASTs that are incomplete, and
+    // provide semantic information even in the case of resolver error.
+
+    std::vector<lsp::CompletionItem> out;
+    // Names that already have a completion item in `out`.
+    Hashset<std::string, 32> seen;
+
+    // Appends a completion item labelled `name` of the given kind, unless an item with the same
+    // name has already been appended. For duplicated names, the source visited first therefore
+    // decides the kind that is reported.
+    auto add = [&](const auto& name, lsp::CompletionItemKind kind) {
+        if (!seen.Add(name)) {
+            return;
+        }
+        lsp::CompletionItem item;
+        item.label = name;
+        item.kind = kind;
+        out.push_back(item);
+    };
+
+    // Position of the request, converted to a tint source location.
+    const auto loc = (*file)->Conv(r.position);
+    if (auto* stmt = (*file)->NodeAt<sem::Statement>(loc)) {
+        // Declarations of the block statements found by walking up the statement's parents.
+        for (auto* s = stmt; s != nullptr; s = s->Parent()) {
+            if (auto* block = s->As<sem::BlockStatement>()) {
+                for (auto it : block->Decls()) {
+                    add(it.key->Name(), lsp::CompletionItemKind::kVariable);
+                }
+            }
+        }
+
+        // Parameters of the function that contains the statement, if any.
+        if (auto* fn = stmt->Function()) {
+            for (auto* param : fn->Parameters()) {
+                add(param->Declaration()->name->symbol.Name(),
+                    lsp::CompletionItemKind::kVariable);
+            }
+        }
+    }
+
+    const auto& ast_module = (*file)->program.AST();
+
+    // Type declarations. Note that every type declaration is reported as kStruct.
+    for (auto decl : ast_module.TypeDecls()) {
+        add(decl->name->symbol.Name(), lsp::CompletionItemKind::kStruct);
+    }
+
+    // Functions declared in the module.
+    for (auto fn : ast_module.Functions()) {
+        add(fn->name->symbol.Name(), lsp::CompletionItemKind::kFunction);
+    }
+
+    // Module-scope variables.
+    for (auto v : ast_module.GlobalVariables()) {
+        add(v->name->symbol.Name(), lsp::CompletionItemKind::kVariable);
+    }
+
+    // Builtin functions go last, so user declarations of the same name take precedence.
+    for (auto& builtin : wgsl::kBuiltinFnStrings) {
+        add(builtin, lsp::CompletionItemKind::kFunction);
+    }
+
+    return out;
+}
+
+} // namespace tint::wgsl::ls
diff --git a/src/tint/lang/wgsl/ls/completions_test.cc b/src/tint/lang/wgsl/ls/completions_test.cc
new file mode 100644
index 0000000..fa24fb4
--- /dev/null
+++ b/src/tint/lang/wgsl/ls/completions_test.cc
@@ -0,0 +1,171 @@
+// Copyright 2024 The Dawn & Tint Authors
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <sstream>
+#include <string_view>
+
+#include "gmock/gmock.h"
+
+#include "langsvr/lsp/lsp.h"
+#include "langsvr/lsp/primitives.h"
+#include "langsvr/lsp/printer.h"
+#include "src/tint/lang/wgsl/ls/helpers_test.h"
+
+namespace tint::wgsl::ls {
+namespace {
+
+namespace lsp = langsvr::lsp;
+
+struct CompletionItem {  // Test-local label + kind pair, compared against the server's items
+    std::string_view label;  // non-owning view: the viewed string must outlive this item
+    langsvr::Optional<lsp::CompletionItemKind> kind;
+    // Two items are equal when both their label and their kind are equal.
+    bool operator==(const CompletionItem& other) const {
+        return label == other.label && kind == other.kind;
+    }
+};
+
+std::ostream& operator<<(std::ostream& stream, const CompletionItem& i) {
+    return stream << "['" << i.label << "', " << i.kind << "]";  // printed as ['label', kind]
+}
+
+struct Case {
+    std::string_view markup;  // WGSL source with markers, as consumed by ParseMarkers()
+    std::vector<CompletionItem> completions;  // Subset of all the returned completions
+};
+
+std::ostream& operator<<(std::ostream& stream, const Case& c) {
+    return stream << "wgsl: '" << c.markup << "'";  // only the markup is printed
+}
+
+using LsCompletionsTest = LsTestWithParam<Case>;
+TEST_P(LsCompletionsTest, Completions) {
+    auto parsed = ParseMarkers(GetParam().markup);
+    ASSERT_EQ(parsed.ranges.size(), 0u);     // the markup must not contain any range markers
+    ASSERT_EQ(parsed.positions.size(), 1u);  // ... and exactly one position marker
+    // Open the marker-free source and build a completion request at the marked position.
+    lsp::TextDocumentCompletionRequest req{};
+    req.text_document.uri = OpenDocument(parsed.clean);
+    req.position = parsed.positions[0];
+    // Fail the test on any error-severity diagnostic found in diagnostics_.
+    for (auto& n : diagnostics_) {
+        for (auto& d : n.diagnostics) {
+            if (d.severity == lsp::DiagnosticSeverity::kError) {
+                FAIL() << "Error: " << d.message << "\nWGSL:\n" << parsed.clean;
+            }
+        }
+    }
+    // Send the request; the result is expected to hold a vector of completion items.
+    auto future = client_session_.Send(req);
+    ASSERT_EQ(future, langsvr::Success);
+    auto res = future->get();
+    ASSERT_TRUE(res.Is<std::vector<langsvr::lsp::CompletionItem>>());
+    auto& got_lsp_items = *res.Get<std::vector<lsp::CompletionItem>>();
+    // Reduce each item to label + kind, then check every expected item is among those returned.
+    std::vector<CompletionItem> got_items;
+    for (auto& item : got_lsp_items) {
+        got_items.push_back(CompletionItem{item.label, item.kind});
+    }
+    EXPECT_THAT(GetParam().completions, testing::IsSubsetOf(got_items));
+}
+
+INSTANTIATE_TEST_SUITE_P(,
+                         LsCompletionsTest,
+                         ::testing::ValuesIn(std::vector<Case>{
+                             {  // builtin function offered at module scope
+                                 R"(const X⧘ = 42;)",
+                                 {
+                                     {"max", lsp::CompletionItemKind::kFunction},
+                                 },
+                             },
+                             {  // module-scope const declared before the position
+                                 R"(
+const ABC = 42;
+const XYZ = 1⧘;
+)",
+                                 {
+                                     {"max", lsp::CompletionItemKind::kFunction},
+                                     {"ABC", lsp::CompletionItemKind::kVariable},
+                                 },
+                             },
+                             {  // function declared after the position
+                                 R"(
+fn ABC() { _ = 2⧘; }
+fn DEF() -> i32 { return 1; }
+)",
+                                 {
+                                     {"max", lsp::CompletionItemKind::kFunction},
+                                     {"DEF", lsp::CompletionItemKind::kFunction},
+                                 },
+                             },
+                             {  // function-scope let
+                                 R"(
+fn A() {
+ let XYZ = 1;
+ _ = 2⧘;
+}
+)",
+                                 {
+                                     {"max", lsp::CompletionItemKind::kFunction},
+                                     {"XYZ", lsp::CompletionItemKind::kVariable},
+                                 },
+                             },
+                             {  // function parameter
+                                 R"(
+fn A(XYZ : i32) {
+ _ = 2⧘;
+}
+)",
+                                 {
+                                     {"max", lsp::CompletionItemKind::kFunction},
+                                     {"XYZ", lsp::CompletionItemKind::kVariable},
+                                 },
+                             },
+                             {  // struct declaration
+                                 R"(
+struct S { i : i32 }
+fn f(s : i⧘32) {}
+)",
+                                 {
+                                     {"max", lsp::CompletionItemKind::kFunction},
+                                     {"S", lsp::CompletionItemKind::kStruct},
+                                 },
+                             },
+                             {  // alias declaration: expected with kind kStruct, like a struct
+                                 R"(
+alias A = i32;
+fn f(s : i⧘32) {}
+)",
+                                 {
+                                     {"max", lsp::CompletionItemKind::kFunction},
+                                     {"A", lsp::CompletionItemKind::kStruct},
+                                 },
+                             },
+                         }));
+
+} // namespace
+} // namespace tint::wgsl::ls
diff --git a/src/tint/lang/wgsl/ls/definition.cc b/src/tint/lang/wgsl/ls/definition.cc
index d20b269..196f5bb 100644
--- a/src/tint/lang/wgsl/ls/definition.cc
+++ b/src/tint/lang/wgsl/ls/definition.cc
@@ -38,9 +38,9 @@
typename lsp::TextDocumentDefinitionRequest::SuccessType result = lsp::Null{};
if (auto file = files_.Get(r.text_document.uri)) {
- if (auto def = (*file)->Definition(Conv(r.position))) {
+ if (auto def = (*file)->Definition((*file)->Conv(r.position))) {
lsp::Location loc;
- loc.range = Conv(def->definition);
+ loc.range = (*file)->Conv(def->definition);
loc.uri = r.text_document.uri;
result = lsp::Definition{loc};
}
diff --git a/src/tint/lang/wgsl/ls/diagnostics.cc b/src/tint/lang/wgsl/ls/diagnostics.cc
index c4d0a90..58ed542 100644
--- a/src/tint/lang/wgsl/ls/diagnostics.cc
+++ b/src/tint/lang/wgsl/ls/diagnostics.cc
@@ -40,7 +40,7 @@
for (auto& diag : file.program.Diagnostics()) {
lsp::Diagnostic d;
d.message = diag.message.Plain();
- d.range = Conv(diag.source.range);
+ d.range = file.Conv(diag.source.range);
switch (diag.severity) {
case diag::Severity::Note:
d.severity = lsp::DiagnosticSeverity::kInformation;
diff --git a/src/tint/lang/wgsl/ls/document.cc b/src/tint/lang/wgsl/ls/document.cc
index 457c720..2a60da4 100644
--- a/src/tint/lang/wgsl/ls/document.cc
+++ b/src/tint/lang/wgsl/ls/document.cc
@@ -72,16 +72,17 @@
return langsvr::Failure{"document not found"};
}
- auto content = (*file)->source->content.data;
+ auto utf8 = (*file)->source->content.data;
for (auto& change : n.content_changes) {
if (auto* edit = change.Get<lsp::TextDocumentContentChangePartial>()) {
- std::vector<size_t> line_offsets = LineOffsets(content);
- size_t start = line_offsets[edit->range.start.line] + edit->range.start.character;
- size_t end = line_offsets[edit->range.end.line] + edit->range.end.character;
- content = content.substr(0, start) + edit->text + content.substr(end);
+ auto range = (*file)->Conv(edit->range);
+ std::vector<size_t> line_offsets = LineOffsets(utf8);
+ size_t utf8_start = line_offsets[range.begin.line - 1] + range.begin.column - 1;
+ size_t utf8_end = line_offsets[range.end.line - 1] + range.end.column - 1;
+ utf8 = utf8.substr(0, utf8_start) + edit->text + utf8.substr(utf8_end);
}
}
- auto source = std::make_unique<Source::File>(n.text_document.uri, content);
+ auto source = std::make_unique<Source::File>(n.text_document.uri, utf8);
auto program = wgsl::reader::Parse(source.get());
*file = std::make_shared<File>(std::move(source), n.text_document.version, std::move(program));
return PublishDiagnostics(**file);
diff --git a/src/tint/lang/wgsl/ls/file.cc b/src/tint/lang/wgsl/ls/file.cc
index 223eb8f..516da2a 100644
--- a/src/tint/lang/wgsl/ls/file.cc
+++ b/src/tint/lang/wgsl/ls/file.cc
@@ -26,6 +26,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <optional>
+#include <string_view>
#include <utility>
#include "src/tint/lang/wgsl/ast/identifier.h"
@@ -40,6 +41,7 @@
#include "src/tint/lang/wgsl/sem/type_expression.h"
#include "src/tint/lang/wgsl/sem/variable.h"
#include "src/tint/utils/rtti/switch.h"
+#include "src/tint/utils/text/unicode.h"
namespace tint::wgsl::ls {
@@ -193,4 +195,61 @@
});
}
+Source::Location File::Conv(langsvr::lsp::Position pos) const {
+    Source::Location loc;
+    loc.line = static_cast<uint32_t>(pos.line + 1);
+    loc.column = 0;  // used as a zero-based offset into the utf-8 line while scanning
+
+    // Step over whole code points until `pos.character` utf-16 code units have been consumed.
+    if (pos.line < source->content.lines.size()) {
+        const std::string_view line = source->content.lines[pos.line];
+        langsvr::lsp::Uinteger utf16_units = 0;
+        while (utf16_units < pos.character) {
+            const auto [code_point, n] = utf8::Decode(line.substr(loc.column));
+            if (n == 0) {
+                break;
+            }
+            loc.column += n;
+            utf16_units += utf16::Encode(code_point, nullptr);
+        }
+    }
+    loc.column++;  // one-based index
+    return loc;
+}
+
+langsvr::lsp::Position File::Conv(Source::Location loc) const {
+    // `loc` is one-based. A line or column of 0 would wrap around in the unsigned subtractions
+    // below, so such values are mapped to line 0 / character 0 instead.
+    langsvr::lsp::Position pos;
+    pos.line = loc.line > 0 ? loc.line - 1 : 0;
+    pos.character = 0;
+    // Convert the utf-8 code units before the column into a count of utf-16 code units.
+    if (loc.line > 0 && loc.column > 0 && pos.line < source->content.lines.size()) {
+        const std::string_view line = source->content.lines[pos.line];
+        for (uint32_t i = 0; i < loc.column - 1;) {
+            const auto [code_point, n] = utf8::Decode(line.substr(i));
+            if (n == 0) {
+                break;
+            }
+            pos.character += utf16::Encode(code_point, nullptr);
+            i += n;
+        }
+    }
+    return pos;
+}
+
+langsvr::lsp::Range File::Conv(Source::Range rng) const {
+    langsvr::lsp::Range out;  // each end point is converted on its own
+    out.start = Conv(rng.begin);
+    out.end = Conv(rng.end);
+    return out;
+}
+
+Source::Range File::Conv(langsvr::lsp::Range rng) const {
+    Source::Range out;  // each end point is converted on its own
+    out.begin = Conv(rng.start);
+    out.end = Conv(rng.end);
+    return out;
+}
+
} // namespace tint::wgsl::ls
diff --git a/src/tint/lang/wgsl/ls/file.h b/src/tint/lang/wgsl/ls/file.h
index 7f31ad9..90a86de 100644
--- a/src/tint/lang/wgsl/ls/file.h
+++ b/src/tint/lang/wgsl/ls/file.h
@@ -119,6 +119,22 @@
}
return best_node;
}
+
+ /// @return the zero-based langsvr::lsp::Position @p pos in utf-16 code units converted to a
+ /// one-based tint::Source::Location in utf-8 code units.
+ Source::Location Conv(langsvr::lsp::Position pos) const;
+
+ /// @return the one-based tint::Source::Location @p loc in utf-8 code units converted to a
+ /// zero-based langsvr::lsp::Position in utf-16 code units.
+ langsvr::lsp::Position Conv(Source::Location loc) const;
+
+ /// @return the one-based tint::Source::Range @p rng in utf-8 code units converted to a
+ /// zero-based langsvr::lsp::Range in utf-16 code units.
+ langsvr::lsp::Range Conv(Source::Range rng) const;
+
+ /// @return the zero-based langsvr::lsp::Range @p rng in utf-16 code units converted to a
+ /// one-based Source::Range in utf-8 code units.
+ Source::Range Conv(langsvr::lsp::Range rng) const;
};
} // namespace tint::wgsl::ls
diff --git a/src/tint/lang/wgsl/ls/hover.cc b/src/tint/lang/wgsl/ls/hover.cc
index fe9f123..bbdc16a 100644
--- a/src/tint/lang/wgsl/ls/hover.cc
+++ b/src/tint/lang/wgsl/ls/hover.cc
@@ -187,7 +187,8 @@
return lsp::Null{};
}
- auto* node = (*file)->NodeAt<CastableBase, File::UnwrapMode::kNoUnwrap>(Conv(r.position));
+ auto* node =
+ (*file)->NodeAt<CastableBase, File::UnwrapMode::kNoUnwrap>((*file)->Conv(r.position));
if (!node) {
return lsp::Null{};
}
@@ -199,7 +200,7 @@
Constant(val, strings);
lsp::Hover hover;
hover.contents = std::move(strings);
- hover.range = Conv(materialize->Declaration()->source.range);
+ hover.range = (*file)->Conv(materialize->Declaration()->source.range);
return hover;
}
}
@@ -209,27 +210,27 @@
Unwrap(node), //
[&](const sem::VariableUser* user) {
Variable(user->Variable(), strings);
- range = Conv(user->Declaration()->source.range);
+ range = (*file)->Conv(user->Declaration()->source.range);
},
[&](const sem::Variable* v) {
Variable(v, strings);
- range = Conv(v->Declaration()->name->source.range);
+ range = (*file)->Conv(v->Declaration()->name->source.range);
},
[&](const sem::Call* c) {
Call(c->Declaration()->target->identifier->symbol.NameView(), c, strings);
- range = Conv(c->Declaration()->target->source.range);
+ range = (*file)->Conv(c->Declaration()->target->source.range);
},
[&](const sem::FunctionExpression* expr) {
Function(expr->Function(), strings);
- range = Conv(expr->Declaration()->source.range);
+ range = (*file)->Conv(expr->Declaration()->source.range);
},
[&](const sem::BuiltinEnumExpression<wgsl::BuiltinFn>* fn) {
- if (auto* call = (*file)->NodeAt<sem::Call>(Conv(r.position))) {
+ if (auto* call = (*file)->NodeAt<sem::Call>((*file)->Conv(r.position))) {
Call(str(fn->Value()), call, strings);
} else {
strings.push_back(WGSL(str(fn->Value())));
}
- range = Conv(fn->Declaration()->source.range);
+ range = (*file)->Conv(fn->Declaration()->source.range);
},
[&](const sem::TypeExpression* expr) {
Switch(
@@ -238,7 +239,7 @@
strings.push_back(WGSL("struct " + str->Name().Name()));
},
[&](Default) { strings.push_back(WGSL(expr->Type()->FriendlyName())); });
- range = Conv(expr->Declaration()->source.range);
+ range = (*file)->Conv(expr->Declaration()->source.range);
},
[&](const sem::StructMemberAccess* access) {
if (auto* member = access->Member()->As<sem::StructMember>()) {
@@ -246,13 +247,13 @@
ss << member->Declaration()->name->symbol.NameView() << " : "
<< member->Type()->FriendlyName();
strings.push_back(WGSL(ss.str()));
- range = Conv(access->Declaration()->member->source.range);
+ range = (*file)->Conv(access->Declaration()->member->source.range);
}
},
[&](const sem::ValueExpression* expr) {
if (auto* val = expr->ConstantValue()) {
Constant(val, strings);
- range = Conv(expr->Declaration()->source.range);
+ range = (*file)->Conv(expr->Declaration()->source.range);
}
});
diff --git a/src/tint/lang/wgsl/ls/inlay_hints.cc b/src/tint/lang/wgsl/ls/inlay_hints.cc
index 6e3a76d..0788731 100644
--- a/src/tint/lang/wgsl/ls/inlay_hints.cc
+++ b/src/tint/lang/wgsl/ls/inlay_hints.cc
@@ -55,7 +55,7 @@
continue;
}
for (auto* member : str->Members()) {
- auto pos = Conv(member->Declaration()->name->source.range.begin);
+ auto pos = (*file)->Conv(member->Declaration()->name->source.range.begin);
auto add = [&](std::string text) {
lsp::InlayHint hint;
hint.position = pos;
@@ -75,7 +75,7 @@
if (!decl->type) {
if (auto* variable = program.Sem().Get(decl); variable && variable->Type()) {
lsp::InlayHint hint;
- hint.position = Conv(decl->name->source.range.end);
+ hint.position = (*file)->Conv(decl->name->source.range.end);
hint.label = " : " + variable->Type()->UnwrapRef()->FriendlyName();
hints.push_back(hint);
}
diff --git a/src/tint/lang/wgsl/ls/references.cc b/src/tint/lang/wgsl/ls/references.cc
index 9eda4d6..8f1a329 100644
--- a/src/tint/lang/wgsl/ls/references.cc
+++ b/src/tint/lang/wgsl/ls/references.cc
@@ -40,9 +40,10 @@
if (auto file = files_.Get(r.text_document.uri)) {
std::vector<lsp::Location> out;
- for (auto& ref : (*file)->References(Conv(r.position), r.context.include_declaration)) {
+ for (auto& ref :
+ (*file)->References((*file)->Conv(r.position), r.context.include_declaration)) {
lsp::Location loc;
- loc.range = Conv(ref);
+ loc.range = (*file)->Conv(ref);
loc.uri = r.text_document.uri;
out.push_back(std::move(loc));
}
diff --git a/src/tint/lang/wgsl/ls/rename.cc b/src/tint/lang/wgsl/ls/rename.cc
index 4cf5447..bae63f8 100644
--- a/src/tint/lang/wgsl/ls/rename.cc
+++ b/src/tint/lang/wgsl/ls/rename.cc
@@ -51,13 +51,13 @@
return lsp::Null{};
}
- auto def = (*file)->Definition(Conv(r.position));
+ auto def = (*file)->Definition((*file)->Conv(r.position));
if (!def) {
return lsp::Null{};
}
lsp::PrepareRenamePlaceholder out;
- out.range = Conv(def->reference);
+ out.range = (*file)->Conv(def->reference);
out.placeholder = def->text;
return lsp::PrepareRenameResult{out};
}
@@ -69,14 +69,15 @@
return lsp::Null{};
}
- if (!(*file)->Definition(Conv(r.position))) {
+ if (!(*file)->Definition((*file)->Conv(r.position))) {
return lsp::Null{};
}
std::vector<lsp::TextEdit> changes;
- for (auto& ref : (*file)->References(Conv(r.position), /* include_declaration */ true)) {
+ for (auto& ref :
+ (*file)->References((*file)->Conv(r.position), /* include_declaration */ true)) {
lsp::TextEdit edit;
- edit.range = Conv(ref);
+ edit.range = (*file)->Conv(ref);
edit.new_text = r.new_name;
changes.emplace_back(std::move(edit));
}
diff --git a/src/tint/lang/wgsl/ls/sem_tokens.cc b/src/tint/lang/wgsl/ls/sem_tokens.cc
index ddbd5a5..c389b8c 100644
--- a/src/tint/lang/wgsl/ls/sem_tokens.cc
+++ b/src/tint/lang/wgsl/ls/sem_tokens.cc
@@ -61,9 +61,9 @@
};
/// @returns a Token built from the source range @p range with the kind @p kind
-Token TokenFromRange(const tint::Source::Range& range, SemToken::Kind kind) {
+Token TokenFromRange(const File& file, const tint::Source::Range& range, SemToken::Kind kind) {
Token tok;
- tok.position = Conv(range.begin);
+ tok.position = file.Conv(range.begin);
tok.length = range.end.column - range.begin.column;
tok.kind = kind;
return tok;
@@ -91,23 +91,25 @@
node, //
[&](const ast::IdentifierExpression* expr) {
if (auto kind = TokenKindFor(sem.Get(expr))) {
- tokens.push_back(TokenFromRange(expr->identifier->source.range, *kind));
+ tokens.push_back(TokenFromRange(file, expr->identifier->source.range, *kind));
}
},
[&](const ast::Struct* str) {
- tokens.push_back(TokenFromRange(str->name->source.range, SemToken::kType));
+ tokens.push_back(TokenFromRange(file, str->name->source.range, SemToken::kType));
},
[&](const ast::StructMember* member) {
- tokens.push_back(TokenFromRange(member->name->source.range, SemToken::kMember));
+ tokens.push_back(
+ TokenFromRange(file, member->name->source.range, SemToken::kMember));
},
[&](const ast::Variable* var) {
- tokens.push_back(TokenFromRange(var->name->source.range, SemToken::kVariable));
+ tokens.push_back(
+ TokenFromRange(file, var->name->source.range, SemToken::kVariable));
},
[&](const ast::Function* fn) {
- tokens.push_back(TokenFromRange(fn->name->source.range, SemToken::kFunction));
+ tokens.push_back(TokenFromRange(file, fn->name->source.range, SemToken::kFunction));
},
[&](const ast::MemberAccessorExpression* a) {
- tokens.push_back(TokenFromRange(a->member->source.range, SemToken::kMember));
+ tokens.push_back(TokenFromRange(file, a->member->source.range, SemToken::kMember));
});
}
std::sort(tokens.begin(), tokens.end(),
diff --git a/src/tint/lang/wgsl/ls/server.cc b/src/tint/lang/wgsl/ls/server.cc
index 65e6d62..efd41e4 100644
--- a/src/tint/lang/wgsl/ls/server.cc
+++ b/src/tint/lang/wgsl/ls/server.cc
@@ -40,6 +40,11 @@
Server::Server(langsvr::Session& session) : session_(session) {
session.Register([&](const lsp::InitializeRequest&) {
lsp::InitializeResult result;
+ result.capabilities.completion_provider = [] {
+ lsp::CompletionOptions opts;
+ opts.completion_item = lsp::ServerCompletionItemOptions{};
+ return opts;
+ }();
result.capabilities.definition_provider = true;
result.capabilities.document_symbol_provider = [] {
lsp::DocumentSymbolOptions opts;
@@ -93,6 +98,7 @@
[&](const lsp::WorkspaceDidChangeConfigurationNotification& n) { return Handle(n); });
// Request handlers
+ session.Register([&](const lsp::TextDocumentCompletionRequest& r) { return Handle(r); });
session.Register([&](const lsp::TextDocumentDefinitionRequest& r) { return Handle(r); });
session.Register([&](const lsp::TextDocumentDocumentSymbolRequest& r) { return Handle(r); });
session.Register([&](const lsp::TextDocumentHoverRequest& r) { return Handle(r); });
@@ -111,8 +117,8 @@
Server::Logger::~Logger() {
lsp::WindowLogMessageNotification n;
- n.type = lsp::MessageType::kLog;
n.message = msg.str();
+ n.type = type;
(void)session.Send(n);
}
diff --git a/src/tint/lang/wgsl/ls/server.h b/src/tint/lang/wgsl/ls/server.h
index a9c55a1..ab2fb68 100644
--- a/src/tint/lang/wgsl/ls/server.h
+++ b/src/tint/lang/wgsl/ls/server.h
@@ -59,6 +59,10 @@
// Requests
////////////////////////////////////////////////////////////////////////////
+ /// Handler for langsvr::lsp::TextDocumentCompletionRequest
+ typename langsvr::lsp::TextDocumentCompletionRequest::ResultType //
+ Handle(const langsvr::lsp::TextDocumentCompletionRequest&);
+
/// Handler for langsvr::lsp::TextDocumentDefinitionRequest
typename langsvr::lsp::TextDocumentDefinitionRequest::ResultType //
Handle(const langsvr::lsp::TextDocumentDefinitionRequest&);
@@ -150,11 +154,15 @@
}
langsvr::Session& session;
+ langsvr::lsp::MessageType type;
StringStream msg{};
};
/// Log constructs a new Logger to send a log message to the client.
- Logger Log() { return Logger{session_}; }
+ Logger Log() { return Logger{session_, langsvr::lsp::MessageType::kLog}; }
+
+ /// Error constructs a new Logger to send an error message to the client.
+ Logger Error() { return Logger{session_, langsvr::lsp::MessageType::kError}; }
/// The LSP session.
langsvr::Session& session_;
diff --git a/src/tint/lang/wgsl/ls/signature_help.cc b/src/tint/lang/wgsl/ls/signature_help.cc
index 7644c74..d0f1850 100644
--- a/src/tint/lang/wgsl/ls/signature_help.cc
+++ b/src/tint/lang/wgsl/ls/signature_help.cc
@@ -25,6 +25,8 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#include <sstream>
+#include "src/tint/lang/wgsl/diagnostic_severity.h"
#include "src/tint/lang/wgsl/ls/server.h"
#include "langsvr/lsp/comparators.h"
@@ -34,6 +36,7 @@
#include "src/tint/lang/wgsl/sem/call.h"
#include "src/tint/utils/rtti/switch.h"
#include "src/tint/utils/text/string_stream.h"
+#include "src/tint/utils/text/text_style.h"
namespace lsp = langsvr::lsp;
@@ -61,13 +64,15 @@
/// @returns the zero-based index of the parameter at with the cursor at @p position, for a call
/// with the source @p call_source.
-size_t CalcParamIndex(const Source& call_source, const Source::Location& position) {
+size_t CalcParamIndex(const File& file,
+ const Source& call_source,
+ const Source::Location& position) {
size_t index = 0;
int depth = 0;
- auto range = Conv(call_source.range);
+ auto range = file.Conv(call_source.range);
auto start = range.start;
- auto end = std::min(range.end, Conv(position));
+ auto end = std::min(range.end, file.Conv(position));
auto& lines = call_source.file->content.lines;
for (auto line_idx = start.line; line_idx <= end.line; line_idx++) {
@@ -96,72 +101,67 @@
}
/// PrintOverload() emits a description of the intrinsic overload @p overload of the function with
-/// name @p intrinsic_name to @p ss.
-void PrintOverload(StyledText& ss,
+/// name @p intrinsic_name to @p name and @p description.
+void PrintOverload(std::string& name,
+ StyledText& description,
core::intrinsic::Context& context,
const core::intrinsic::OverloadInfo& overload,
std::string_view intrinsic_name) {
- // Restore old style before returning.
- auto prev_style = ss.Style();
- TINT_DEFER(ss << prev_style);
-
core::intrinsic::TemplateState templates;
auto earliest_eval_stage = core::EvaluationStage::kConstant;
- ss << style::Code << style::Function(intrinsic_name);
-
+ StyledText name_st;
+ name_st << style::Code << intrinsic_name;
if (overload.num_explicit_templates > 0) {
- ss << "<";
+ name_st << "<";
for (size_t i = 0; i < overload.num_explicit_templates; i++) {
const auto& tmpl = context.data[overload.templates + i];
if (i > 0) {
- ss << ", ";
+ name_st << ", ";
}
- ss << style::Type(tmpl.name) << " ";
+ name_st << style::Type(tmpl.name) << " ";
}
- ss << ">";
+ name_st << ">";
}
- ss << "(";
+ name_st << "(";
for (size_t i = 0; i < overload.num_parameters; i++) {
const auto& parameter = context.data[overload.parameters + i];
auto* matcher_indices = context.data[parameter.matcher_indices];
if (i > 0) {
- ss << ", ";
+ name_st << ", ";
}
if (parameter.usage != core::ParameterUsage::kNone) {
- ss << style::Variable(parameter.usage, ": ");
+ name_st << style::Variable(parameter.usage, ": ");
}
- context.Match(templates, overload, matcher_indices, earliest_eval_stage).PrintType(ss);
+ context.Match(templates, overload, matcher_indices, earliest_eval_stage).PrintType(name_st);
}
- ss << ")";
+ name_st << ")";
if (overload.return_matcher_indices.IsValid()) {
- ss << " -> ";
+ name_st << " -> ";
auto* matcher_indices = context.data[overload.return_matcher_indices];
- context.Match(templates, overload, matcher_indices, earliest_eval_stage).PrintType(ss);
+ context.Match(templates, overload, matcher_indices, earliest_eval_stage).PrintType(name_st);
}
- bool first = true;
- auto separator = [&] {
- ss << style::Plain(first ? " where:\n " : "\n ");
- first = false;
- };
+ { // Like name_st.Plain(), but no code quotes.
+ StringStream ss;
+ name_st.Walk([&](std::string_view text, TextStyle) { ss << text; });
+ name = ss.str();
+ }
for (size_t i = 0; i < overload.num_templates; i++) {
auto& tmpl = context.data[overload.templates + i];
if (auto* matcher_indices = context.data[tmpl.matcher_indices]) {
- separator();
-
- ss << style::Type(tmpl.name) << style::Plain(" is ");
+ description << "\n" << style::Type(tmpl.name) << style::Plain(" is ");
if (tmpl.kind == core::intrinsic::TemplateInfo::Kind::kType) {
context.Match(templates, overload, matcher_indices, earliest_eval_stage)
- .PrintType(ss);
+ .PrintType(description);
} else {
context.Match(templates, overload, matcher_indices, earliest_eval_stage)
- .PrintNum(ss);
+ .PrintNum(description);
}
}
}
@@ -177,7 +177,7 @@
}
auto& program = (*file)->program;
- auto pos = Conv(r.position);
+ auto pos = (*file)->Conv(r.position);
auto call = (*file)->NodeAt<sem::Call>(pos);
if (!call) {
@@ -185,13 +185,12 @@
}
lsp::SignatureHelp help;
- help.active_parameter = CalcParamIndex(call->Declaration()->source, pos);
+ help.active_parameter = CalcParamIndex(**file, call->Declaration()->source, pos);
Switch(call->Target(), //
[&](const sem::BuiltinFn* target) {
auto& data = wgsl::intrinsic::Dialect::kData;
auto& builtins = data.builtins;
auto& intrinsic_info = builtins[static_cast<size_t>(target->Fn())];
- std::string name{wgsl::str(target->Fn())};
for (size_t i = 0; i < intrinsic_info.num_overloads; i++) {
auto& overload = data[intrinsic_info.overloads + i];
@@ -201,13 +200,15 @@
auto type_mgr = core::type::Manager::Wrap(program.Types());
auto symbols = SymbolTable::Wrap(program.Symbols());
- StyledText ss;
+ StyledText description;
core::intrinsic::Context ctx{data, type_mgr, symbols};
- PrintOverload(ss, ctx, overload, name);
+ std::string name;
+ PrintOverload(name, description, ctx, overload, wgsl::str(target->Fn()));
lsp::SignatureInformation sig;
sig.parameters = params;
- sig.label = ss.Plain();
+ sig.label = name;
+ sig.documentation = Conv(description);
help.signatures.push_back(sig);
if (&overload == &target->Overload()) {
diff --git a/src/tint/lang/wgsl/ls/signature_help_test.cc b/src/tint/lang/wgsl/ls/signature_help_test.cc
index 31fcaba..f1f67b6 100644
--- a/src/tint/lang/wgsl/ls/signature_help_test.cc
+++ b/src/tint/lang/wgsl/ls/signature_help_test.cc
@@ -55,9 +55,15 @@
/* documentation */ {},
});
+ lsp::MarkupContent documentation;
+ documentation.kind = lsp::MarkupKind::kMarkdown;
+ documentation.value =
+ R"(
+`T` is `abstract-float`, `abstract-int`, `f32`, `i32`, `u32` or `f16`)";
+
lsp::SignatureInformation sig{};
- sig.label = R"('max(T, T) -> T' where:
- 'T' is 'abstract-float', 'abstract-int', 'f32', 'i32', 'u32' or 'f16')";
+ sig.label = "max(T, T) -> T";
+ sig.documentation = documentation;
sig.parameters = std::move(parameters);
out.push_back(std::move(sig));
@@ -74,9 +80,15 @@
/* documentation */ {},
});
+ lsp::MarkupContent documentation;
+ documentation.kind = lsp::MarkupKind::kMarkdown;
+ documentation.value =
+ R"(
+`T` is `abstract-float`, `abstract-int`, `f32`, `i32`, `u32` or `f16`)";
+
lsp::SignatureInformation sig{};
- sig.label = R"('max(vecN<T>, vecN<T>) -> vecN<T>' where:
- 'T' is 'abstract-float', 'abstract-int', 'f32', 'i32', 'u32' or 'f16')";
+ sig.label = R"(max(vecN<T>, vecN<T>) -> vecN<T>)";
+ sig.documentation = documentation;
sig.parameters = std::move(parameters);
out.push_back(std::move(sig));
diff --git a/src/tint/lang/wgsl/ls/symbols.cc b/src/tint/lang/wgsl/ls/symbols.cc
index 78771ba..e1db1ab 100644
--- a/src/tint/lang/wgsl/ls/symbols.cc
+++ b/src/tint/lang/wgsl/ls/symbols.cc
@@ -47,16 +47,16 @@
if (auto file = files_.Get(r.text_document.uri)) {
for (auto* decl : (*file)->program.AST().Functions()) {
lsp::DocumentSymbol sym;
- sym.range = Conv(decl->source.range);
- sym.selection_range = Conv(decl->name->source.range);
+ sym.range = (*file)->Conv(decl->source.range);
+ sym.selection_range = (*file)->Conv(decl->name->source.range);
sym.kind = lsp::SymbolKind::kFunction;
sym.name = decl->name->symbol.NameView();
symbols.push_back(sym);
}
for (auto* decl : (*file)->program.AST().GlobalVariables()) {
lsp::DocumentSymbol sym;
- sym.range = Conv(decl->source.range);
- sym.selection_range = Conv(decl->name->source.range);
+ sym.range = (*file)->Conv(decl->source.range);
+ sym.selection_range = (*file)->Conv(decl->name->source.range);
sym.kind =
decl->Is<ast::Const>() ? lsp::SymbolKind::kConstant : lsp::SymbolKind::kVariable;
sym.name = decl->name->symbol.NameView();
@@ -67,16 +67,16 @@
decl, //
[&](const ast::Struct* str) {
lsp::DocumentSymbol sym;
- sym.range = Conv(str->source.range);
- sym.selection_range = Conv(decl->name->source.range);
+ sym.range = (*file)->Conv(str->source.range);
+ sym.selection_range = (*file)->Conv(decl->name->source.range);
sym.kind = lsp::SymbolKind::kStruct;
sym.name = decl->name->symbol.NameView();
symbols.push_back(sym);
},
[&](const ast::Alias* str) {
lsp::DocumentSymbol sym;
- sym.range = Conv(str->source.range);
- sym.selection_range = Conv(decl->name->source.range);
+ sym.range = (*file)->Conv(str->source.range);
+ sym.selection_range = (*file)->Conv(decl->name->source.range);
// TODO(bclayton): Is there a better symbol kind?
sym.kind = lsp::SymbolKind::kObject;
sym.name = decl->name->symbol.NameView();
diff --git a/src/tint/lang/wgsl/ls/utils.cc b/src/tint/lang/wgsl/ls/utils.cc
new file mode 100644
index 0000000..0164f9a
--- /dev/null
+++ b/src/tint/lang/wgsl/ls/utils.cc
@@ -0,0 +1,59 @@
+// Copyright 2024 The Dawn & Tint Authors
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+// list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "src/tint/lang/wgsl/ls/utils.h"
+#include "langsvr/lsp/lsp.h"
+
+namespace tint::wgsl::ls {
+
+/// Converts @p styled_text to LSP markup content of kind markdown.
+/// Code-styled text is wrapped in backticks (a markdown code span) and bold-styled text is wrapped
+/// in `**` (markdown strong emphasis). Delimiters are always nested as `**` outside of backticks,
+/// as markdown does not interpret emphasis markers inside a code span.
+/// @param styled_text the styled text to convert
+/// @return the markdown markup content
+langsvr::lsp::MarkupContent Conv(const StyledText& styled_text) {
+    langsvr::lsp::MarkupContent out;
+    out.kind = langsvr::lsp::MarkupKind::kMarkdown;
+
+    StringStream ss;
+    bool is_bold = false;
+    bool is_code = false;
+
+    // Emits the delimiters required to move from the currently open (is_bold, is_code) state to
+    // the requested state. An open code span is always closed before the bold state is toggled,
+    // and re-opened afterwards, so that a `**` never ends up inside a code span.
+    auto transition = [&](bool bold, bool code) {
+        if (bold == is_bold && code == is_code) {
+            return;
+        }
+        if (is_code) {
+            ss << "`";
+        }
+        if (bold != is_bold) {
+            ss << "**";
+        }
+        if (code) {
+            ss << "`";
+        }
+        is_bold = bold;
+        is_code = code;
+    };
+
+    styled_text.Walk([&](std::string_view text, TextStyle style) {
+        transition(style.IsBold(), style.IsCode());
+        ss << text;
+    });
+    // Close anything left open by the final span.
+    transition(false, false);
+
+    out.value = ss.str();
+    return out;
+}
+
+} // namespace tint::wgsl::ls
diff --git a/src/tint/lang/wgsl/ls/utils.h b/src/tint/lang/wgsl/ls/utils.h
index 921fbc1..8dbfd0d 100644
--- a/src/tint/lang/wgsl/ls/utils.h
+++ b/src/tint/lang/wgsl/ls/utils.h
@@ -32,6 +32,8 @@
#include "src/tint/lang/wgsl/sem/value_expression.h"
#include "src/tint/utils/diagnostic/source.h"
#include "src/tint/utils/rtti/castable.h"
+#include "src/tint/utils/text/styled_text.h"
+#include "src/tint/utils/text/text_style.h"
// Forward declarations
namespace tint::sem {
@@ -40,29 +42,8 @@
namespace tint::wgsl::ls {
-/// @return the langsvr::lsp::Position @p pos converted to a tint::Source::Location
-inline Source::Location Conv(langsvr::lsp::Position pos) {
- Source::Location loc;
- loc.line = static_cast<uint32_t>(pos.line + 1);
- loc.column = static_cast<uint32_t>(pos.character + 1);
- return loc;
-}
-
-/// @return the tint::Source::Location @p loc converted to a langsvr::lsp::Position
-inline langsvr::lsp::Position Conv(Source::Location loc) {
- langsvr::lsp::Position pos;
- pos.line = loc.line - 1;
- pos.character = loc.column - 1;
- return pos;
-}
-
-/// @return the tint::Source::Range @p rng converted to a langsvr::lsp::Range
-inline langsvr::lsp::Range Conv(Source::Range rng) {
- langsvr::lsp::Range out;
- out.start = Conv(rng.begin);
- out.end = Conv(rng.end);
- return out;
-}
+/// Converts styled text to language server markup content.
+/// @param text the styled text to convert
+/// @return @p text converted to a langsvr::lsp::MarkupContent of kind markdown
+langsvr::lsp::MarkupContent Conv(const StyledText& text);
/// @returns the sem::Load() and sem::Materialize() unwrapped sem::ValueExpression, if `T` is a
/// sem::ValueExpression, otherwise returns @p node.
diff --git a/src/tint/utils/text/unicode.cc b/src/tint/utils/text/unicode.cc
index 5e4a4b7..157f5da 100644
--- a/src/tint/utils/text/unicode.cc
+++ b/src/tint/utils/text/unicode.cc
@@ -424,6 +424,40 @@
return Decode(reinterpret_cast<const uint8_t*>(utf8_string.data()), utf8_string.size());
}
+/// Encodes @p code_point as UTF-8.
+/// @param code_point the code point to encode
+/// @param ptr the buffer to write the code units to (must have room for 4 code units), or nullptr
+/// to only query the number of code units required
+/// @returns the number of code units written / required (1 to 4), or 0 if @p code_point is not
+/// a valid Unicode scalar value (a UTF-16 surrogate, or greater than 0x10ffff)
+size_t Encode(CodePoint code_point, uint8_t* ptr) {
+    if (code_point <= 0x7f) {
+        if (ptr) {
+            ptr[0] = static_cast<uint8_t>(code_point);
+        }
+        return 1;
+    }
+    if (code_point <= 0x7ff) {
+        if (ptr) {
+            ptr[0] = static_cast<uint8_t>(code_point >> 6) | 0b11000000;
+            ptr[1] = static_cast<uint8_t>(code_point & 0b00111111) | 0b10000000;
+        }
+        return 2;
+    }
+    if (code_point <= 0xffff) {
+        // The surrogate range is reserved for UTF-16 and must not be encoded as UTF-8.
+        // This matches utf16::Encode(), which also rejects these values.
+        if (code_point >= 0xd800 && code_point <= 0xdfff) {
+            return 0;  // invalid code point
+        }
+        if (ptr) {
+            ptr[0] = static_cast<uint8_t>(code_point >> 12) | 0b11100000;
+            ptr[1] = static_cast<uint8_t>((code_point >> 6) & 0b00111111) | 0b10000000;
+            ptr[2] = static_cast<uint8_t>(code_point & 0b00111111) | 0b10000000;
+        }
+        return 3;
+    }
+    if (code_point <= 0x10ffff) {
+        if (ptr) {
+            ptr[0] = static_cast<uint8_t>(code_point >> 18) | 0b11110000;
+            ptr[1] = static_cast<uint8_t>((code_point >> 12) & 0b00111111) | 0b10000000;
+            ptr[2] = static_cast<uint8_t>((code_point >> 6) & 0b00111111) | 0b10000000;
+            ptr[3] = static_cast<uint8_t>(code_point & 0b00111111) | 0b10000000;
+        }
+        return 4;
+    }
+    return 0;  // invalid code point
+}
+
bool IsASCII(std::string_view str) {
for (auto c : str) {
if (c & 0x80) {
@@ -435,4 +469,49 @@
} // namespace utf8
+namespace utf16 {
+
+/// Decodes the first code point of a UTF-16 sequence.
+/// @param ptr the pointer to the first code unit of the sequence
+/// @param len the maximum number of code units to read
+/// @returns the decoded code point and the number of code units consumed (1 or 2), or [0,0] if
+/// @p len is zero, the sequence is truncated, or the surrogates are unpaired / out of order
+std::pair<CodePoint, size_t> Decode(const uint16_t* ptr, size_t len) {
+    if (len < 1) {
+        return {};
+    }
+    uint16_t a = ptr[0];
+    if (a <= 0xd7ff || a >= 0xe000) {
+        // Not a surrogate: the code unit is the code point.
+        return {CodePoint{static_cast<uint32_t>(a)}, 1};
+    }
+    // 'a' is a surrogate. A pair must start with a high surrogate (0xd800..0xdbff)...
+    if (a >= 0xdc00 || len < 2) {
+        return {};
+    }
+    // ... and must be followed by a low surrogate (0xdc00..0xdfff).
+    uint32_t b = ptr[1];
+    if (b < 0xdc00 || b > 0xdfff) {
+        return {};
+    }
+    uint32_t high = static_cast<uint32_t>(a) - 0xd800u;
+    uint32_t low = b - 0xdc00u;
+    return {CodePoint{0x10000 + ((high << 10) | low)}, 2};
+}
+
+/// Decodes the first code point of a UTF-16 sequence held in the bytes of @p utf16_string.
+/// The bytes are read as 16-bit code units in the host's byte order. A trailing odd byte is
+/// ignored.
+/// @param utf16_string the string view that contains the utf16 sequence
+/// @returns the decoded code point and the number of code units consumed, or [0,0] if the next
+/// code point cannot be decoded
+std::pair<CodePoint, size_t> Decode(std::string_view utf16_string) {
+    // The string view's data has no alignment guarantee, so reading it through a uint16_t
+    // pointer is not safe. A code point uses at most two code units, so copy up to two code
+    // units, byte by byte, into correctly aligned storage and decode from there.
+    uint16_t units[2] = {};
+    const size_t available = utf16_string.size() / 2;
+    const size_t num_units = available < 2 ? available : 2;
+    char* dst = reinterpret_cast<char*>(units);
+    for (size_t i = 0; i < num_units * 2; i++) {
+        dst[i] = utf16_string[i];
+    }
+    return Decode(units, num_units);
+}
+
+/// Encodes @p code_point as UTF-16.
+/// @param code_point the code point to encode
+/// @param ptr the buffer to write the code units to, or nullptr to only query the number of code
+/// units required
+/// @returns the number of code units written / required (1 or 2), or 0 if @p code_point is a
+/// surrogate value or is greater than 0x10ffff
+size_t Encode(CodePoint code_point, uint16_t* ptr) {
+    const bool is_surrogate = code_point >= 0xd800 && code_point <= 0xdfff;
+    if (is_surrogate || code_point > 0x10ffff) {
+        return 0;  // invalid code point
+    }
+
+    if (code_point <= 0xffff) {
+        // Basic multilingual plane: a single code unit holding the code point value.
+        if (ptr) {
+            ptr[0] = static_cast<uint16_t>(code_point);
+        }
+        return 1;
+    }
+
+    // Supplementary plane: split the 20-bit offset across a high and low surrogate.
+    if (ptr) {
+        auto offset = code_point - 0x10000;
+        ptr[0] = static_cast<uint16_t>(0xd800 + (offset >> 10));
+        ptr[1] = static_cast<uint16_t>(0xdc00 + (offset & 0x3ff));
+    }
+    return 2;
+}
+
+} // namespace utf16
} // namespace tint
diff --git a/src/tint/utils/text/unicode.h b/src/tint/utils/text/unicode.h
index c993996..76e9b2a 100644
--- a/src/tint/utils/text/unicode.h
+++ b/src/tint/utils/text/unicode.h
@@ -71,23 +71,56 @@
/// Decodes the first code point in the utf8 string.
/// @param ptr the pointer to the first byte of the utf8 sequence
-/// @param len the maximum number of bytes to read
-/// @returns a pair of CodePoint and width in code units (bytes).
+/// @param len the maximum number of uint8_t to read
+/// @returns a pair of CodePoint and width in code units (uint8_t).
/// If the next code point cannot be decoded then returns [0,0].
std::pair<CodePoint, size_t> Decode(const uint8_t* ptr, size_t len);
/// Decodes the first code point in the utf8 string.
/// @param utf8_string the string view that contains the utf8 sequence
-/// @returns a pair of CodePoint and width in code units (bytes).
-/// If the next code point cannot be decoded then returns [0,0].
+/// @returns a pair of CodePoint and width in code units (uint8_t).
+/// If the next code point cannot be decoded, then returns [0,0].
std::pair<CodePoint, size_t> Decode(std::string_view utf8_string);
+/// Encodes a code point to the utf8 string buffer or queries the number of code units used to
+/// encode the code point.
+/// @param code_point the code point to encode.
+/// @param ptr the pointer to the utf8 string buffer, or nullptr to query the number of code units
+/// that would be written if @p ptr is not nullptr. When not nullptr, up to 4 code units are
+/// written, so the buffer must be large enough to hold the encoded code point.
+/// @returns the number of code units written / would be written (at most 4), or 0 if
+/// @p code_point is not a valid code point (for example, it is greater than 0x10ffff).
+size_t Encode(CodePoint code_point, uint8_t* ptr);
+
/// @returns true if all the utf-8 code points in the string are ASCII
/// (code-points 0x00..0x7f).
bool IsASCII(std::string_view);
} // namespace utf8
+namespace utf16 {
+
+/// Decodes the first code point in the utf16 string.
+/// @param ptr the pointer to the first code unit of the utf16 sequence
+/// @param len the maximum number of code units to read
+/// @returns a pair of CodePoint and width in code units (16-bit integers).
+/// If the next code point cannot be decoded then returns [0,0].
+std::pair<CodePoint, size_t> Decode(const uint16_t* ptr, size_t len);
+
+/// Decodes the first code point in the utf16 string.
+/// @param utf16_string the string view that contains the utf16 sequence. The bytes of the view
+/// are read as 16-bit code units, so `size() / 2` code units are available for decoding.
+/// @returns a pair of CodePoint and width in code units (16-bit integers).
+/// If the next code point cannot be decoded then returns [0,0].
+std::pair<CodePoint, size_t> Decode(std::string_view utf16_string);
+
+/// Encodes a code point to the utf16 string buffer or queries the number of code units used to
+/// encode the code point.
+/// @param code_point the code point to encode.
+/// @param ptr the pointer to the utf16 string buffer, or nullptr to query the number of code units
+/// that would be written if @p ptr is not nullptr.
+/// @returns the number of code units written / would be written (at most 2), or 0 if
+/// @p code_point cannot be encoded (it is in the surrogate range 0xd800..0xdfff, or is greater
+/// than 0x10ffff).
+size_t Encode(CodePoint code_point, uint16_t* ptr);
+
+} // namespace utf16
+
} // namespace tint
#endif // SRC_TINT_UTILS_TEXT_UNICODE_H_
diff --git a/src/tint/utils/text/unicode_test.cc b/src/tint/utils/text/unicode_test.cc
index c1a8c35..1887fd7 100644
--- a/src/tint/utils/text/unicode_test.cc
+++ b/src/tint/utils/text/unicode_test.cc
@@ -27,19 +27,19 @@
#include "src/tint/utils/text/unicode.h"
+#include <cstdint>
+#include <ios>
#include <string>
+#include <string_view>
#include <vector>
#include "gmock/gmock.h"
+#include "src/tint/utils/text/string.h"
/// Helper for constructing a CodePoint
#define C(x) CodePoint(x)
namespace tint {
-
-////////////////////////////////////////////////////////////////////////////////
-// CodePoint character set tests
-////////////////////////////////////////////////////////////////////////////////
namespace {
struct CodePointCase {
@@ -48,10 +48,30 @@
bool is_xid_continue;
};
-std::ostream& operator<<(std::ostream& out, CodePointCase c) {
+static std::ostream& operator<<(std::ostream& out, CodePointCase c) {
return out << c.code_point;
}
+/// A code point paired with the number of code units used to encode it.
+struct CodePointAndWidth {
+    CodePoint code_point;
+    size_t width;
+};
+
+/// @returns true if @p lhs and @p rhs hold the same code point and width
+bool operator==(const CodePointAndWidth& lhs, const CodePointAndWidth& rhs) {
+    if (!(lhs.code_point == rhs.code_point)) {
+        return false;
+    }
+    return lhs.width == rhs.width;
+}
+
+/// Prints @p cpw to @p out, for readable test failure messages.
+static std::ostream& operator<<(std::ostream& out, CodePointAndWidth cpw) {
+    out << "code_point: " << cpw.code_point;
+    out << ", width: " << cpw.width;
+    return out;
+}
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+// CodePoint character set tests
+////////////////////////////////////////////////////////////////////////////////
+namespace {
+
class CodePointTest : public testing::TestWithParam<CodePointCase> {};
TEST_P(CodePointTest, CharacterSets) {
@@ -232,33 +252,26 @@
////////////////////////////////////////////////////////////////////////////////
// DecodeUTF8 valid tests
////////////////////////////////////////////////////////////////////////////////
-namespace {
+namespace utf8_tests {
-struct CodePointAndWidth {
- CodePoint code_point;
- size_t width;
+struct UTF8Case {
+ std::vector<uint8_t> string;
+ std::vector<CodePointAndWidth> code_points;
};
-bool operator==(const CodePointAndWidth& a, const CodePointAndWidth& b) {
- return a.code_point == b.code_point && a.width == b.width;
+/// Prints the code units of @p c to @p out as comma separated, two digit hexadecimal values
+/// (for example `0xe0, 0xa4, 0xa8`), for readable test failure messages.
+static std::ostream& operator<<(std::ostream& out, UTF8Case c) {
+    static constexpr char kHexDigits[] = "0123456789abcdef";
+    for (size_t i = 0; i < c.string.size(); i++) {
+        if (i > 0) {
+            out << ", ";
+        }
+        // The digits are formatted by hand because streaming a uint8_t prints it as a
+        // character, not a number. This also avoids leaving std::hex / std::setfill set on the
+        // caller's stream.
+        const uint8_t code_unit = c.string[i];
+        out << "0x" << kHexDigits[code_unit >> 4] << kHexDigits[code_unit & 0xf];
+    }
+    return out;
}
-std::ostream& operator<<(std::ostream& out, CodePointAndWidth cpw) {
- return out << "code_point: " << cpw.code_point << ", width: " << cpw.width;
-}
+class UTF8Test : public testing::TestWithParam<UTF8Case> {};
-struct DecodeUTF8Case {
- std::string string;
- std::vector<CodePointAndWidth> expected;
-};
-
-std::ostream& operator<<(std::ostream& out, DecodeUTF8Case c) {
- return out << "'" << c.string << "'";
-}
-
-class DecodeUTF8Test : public testing::TestWithParam<DecodeUTF8Case> {};
-
-TEST_P(DecodeUTF8Test, Valid) {
+TEST_P(UTF8Test, Decode) {
auto param = GetParam();
const uint8_t* data = reinterpret_cast<const uint8_t*>(param.string.data());
@@ -275,75 +288,96 @@
got.emplace_back(CodePointAndWidth{code_point, width});
}
- EXPECT_THAT(got, ::testing::ElementsAreArray(param.expected));
+ EXPECT_THAT(got, ::testing::ElementsAreArray(param.code_points));
+}
+
+// Checks that utf8::Encode() reports the expected width for each code point of the test case,
+// and that the code units it writes match the test case's code units, in order.
+TEST_P(UTF8Test, Encode) {
+    auto param = GetParam();
+
+    // The expected code units that have not yet been compared against encoder output.
+    Slice<const uint8_t> remaining{reinterpret_cast<const uint8_t*>(param.string.data()),
+                                   param.string.size()};
+    for (auto expected : param.code_points) {
+        // Query-only mode: no buffer is provided, only the width is returned.
+        EXPECT_EQ(utf8::Encode(expected.code_point, nullptr), expected.width);
+
+        uint8_t buffer[4];
+        size_t written = utf8::Encode(expected.code_point, buffer);
+        // A wrong width would desynchronize the remaining comparisons, so stop here.
+        ASSERT_EQ(written, expected.width);
+        EXPECT_THAT(Slice<const uint8_t>(buffer, written),
+                    ::testing::ElementsAreArray(remaining.Truncate(written)));
+        remaining = remaining.Offset(written);
+    }
}
INSTANTIATE_TEST_SUITE_P(AsciiLetters,
- DecodeUTF8Test,
+ UTF8Test,
::testing::ValuesIn({
- DecodeUTF8Case{"a", {{C('a'), 1}}},
- DecodeUTF8Case{"abc", {{C('a'), 1}, {C('b'), 1}, {C('c'), 1}}},
- DecodeUTF8Case{"def", {{C('d'), 1}, {C('e'), 1}, {C('f'), 1}}},
- DecodeUTF8Case{"gh", {{C('g'), 1}, {C('h'), 1}}},
- DecodeUTF8Case{"ij", {{C('i'), 1}, {C('j'), 1}}},
- DecodeUTF8Case{"klm", {{C('k'), 1}, {C('l'), 1}, {C('m'), 1}}},
- DecodeUTF8Case{"nop", {{C('n'), 1}, {C('o'), 1}, {C('p'), 1}}},
- DecodeUTF8Case{"qr", {{C('q'), 1}, {C('r'), 1}}},
- DecodeUTF8Case{"stu", {{C('s'), 1}, {C('t'), 1}, {C('u'), 1}}},
- DecodeUTF8Case{"vw", {{C('v'), 1}, {C('w'), 1}}},
- DecodeUTF8Case{"xyz", {{C('x'), 1}, {C('y'), 1}, {C('z'), 1}}},
- DecodeUTF8Case{"A", {{C('A'), 1}}},
- DecodeUTF8Case{"ABC", {{C('A'), 1}, {C('B'), 1}, {C('C'), 1}}},
- DecodeUTF8Case{"DEF", {{C('D'), 1}, {C('E'), 1}, {C('F'), 1}}},
- DecodeUTF8Case{"GH", {{C('G'), 1}, {C('H'), 1}}},
- DecodeUTF8Case{"IJ", {{C('I'), 1}, {C('J'), 1}}},
- DecodeUTF8Case{"KLM", {{C('K'), 1}, {C('L'), 1}, {C('M'), 1}}},
- DecodeUTF8Case{"NOP", {{C('N'), 1}, {C('O'), 1}, {C('P'), 1}}},
- DecodeUTF8Case{"QR", {{C('Q'), 1}, {C('R'), 1}}},
- DecodeUTF8Case{"STU", {{C('S'), 1}, {C('T'), 1}, {C('U'), 1}}},
- DecodeUTF8Case{"VW", {{C('V'), 1}, {C('W'), 1}}},
- DecodeUTF8Case{"XYZ", {{C('X'), 1}, {C('Y'), 1}, {C('Z'), 1}}},
+ UTF8Case{{'a'}, {{C('a'), 1}}},
+ UTF8Case{{'a', 'b', 'c'}, {{C('a'), 1}, {C('b'), 1}, {C('c'), 1}}},
+ UTF8Case{{'d', 'e', 'f'}, {{C('d'), 1}, {C('e'), 1}, {C('f'), 1}}},
+ UTF8Case{{'g', 'h'}, {{C('g'), 1}, {C('h'), 1}}},
+ UTF8Case{{'i', 'j'}, {{C('i'), 1}, {C('j'), 1}}},
+ UTF8Case{{'k', 'l', 'm'}, {{C('k'), 1}, {C('l'), 1}, {C('m'), 1}}},
+ UTF8Case{{'n', 'o', 'p'}, {{C('n'), 1}, {C('o'), 1}, {C('p'), 1}}},
+ UTF8Case{{'q', 'r'}, {{C('q'), 1}, {C('r'), 1}}},
+ UTF8Case{{'s', 't', 'u'}, {{C('s'), 1}, {C('t'), 1}, {C('u'), 1}}},
+ UTF8Case{{'v', 'w'}, {{C('v'), 1}, {C('w'), 1}}},
+ UTF8Case{{'x', 'y', 'z'}, {{C('x'), 1}, {C('y'), 1}, {C('z'), 1}}},
+ UTF8Case{{'A'}, {{C('A'), 1}}},
+ UTF8Case{{'A', 'B', 'C'}, {{C('A'), 1}, {C('B'), 1}, {C('C'), 1}}},
+ UTF8Case{{'D', 'E', 'F'}, {{C('D'), 1}, {C('E'), 1}, {C('F'), 1}}},
+ UTF8Case{{'G', 'H'}, {{C('G'), 1}, {C('H'), 1}}},
+ UTF8Case{{'I', 'J'}, {{C('I'), 1}, {C('J'), 1}}},
+ UTF8Case{{'K', 'L', 'M'}, {{C('K'), 1}, {C('L'), 1}, {C('M'), 1}}},
+ UTF8Case{{'N', 'O', 'P'}, {{C('N'), 1}, {C('O'), 1}, {C('P'), 1}}},
+ UTF8Case{{'Q', 'R'}, {{C('Q'), 1}, {C('R'), 1}}},
+ UTF8Case{{'S', 'T', 'U'}, {{C('S'), 1}, {C('T'), 1}, {C('U'), 1}}},
+ UTF8Case{{'V', 'W'}, {{C('V'), 1}, {C('W'), 1}}},
+ UTF8Case{{'X', 'Y', 'Z'}, {{C('X'), 1}, {C('Y'), 1}, {C('Z'), 1}}},
}));
INSTANTIATE_TEST_SUITE_P(AsciiNumbers,
- DecodeUTF8Test,
+ UTF8Test,
::testing::ValuesIn({
- DecodeUTF8Case{"012", {{C('0'), 1}, {C('1'), 1}, {C('2'), 1}}},
- DecodeUTF8Case{"345", {{C('3'), 1}, {C('4'), 1}, {C('5'), 1}}},
- DecodeUTF8Case{"678", {{C('6'), 1}, {C('7'), 1}, {C('8'), 1}}},
- DecodeUTF8Case{"9", {{C('9'), 1}}},
+ UTF8Case{{'0', '1', '2'}, {{C('0'), 1}, {C('1'), 1}, {C('2'), 1}}},
+ UTF8Case{{'3', '4', '5'}, {{C('3'), 1}, {C('4'), 1}, {C('5'), 1}}},
+ UTF8Case{{'6', '7', '8'}, {{C('6'), 1}, {C('7'), 1}, {C('8'), 1}}},
+ UTF8Case{{'9'}, {{C('9'), 1}}},
}));
INSTANTIATE_TEST_SUITE_P(AsciiSymbols,
- DecodeUTF8Test,
+ UTF8Test,
::testing::ValuesIn({
- DecodeUTF8Case{"!\"#", {{C('!'), 1}, {C('"'), 1}, {C('#'), 1}}},
- DecodeUTF8Case{"$%&", {{C('$'), 1}, {C('%'), 1}, {C('&'), 1}}},
- DecodeUTF8Case{"'()", {{C('\''), 1}, {C('('), 1}, {C(')'), 1}}},
- DecodeUTF8Case{"*,-", {{C('*'), 1}, {C(','), 1}, {C('-'), 1}}},
- DecodeUTF8Case{"/`@", {{C('/'), 1}, {C('`'), 1}, {C('@'), 1}}},
- DecodeUTF8Case{"^\\[", {{C('^'), 1}, {C('\\'), 1}, {C('['), 1}}},
- DecodeUTF8Case{"]_|", {{C(']'), 1}, {C('_'), 1}, {C('|'), 1}}},
- DecodeUTF8Case{"{}", {{C('{'), 1}, {C('}'), 1}}},
+ UTF8Case{{'!', '"', '#'}, {{C('!'), 1}, {C('"'), 1}, {C('#'), 1}}},
+ UTF8Case{{'$', '%', '&'}, {{C('$'), 1}, {C('%'), 1}, {C('&'), 1}}},
+ UTF8Case{{'\'', '(', ')'}, {{C('\''), 1}, {C('('), 1}, {C(')'), 1}}},
+ UTF8Case{{'*', ',', '-'}, {{C('*'), 1}, {C(','), 1}, {C('-'), 1}}},
+ UTF8Case{{'/', '`', '@'}, {{C('/'), 1}, {C('`'), 1}, {C('@'), 1}}},
+ UTF8Case{{'^', '\\', '['}, {{C('^'), 1}, {C('\\'), 1}, {C('['), 1}}},
+ UTF8Case{{']', '_', '|'}, {{C(']'), 1}, {C('_'), 1}, {C('|'), 1}}},
+ UTF8Case{{'{', '}'}, {{C('{'), 1}, {C('}'), 1}}},
}));
-INSTANTIATE_TEST_SUITE_P(AsciiSpecial,
- DecodeUTF8Test,
- ::testing::ValuesIn({
- DecodeUTF8Case{"", {}},
- DecodeUTF8Case{" \t\n", {{C(' '), 1}, {C('\t'), 1}, {C('\n'), 1}}},
- DecodeUTF8Case{"\a\b\f", {{C('\a'), 1}, {C('\b'), 1}, {C('\f'), 1}}},
- DecodeUTF8Case{"\n\r\t", {{C('\n'), 1}, {C('\r'), 1}, {C('\t'), 1}}},
- DecodeUTF8Case{"\v", {{C('\v'), 1}}},
- }));
+INSTANTIATE_TEST_SUITE_P(
+ AsciiSpecial,
+ UTF8Test,
+ ::testing::ValuesIn({
+ UTF8Case{{}, {}},
+ UTF8Case{{' ', '\t', '\n'}, {{C(' '), 1}, {C('\t'), 1}, {C('\n'), 1}}},
+ UTF8Case{{'\a', '\b', '\f'}, {{C('\a'), 1}, {C('\b'), 1}, {C('\f'), 1}}},
+ UTF8Case{{'\n', '\r', '\t'}, {{C('\n'), 1}, {C('\r'), 1}, {C('\t'), 1}}},
+ UTF8Case{{'\v'}, {{C('\v'), 1}}},
+ }));
INSTANTIATE_TEST_SUITE_P(Hindi,
- DecodeUTF8Test,
- ::testing::ValuesIn({DecodeUTF8Case{
+ UTF8Test,
+ ::testing::ValuesIn({UTF8Case{
// नमस्ते दुनिया
- "\xe0\xa4\xa8\xe0\xa4\xae\xe0\xa4\xb8\xe0\xa5\x8d\xe0\xa4\xa4\xe0\xa5"
- "\x87\x20\xe0\xa4\xa6\xe0\xa5\x81\xe0\xa4\xa8\xe0\xa4\xbf\xe0\xa4\xaf"
- "\xe0\xa4\xbe",
+ {
+ 0xe0, 0xa4, 0xa8, 0xe0, 0xa4, 0xae, 0xe0, 0xa4, 0xb8, 0xe0,
+ 0xa5, 0x8d, 0xe0, 0xa4, 0xa4, 0xe0, 0xa5, 0x87, 0x20, 0xe0,
+ 0xa4, 0xa6, 0xe0, 0xa5, 0x81, 0xe0, 0xa4, 0xa8, 0xe0, 0xa4,
+ 0xbf, 0xe0, 0xa4, 0xaf, 0xe0, 0xa4, 0xbe,
+ },
{
{C(0x0928), 3}, // न
{C(0x092e), 3}, // म
@@ -362,10 +396,23 @@
}}));
INSTANTIATE_TEST_SUITE_P(Mandarin,
- DecodeUTF8Test,
- ::testing::ValuesIn({DecodeUTF8Case{
+ UTF8Test,
+ ::testing::ValuesIn({UTF8Case{
// 你好世界
- "\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xb8\x96\xe7\x95\x8c",
+ {
+ 0xe4,
+ 0xbd,
+ 0xa0,
+ 0xe5,
+ 0xa5,
+ 0xbd,
+ 0xe4,
+ 0xb8,
+ 0x96,
+ 0xe7,
+ 0x95,
+ 0x8c,
+ },
{
{C(0x4f60), 3}, // 你
{C(0x597d), 3}, // 好
@@ -375,11 +422,13 @@
}}));
INSTANTIATE_TEST_SUITE_P(Japanese,
- DecodeUTF8Test,
- ::testing::ValuesIn({DecodeUTF8Case{
+ UTF8Test,
+ ::testing::ValuesIn({UTF8Case{
// こんにちは世界
- "\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1"
- "\xe3\x81\xaf\xe4\xb8\x96\xe7\x95\x8c",
+ {
+ 0xe3, 0x81, 0x93, 0xe3, 0x82, 0x93, 0xe3, 0x81, 0xab, 0xe3, 0x81,
+ 0xa1, 0xe3, 0x81, 0xaf, 0xe4, 0xb8, 0x96, 0xe7, 0x95, 0x8c,
+ },
{
{C(0x3053), 3}, // こ
{C(0x3093), 3}, // ん
@@ -392,11 +441,13 @@
}}));
INSTANTIATE_TEST_SUITE_P(Korean,
- DecodeUTF8Test,
- ::testing::ValuesIn({DecodeUTF8Case{
+ UTF8Test,
+ ::testing::ValuesIn({UTF8Case{
// 안녕하세요 세계
- "\xec\x95\x88\xeb\x85\x95\xed\x95\x98\xec\x84\xb8"
- "\xec\x9a\x94\x20\xec\x84\xb8\xea\xb3\x84",
+ {
+ 0xec, 0x95, 0x88, 0xeb, 0x85, 0x95, 0xed, 0x95, 0x98, 0xec, 0x84,
+ 0xb8, 0xec, 0x9a, 0x94, 0x20, 0xec, 0x84, 0xb8, 0xea, 0xb3, 0x84,
+ },
{
{C(0xc548), 3}, // 안
{C(0xb155), 3}, // 녕
@@ -410,10 +461,19 @@
}}));
INSTANTIATE_TEST_SUITE_P(Emoji,
- DecodeUTF8Test,
- ::testing::ValuesIn({DecodeUTF8Case{
+ UTF8Test,
+ ::testing::ValuesIn({UTF8Case{
// 👋🌎
- "\xf0\x9f\x91\x8b\xf0\x9f\x8c\x8e",
+ {
+ 0xf0,
+ 0x9f,
+ 0x91,
+ 0x8b,
+ 0xf0,
+ 0x9f,
+ 0x8c,
+ 0x8e,
+ },
{
{C(0x1f44b), 4}, // 👋
{C(0x1f30e), 4}, // 🌎
@@ -421,12 +481,15 @@
}}));
INSTANTIATE_TEST_SUITE_P(Random,
- DecodeUTF8Test,
- ::testing::ValuesIn({DecodeUTF8Case{
+ UTF8Test,
+ ::testing::ValuesIn({UTF8Case{
// Øⓑꚫ쁹Ǵ𐌒岾🥍ⴵ㍨又ᮗ
- "\xc3\x98\xe2\x93\x91\xea\x9a\xab\xec\x81\xb9\xc7\xb4\xf0\x90\x8c\x92"
- "\xe5\xb2\xbe\xf0\x9f\xa5\x8d\xe2\xb4\xb5\xe3\x8d\xa8\xe5\x8f\x88\xe1"
- "\xae\x97",
+ {
+ 0xc3, 0x98, 0xe2, 0x93, 0x91, 0xea, 0x9a, 0xab, 0xec,
+ 0x81, 0xb9, 0xc7, 0xb4, 0xf0, 0x90, 0x8c, 0x92, 0xe5,
+ 0xb2, 0xbe, 0xf0, 0x9f, 0xa5, 0x8d, 0xe2, 0xb4, 0xb5,
+ 0xe3, 0x8d, 0xa8, 0xe5, 0x8f, 0x88, 0xe1, 0xae, 0x97,
+ },
{
{C(0x000d8), 2}, // Ø
{C(0x024d1), 3}, // ⓑ
@@ -443,61 +506,336 @@
},
}}));
-} // namespace
-
////////////////////////////////////////////////////////////////////////////////
// DecodeUTF8 invalid tests
////////////////////////////////////////////////////////////////////////////////
-namespace {
-class DecodeUTF8InvalidTest : public testing::TestWithParam<const char*> {};
+class DecodeUTF8InvalidTest : public testing::TestWithParam<std::vector<uint8_t>> {};
TEST_P(DecodeUTF8InvalidTest, Invalid) {
- auto* param = GetParam();
-
- const uint8_t* data = reinterpret_cast<const uint8_t*>(param);
- const size_t len = std::string(param).size();
-
- auto [code_point, width] = utf8::Decode(data, len);
+ auto [code_point, width] = utf8::Decode(GetParam().data(), GetParam().size());  // explicit length: input need not be NUL-terminated
EXPECT_EQ(code_point, CodePoint(0));
EXPECT_EQ(width, 0u);
}
INSTANTIATE_TEST_SUITE_P(Invalid,
DecodeUTF8InvalidTest,
- ::testing::ValuesIn({
- "\x80\x80\x80\x80", // 10000000
- "\x81\x80\x80\x80", // 10000001
- "\x8f\x80\x80\x80", // 10001111
- "\x90\x80\x80\x80", // 10010000
- "\x91\x80\x80\x80", // 10010001
- "\x9f\x80\x80\x80", // 10011111
- "\xa0\x80\x80\x80", // 10100000
- "\xa1\x80\x80\x80", // 10100001
- "\xaf\x80\x80\x80", // 10101111
- "\xb0\x80\x80\x80", // 10110000
- "\xb1\x80\x80\x80", // 10110001
- "\xbf\x80\x80\x80", // 10111111
- "\xc0\x80\x80\x80", // 11000000
- "\xc1\x80\x80\x80", // 11000001
- "\xf5\x80\x80\x80", // 11110101
- "\xf6\x80\x80\x80", // 11110110
- "\xf7\x80\x80\x80", // 11110111
- "\xf8\x80\x80\x80", // 11111000
- "\xfe\x80\x80\x80", // 11111110
- "\xff\x80\x80\x80", // 11111111
+ ::testing::ValuesIn(std::vector<std::vector<uint8_t>>{
+ {0x80, 0x80, 0x80, 0x80}, // 10000000 (0x80-0xbf: continuation byte used as the first byte)
+ {0x81, 0x80, 0x80, 0x80}, // 10000001
+ {0x8f, 0x80, 0x80, 0x80}, // 10001111
+ {0x90, 0x80, 0x80, 0x80}, // 10010000
+ {0x91, 0x80, 0x80, 0x80}, // 10010001
+ {0x9f, 0x80, 0x80, 0x80}, // 10011111
+ {0xa0, 0x80, 0x80, 0x80}, // 10100000
+ {0xa1, 0x80, 0x80, 0x80}, // 10100001
+ {0xaf, 0x80, 0x80, 0x80}, // 10101111
+ {0xb0, 0x80, 0x80, 0x80}, // 10110000
+ {0xb1, 0x80, 0x80, 0x80}, // 10110001
+ {0xbf, 0x80, 0x80, 0x80}, // 10111111
+ {0xc0, 0x80, 0x80, 0x80}, // 11000000 (0xc0, 0xc1: lead bytes that could only start overlong encodings)
+ {0xc1, 0x80, 0x80, 0x80}, // 11000001
+ {0xf5, 0x80, 0x80, 0x80}, // 11110101 (0xf5-0xff: bytes that never appear in valid UTF-8)
+ {0xf6, 0x80, 0x80, 0x80}, // 11110110
+ {0xf7, 0x80, 0x80, 0x80}, // 11110111
+ {0xf8, 0x80, 0x80, 0x80}, // 11111000
+ {0xfe, 0x80, 0x80, 0x80}, // 11111110
+ {0xff, 0x80, 0x80, 0x80}, // 11111111
- "\xd0", // 2-bytes, missing second byte
- "\xe8\x8f", // 3-bytes, missing third byte
- "\xf4\x8f\x8f", // 4-bytes, missing fourth byte
+ {0xd0}, // 2-bytes, missing second byte
+ {0xe8, 0x8f}, // 3-bytes, missing third byte
+ {0xf4, 0x8f, 0x8f}, // 4-bytes, missing fourth byte
- "\xd0\x7f", // 2-bytes, second byte MSB unset
- "\xe8\x7f\x8f", // 3-bytes, second byte MSB unset
- "\xe8\x8f\x7f", // 3-bytes, third byte MSB unset
- "\xf4\x7f\x8f\x8f", // 4-bytes, second byte MSB unset
- "\xf4\x8f\x7f\x8f", // 4-bytes, third byte MSB unset
- "\xf4\x8f\x8f\x7f", // 4-bytes, fourth byte MSB unset
+ {0xd0, 0x7f}, // 2-bytes, second byte MSB unset
+ {0xe8, 0x7f, 0x8f}, // 3-bytes, second byte MSB unset
+ {0xe8, 0x8f, 0x7f}, // 3-bytes, third byte MSB unset
+ {0xf4, 0x7f, 0x8f, 0x8f}, // 4-bytes, second byte MSB unset
+ {0xf4, 0x8f, 0x7f, 0x8f}, // 4-bytes, third byte MSB unset
+ {0xf4, 0x8f, 0x8f, 0x7f}, // 4-bytes, fourth byte MSB unset
}));
-} // namespace
+} // namespace utf8_tests
+////////////////////////////////////////////////////////////////////////////////
+// UTF-16 Decode / Encode valid tests
+////////////////////////////////////////////////////////////////////////////////
+namespace utf16_tests {
+
+struct UTF16Case {  // one parameterized case, consumed by both the Decode and the Encode test
+ std::vector<uint16_t> string;  // UTF-16 code units of the test string
+ std::vector<CodePointAndWidth> code_points;  // expected code points; widths are counted in code units
+};
+
+static std::ostream& operator<<(std::ostream& out, const UTF16Case& c) {  // by const-ref: no copy
+    for (size_t i = 0; i < c.string.size(); i++) {  // prints the code units as a comma-separated list
+        if (i > 0) {
+            out << ", ";
+        }
+        out << "0x" << std::hex << std::setfill('0') << std::setw(4) << c.string[i];  // 4 hex digits
+    }
+    return out;
+}
+
+class UTF16Test : public testing::TestWithParam<UTF16Case> {};  // fixture for the Decode and Encode tests
+
+TEST_P(UTF16Test, Decode) {  // decodes the whole string, then checks every code point and width
+    const auto& param = GetParam();  // bound by reference: the case is only read
+
+    const uint16_t* data = param.string.data();  // already uint16_t, so no cast is required
+    const size_t len = param.string.size();      // length in 16-bit code units, not bytes
+
+    std::vector<CodePointAndWidth> got;
+    size_t offset = 0;  // index of the next code unit to decode
+    while (offset < len) {
+        auto [code_point, width] = utf16::Decode(data + offset, len - offset);
+        if (width == 0) {  // no progress would be made; FAIL() leaves the test instead of looping
+            FAIL() << "Decode() failed at code unit offset " << offset;
+        }
+        offset += width;
+        got.emplace_back(CodePointAndWidth{code_point, width});
+    }
+
+    EXPECT_THAT(got, ::testing::ElementsAreArray(param.code_points));
+}
+
+TEST_P(UTF16Test, Encode) {  // encodes each expected code point and compares it with the input string
+ auto param = GetParam();
+
+ Slice<const uint16_t> str{reinterpret_cast<const uint16_t*>(param.string.data()),
+ param.string.size()};  // view of the input code units still to be matched
+ for (auto codepoint : param.code_points) {
+ EXPECT_EQ(utf16::Encode(codepoint.code_point, nullptr), codepoint.width);  // nullptr destination: only the returned length is compared
+
+ uint16_t encoded[2];  // a code point takes at most two UTF-16 code units
+ size_t len = utf16::Encode(codepoint.code_point, encoded);
+ ASSERT_EQ(len, codepoint.width);
+ EXPECT_THAT(Slice<const uint16_t>(encoded, len),
+ ::testing::ElementsAreArray(str.Truncate(len)));
+ str = str.Offset(len);  // presumably drops the `len` units just compared - confirm against Slice::Offset
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(AsciiLetters,  // a-z and A-Z: every letter is expected as one code unit
+ UTF16Test,
+ ::testing::ValuesIn({
+ UTF16Case{{'a'}, {{C('a'), 1}}},
+ UTF16Case{{'a', 'b', 'c'}, {{C('a'), 1}, {C('b'), 1}, {C('c'), 1}}},
+ UTF16Case{{'d', 'e', 'f'}, {{C('d'), 1}, {C('e'), 1}, {C('f'), 1}}},
+ UTF16Case{{'g', 'h'}, {{C('g'), 1}, {C('h'), 1}}},
+ UTF16Case{{'i', 'j'}, {{C('i'), 1}, {C('j'), 1}}},
+ UTF16Case{{'k', 'l', 'm'}, {{C('k'), 1}, {C('l'), 1}, {C('m'), 1}}},
+ UTF16Case{{'n', 'o', 'p'}, {{C('n'), 1}, {C('o'), 1}, {C('p'), 1}}},
+ UTF16Case{{'q', 'r'}, {{C('q'), 1}, {C('r'), 1}}},
+ UTF16Case{{'s', 't', 'u'}, {{C('s'), 1}, {C('t'), 1}, {C('u'), 1}}},
+ UTF16Case{{'v', 'w'}, {{C('v'), 1}, {C('w'), 1}}},
+ UTF16Case{{'x', 'y', 'z'}, {{C('x'), 1}, {C('y'), 1}, {C('z'), 1}}},
+ UTF16Case{{'A'}, {{C('A'), 1}}},
+ UTF16Case{{'A', 'B', 'C'}, {{C('A'), 1}, {C('B'), 1}, {C('C'), 1}}},
+ UTF16Case{{'D', 'E', 'F'}, {{C('D'), 1}, {C('E'), 1}, {C('F'), 1}}},
+ UTF16Case{{'G', 'H'}, {{C('G'), 1}, {C('H'), 1}}},
+ UTF16Case{{'I', 'J'}, {{C('I'), 1}, {C('J'), 1}}},
+ UTF16Case{{'K', 'L', 'M'}, {{C('K'), 1}, {C('L'), 1}, {C('M'), 1}}},
+ UTF16Case{{'N', 'O', 'P'}, {{C('N'), 1}, {C('O'), 1}, {C('P'), 1}}},
+ UTF16Case{{'Q', 'R'}, {{C('Q'), 1}, {C('R'), 1}}},
+ UTF16Case{{'S', 'T', 'U'}, {{C('S'), 1}, {C('T'), 1}, {C('U'), 1}}},
+ UTF16Case{{'V', 'W'}, {{C('V'), 1}, {C('W'), 1}}},
+ UTF16Case{{'X', 'Y', 'Z'}, {{C('X'), 1}, {C('Y'), 1}, {C('Z'), 1}}},
+ }));
+
+INSTANTIATE_TEST_SUITE_P(AsciiNumbers,  // digits 0-9: every digit is expected as one code unit
+ UTF16Test,
+ ::testing::ValuesIn({
+ UTF16Case{{'0', '1', '2'}, {{C('0'), 1}, {C('1'), 1}, {C('2'), 1}}},
+ UTF16Case{{'3', '4', '5'}, {{C('3'), 1}, {C('4'), 1}, {C('5'), 1}}},
+ UTF16Case{{'6', '7', '8'}, {{C('6'), 1}, {C('7'), 1}, {C('8'), 1}}},
+ UTF16Case{{'9'}, {{C('9'), 1}}},
+ }));
+
+INSTANTIATE_TEST_SUITE_P(AsciiSymbols,  // a selection of ASCII punctuation, one code unit each
+ UTF16Test,
+ ::testing::ValuesIn({
+ UTF16Case{{'!', '"', '#'}, {{C('!'), 1}, {C('"'), 1}, {C('#'), 1}}},
+ UTF16Case{{'$', '%', '&'}, {{C('$'), 1}, {C('%'), 1}, {C('&'), 1}}},
+ UTF16Case{{'\'', '(', ')'}, {{C('\''), 1}, {C('('), 1}, {C(')'), 1}}},
+ UTF16Case{{'*', ',', '-'}, {{C('*'), 1}, {C(','), 1}, {C('-'), 1}}},
+ UTF16Case{{'/', '`', '@'}, {{C('/'), 1}, {C('`'), 1}, {C('@'), 1}}},
+ UTF16Case{{'^', '\\', '['}, {{C('^'), 1}, {C('\\'), 1}, {C('['), 1}}},
+ UTF16Case{{']', '_', '|'}, {{C(']'), 1}, {C('_'), 1}, {C('|'), 1}}},
+ UTF16Case{{'{', '}'}, {{C('{'), 1}, {C('}'), 1}}},
+ }));
+
+INSTANTIATE_TEST_SUITE_P(
+ AsciiSpecial,  // whitespace and control characters, plus the empty string
+ UTF16Test,
+ ::testing::ValuesIn({
+ UTF16Case{{}, {}},  // empty input: no code points expected
+ UTF16Case{{' ', '\t', '\n'}, {{C(' '), 1}, {C('\t'), 1}, {C('\n'), 1}}},
+ UTF16Case{{'\a', '\b', '\f'}, {{C('\a'), 1}, {C('\b'), 1}, {C('\f'), 1}}},
+ UTF16Case{{'\n', '\r', '\t'}, {{C('\n'), 1}, {C('\r'), 1}, {C('\t'), 1}}},
+ UTF16Case{{'\v'}, {{C('\v'), 1}}},
+ }));
+
+INSTANTIATE_TEST_SUITE_P(Hindi,  // every code point here is below U+10000, so each is one code unit
+ UTF16Test,
+ ::testing::ValuesIn({UTF16Case{
+ // नमस्ते दुनिया
+ {
+ 0x0928,
+ 0x092e,
+ 0x0938,
+ 0x094d,
+ 0x0924,
+ 0x0947,
+ 0x0020,
+ 0x0926,
+ 0x0941,
+ 0x0928,
+ 0x093f,
+ 0x092f,
+ 0x093e,
+ },
+ {
+ {C(0x0928), 1}, // न
+ {C(0x092e), 1}, // म
+ {C(0x0938), 1}, // स
+ {C(0x094d), 1}, // ् //
+ {C(0x0924), 1}, // त
+ {C(0x0947), 1}, // े //
+ {C(' '), 1},  // (space, 0x0020)
+ {C(0x0926), 1}, // द
+ {C(0x0941), 1}, // ु //
+ {C(0x0928), 1}, // न
+ {C(0x093f), 1}, // ि //
+ {C(0x092f), 1}, // य
+ {C(0x093e), 1}, // ा //
+ },
+ }}));
+
+INSTANTIATE_TEST_SUITE_P(Mandarin,  // every code point here is below U+10000, so each is one code unit
+ UTF16Test,
+ ::testing::ValuesIn({UTF16Case{
+ // 你好世界
+ {0x4f60, 0x597d, 0x4e16, 0x754c},
+ {
+ {C(0x4f60), 1}, // 你
+ {C(0x597d), 1}, // 好
+ {C(0x4e16), 1}, // 世
+ {C(0x754c), 1}, // 界
+ },
+ }}));
+
+INSTANTIATE_TEST_SUITE_P(Japanese,  // every code point here is below U+10000, so each is one code unit
+ UTF16Test,
+ ::testing::ValuesIn({UTF16Case{
+ // こんにちは世界
+ {
+ 0x3053,
+ 0x3093,
+ 0x306b,
+ 0x3061,
+ 0x306f,
+ 0x4e16,
+ 0x754c,
+ },
+ {
+ {C(0x3053), 1}, // こ
+ {C(0x3093), 1}, // ん
+ {C(0x306b), 1}, // に
+ {C(0x3061), 1}, // ち
+ {C(0x306f), 1}, // は
+ {C(0x4e16), 1}, // 世
+ {C(0x754c), 1}, // 界
+ },
+ }}));
+
+INSTANTIATE_TEST_SUITE_P(Korean,  // every value is outside the surrogate range, so each is one code unit
+ UTF16Test,
+ ::testing::ValuesIn({UTF16Case{
+ // 안녕하세요 세계
+ {
+ 0xc548,
+ 0xb155,
+ 0xd558,
+ 0xc138,
+ 0xc694,
+ 0x0020,
+ 0xc138,
+ 0xacc4,
+ },
+ {
+ {C(0xc548), 1}, // 안
+ {C(0xb155), 1}, // 녕
+ {C(0xd558), 1}, // 하
+ {C(0xc138), 1}, // 세
+ {C(0xc694), 1}, // 요
+ {C(' '), 1}, // (space, 0x0020)
+ {C(0xc138), 1}, // 세
+ {C(0xacc4), 1}, // 계
+ },
+ }}));
+
+INSTANTIATE_TEST_SUITE_P(Emoji,  // code points above U+FFFF: each takes a surrogate pair (width 2)
+ UTF16Test,
+ ::testing::ValuesIn({UTF16Case{
+ // 👋🌎
+ {0xd83d, 0xdc4b, 0xd83c, 0xdf0e},  // two (high, low) surrogate pairs
+ {
+ {C(0x1f44b), 2}, // 👋
+ {C(0x1f30e), 2}, // 🌎
+ },
+ }}));
+
+INSTANTIATE_TEST_SUITE_P(Random,  // mixes single code units with two surrogate pairs
+ UTF16Test,
+ ::testing::ValuesIn({UTF16Case{
+ // Øⓑꚫ쁹Ǵ𐌒岾🥍ⴵ㍨又ᮗ
+ {
+ 0x00d8,
+ 0x24d1,
+ 0xa6ab,
+ 0xc079,
+ 0x01f4,
+ 0xd800,  // high surrogate of U+10312
+ 0xdf12,  // low surrogate of U+10312
+ 0x5cbe,
+ 0xd83e,  // high surrogate of U+1F94D
+ 0xdd4d,  // low surrogate of U+1F94D
+ 0x2d35,
+ 0x3368,
+ 0x53c8,
+ 0x1b97,
+ },
+ {
+ {C(0x000d8), 1}, // Ø
+ {C(0x024d1), 1}, // ⓑ
+ {C(0x0a6ab), 1}, // ꚫ
+ {C(0x0c079), 1}, // 쁹
+ {C(0x001f4), 1}, // Ǵ
+ {C(0x10312), 2}, // 𐌒
+ {C(0x05cbe), 1}, // 岾
+ {C(0x1f94d), 2}, // 🥍
+ {C(0x02d35), 1}, // ⴵ
+ {C(0x03368), 1}, // ㍨
+ {C(0x053c8), 1}, // 又
+ {C(0x01b97), 1}, // ᮗ
+ },
+ }}));
+
+////////////////////////////////////////////////////////////////////////////////
+// DecodeUTF16 invalid tests
+////////////////////////////////////////////////////////////////////////////////
+class DecodeUTF16InvalidTest : public testing::TestWithParam<std::vector<uint16_t>> {};  // parameter: raw code units
+
+TEST_P(DecodeUTF16InvalidTest, Invalid) {  // malformed input must decode to code point 0 with width 0
+ auto [code_point, width] = utf16::Decode(GetParam().data(), GetParam().size());
+ EXPECT_EQ(code_point, CodePoint(0));
+ EXPECT_EQ(width, 0u);
+}
+INSTANTIATE_TEST_SUITE_P(Invalid,
+ DecodeUTF16InvalidTest,
+ ::testing::ValuesIn(std::vector<std::vector<uint16_t>>{
+ {0xdc00}, // lone low surrogate (0xdc00-0xdfff), end-of-stream
+ {0xdc00, 0x0040}, // low surrogate followed by a non-surrogate
+ }));
+
+} // namespace utf16_tests
} // namespace tint