Files
test/frontend/__tests__/utils/strings.spec.ts
Benjamin Falch 2bc741fb78
Some checks failed
Mark Stale PRs / stale (push) Has been cancelled
adding monkeytype
2026-04-23 13:53:44 +02:00

591 lines
21 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
import * as Strings from "../../src/ts/utils/strings";
describe("string utils", () => {
describe("highlightMatches", () => {
  // Table-driven tests for Strings.highlightMatches(text, matches).
  // Three tables are used: inputs that must be wrapped in
  // <span class="highlight">…</span>, inputs that must be left alone because
  // the match is only part of a longer word, and degenerate inputs for which
  // the original text must be returned unchanged.

  // Each row: the match stands alone as a word, so it must be highlighted.
  const shouldHighlight = [
    {
      description: "word at the beginning",
      text: "Start here.",
      matches: ["Start"],
      expected: '<span class="highlight">Start</span> here.',
    },
    {
      description: "word at the end",
      text: "reach the end",
      matches: ["end"],
      expected: 'reach the <span class="highlight">end</span>',
    },
    {
      description: "multiple matches",
      text: "one two three",
      matches: ["one", "three"],
      expected:
        '<span class="highlight">one</span> two <span class="highlight">three</span>',
    },
    {
      description: "repeated matches",
      text: "one two two",
      matches: ["two"],
      expected:
        'one <span class="highlight">two</span> <span class="highlight">two</span>',
    },
    {
      // "ab" is a prefix of "abc"; the longer candidate must win on "abc".
      description: "longest possible match",
      text: "abc ab",
      matches: ["ab", "abc"],
      expected:
        '<span class="highlight">abc</span> <span class="highlight">ab</span>',
    },
    {
      description: "if wrapped in parenthesis",
      text: "(test)",
      matches: ["test"],
      expected: '(<span class="highlight">test</span>)',
    },
    {
      description: "if wrapped in commas",
      text: ",test,",
      matches: ["test"],
      expected: ',<span class="highlight">test</span>,',
    },
    {
      description: "if wrapped in underscores",
      text: "_test_",
      matches: ["test"],
      expected: '_<span class="highlight">test</span>_',
    },
    {
      description: "words in russian",
      text: "Привет, мир!",
      matches: ["Привет", "мир"],
      expected:
        '<span class="highlight">Привет</span>, <span class="highlight">мир</span>!',
    },
    {
      // The full-width comma (U+FF0C) and exclamation mark (U+FF01) are
      // written as escapes so they cannot be confused with, or silently
      // normalised to, their ASCII look-alikes. The punctuation must survive
      // in the output, exactly as in the Arabic case below.
      description: "words with chinese punctuation",
      text: "你好\uFF0C世界\uFF01",
      matches: ["你好", "世界"],
      expected:
        '<span class="highlight">你好</span>\uFF0C<span class="highlight">世界</span>\uFF01',
    },
    {
      description: "words with arabic punctuation",
      text: "؟مرحبا، بكم؛",
      matches: ["مرحبا", "بكم"],
      expected:
        '؟<span class="highlight">مرحبا</span>، <span class="highlight">بكم</span>؛',
    },
    {
      description: "standalone numbers",
      text: "My number is 1234.",
      matches: ["1234"],
      expected: 'My number is <span class="highlight">1234</span>.',
    },
  ];

  // Each row: the match is embedded in a longer word, so the text must come
  // back untouched.
  const shouldNotHighlight = [
    {
      description: "a match within a longer word",
      text: "together",
      matches: ["get"],
    },
    {
      description: "a match with leading letters",
      text: "welcome",
      matches: ["come"],
    },
    {
      description: "a match with trailing letters",
      text: "comets",
      matches: ["come"],
    },
    {
      description: "japanese matches within longer words",
      text: "こんにちは世界",
      matches: ["こんにちは"],
    },
    {
      description: "numbers within words",
      text: "abc1234def",
      matches: ["1234"],
    },
  ];

  // Each row: nothing can possibly match, so the input is returned as-is.
  const returnOriginal = [
    {
      description: "if matches is an empty array",
      text: "Nothing to match.",
      matches: [],
    },
    {
      description: "if matches has an empty string only",
      text: "Nothing to match.",
      matches: [""],
    },
    {
      description: "if no matches found in text",
      text: "Hello world.",
      matches: ["absent"],
    },
    {
      description: "if text is empty",
      text: "",
      matches: ["anything"],
    },
  ];

  it.each(shouldHighlight)(
    "should highlight $description",
    ({ text, matches, expected }) => {
      expect(Strings.highlightMatches(text, matches)).toBe(expected);
    },
  );

  it.each(shouldNotHighlight)(
    "should not highlight $description",
    ({ text, matches }) => {
      expect(Strings.highlightMatches(text, matches)).toBe(text);
    },
  );

  it.each(returnOriginal)(
    "should return original text $description",
    ({ text, matches }) => {
      expect(Strings.highlightMatches(text, matches)).toBe(text);
    },
  );
});
describe("splitIntoCharacters", () => {
  // Plain ASCII: one array entry per character.
  it("splits regular characters", () => {
    const pieces = Strings.splitIntoCharacters("abc");
    expect(pieces).toEqual(["a", "b", "c"]);
  });

  // The middle character lies outside the Basic Multilingual Plane, so it is
  // two UTF-16 code units; it must still come back as a single entry.
  it("splits characters outside of the bmp", () => {
    const pieces = Strings.splitIntoCharacters("t𐑩e");
    expect(pieces).toEqual(["t", "𐑩", "e"]);
  });
});
describe("replaceControlCharacters", () => {
  // Rows are [input, expected output, description]. In the inputs a literal
  // backslash is spelled "\\", so "\\t" is the two characters backslash + t,
  // while "\t" in the expected column is a real tab.
  const conversions: Array<[string, string, string]> = [
    // Basic tab conversions
    ["\\t", "\t", "single tab"],
    ["\\t\\t\\t", "\t\t\t", "multiple tabs"],
    ["hello\\tworld", "hello\tworld", "tab between words"],
    ["\\tstart", "\tstart", "tab at start"],
    ["end\\t", "end\t", "tab at end"],

    // Basic newline conversions (a space is expected before the newline)
    ["\\n", " \n", "single newline with space prefix"],
    ["hello\\nworld", "hello \nworld", "newline between words with space"],
    ["\\nstart", " \nstart", "newline at start with space"],
    ["end\\n", "end \n", "newline at end with space"],

    // Complex newline handling (after first two regexes)
    ["a\\n", "a \n", "single char followed by newline gets space prefix"],
    ["hello\\n", "hello \n", "word followed by newline gets space prefix"],

    // Double-escaped sequences (should become single-escaped)
    ["\\\\t", "\\t", "double-escaped tab becomes single-escaped"],
    [
      "\\\\n",
      "\\ \n",
      "double-escaped newline becomes backslash + space + newline",
    ],
    ["\\\\t\\\\n", "\\t\\ \n", "multiple double-escaped sequences"],

    // Mixed scenarios
    [
      "\\t\\n\\\\t",
      "\t \n\\t",
      "mix of tab, newline, and double-escaped tab",
    ],
    [
      "hello\\tworld\\ntest\\\\t",
      "hello\tworld \ntest\\t",
      "complex mixed scenario",
    ],

    // Edge cases
    ["", "", "empty string"],
    ["no escapes", "no escapes", "string with no escape sequences"],
    ["\\", "\\", "single backslash"],
    ["\\x", "\\x", "backslash with non-control character"],

    // Escaped backslashes that don't precede control chars
    ["\\\\", "\\\\", "double backslash not followed by control char"],
    ["\\\\x", "\\\\x", "double backslash followed by non-control char"],
  ];

  it.each(conversions)(
    "should convert %s to %s (%s)",
    (input: string, expected: string, _description: string) => {
      const actual = Strings.replaceControlCharacters(input);
      expect(actual).toBe(expected);
    },
  );
});
describe("hasRTLCharacters", () => {
  // Rows are [expected, word, description]. The function under test is only
  // reachable through Strings.__testing, and only the first element of its
  // return value is asserted here.
  const samples = [
    // LTR characters should return false
    [false, "hello", "basic Latin text"],
    [false, "world123", "Latin text with numbers"],
    [false, "test!", "Latin text with punctuation"],
    [false, "ABC", "uppercase Latin text"],
    [false, "", "empty string"],
    [false, "123", "numbers only"],
    [false, "!@#$%", "punctuation and symbols only"],
    [false, " ", "whitespace only"],

    // Common LTR scripts
    [false, "Здравствуй", "Cyrillic text"],
    // NOTE(review): this sample contains no accented letter despite its label.
    [false, "Bonjour", "Latin with accents"],
    [false, "Καλημέρα", "Greek text"],
    [false, "こんにちは", "Japanese Hiragana"],
    [false, "你好", "Chinese characters"],
    [false, "안녕하세요", "Korean text"],

    // RTL characters should return true - Arabic
    [true, "مرحبا", "Arabic text"],
    [true, "السلام", "Arabic phrase"],
    [true, "العربية", "Arabic word"],
    [true, "٠١٢٣٤٥٦٧٨٩", "Arabic-Indic digits"],

    // RTL characters should return true - Hebrew
    [true, "שלום", "Hebrew text"],
    [true, "עברית", "Hebrew word"],
    [true, "ברוך", "Hebrew name"],

    // RTL characters should return true - Persian/Farsi
    [true, "سلام", "Persian text"],
    [true, "فارسی", "Persian word"],

    // Mixed content (should return true if ANY RTL characters are present)
    [true, "hello مرحبا", "mixed LTR and Arabic"],
    [true, "123 שלום", "numbers and Hebrew"],
    [true, "test سلام!", "Latin, Persian, and punctuation"],
    [true, "مرحبا123", "Arabic with numbers"],
    [true, "hello؟", "Latin with Arabic punctuation"],

    // Edge cases with various Unicode ranges
    [false, "𝕳𝖊𝖑𝖑𝖔", "mathematical bold text (LTR)"],
    [false, "🌍🌎🌏", "emoji"],
  ] as const;

  it.each(samples)(
    "should return %s for word '%s' (%s)",
    (expected: boolean, word: string, _description: string) => {
      const detection = Strings.__testing.hasRTLCharacters(word);
      expect(detection[0]).toBe(expected);
    },
  );
});
describe("isWordRightToLeft", () => {
  // Clear the word-direction cache before every test so that results stored
  // by one test cannot influence the next.
  beforeEach(() => {
    Strings.clearWordDirectionCache();
  });

  // Rows are [expected, word, languageRTL, description]. Only the first
  // element of the returned value is asserted.
  const directionCases = [
    // Basic functionality - should use hasRTLCharacters result when word has core content
    [false, "hello", false, "LTR word in LTR language"],
    [
      false,
      "hello",
      true,
      "LTR word in RTL language (word direction overrides language)",
    ],
    [
      true,
      "مرحبا",
      false,
      "RTL word in LTR language (word direction overrides language)",
    ],
    [true, "مرحبا", true, "RTL word in RTL language"],

    // Punctuation stripping behavior
    [false, "hello!", false, "LTR word with trailing punctuation"],
    [false, "!hello", false, "LTR word with leading punctuation"],
    [false, "!hello!", false, "LTR word with surrounding punctuation"],
    [true, "مرحبا؟", false, "RTL word with trailing punctuation"],
    [true, "؟مرحبا", false, "RTL word with leading punctuation"],
    [true, "؟مرحبا؟", false, "RTL word with surrounding punctuation"],

    // Fallback to language direction for empty/neutral content
    [false, "", false, "empty string falls back to LTR language"],
    [true, "", true, "empty string falls back to RTL language"],
    [false, "!!!", false, "punctuation only falls back to LTR language"],
    [true, "!!!", true, "punctuation only falls back to RTL language"],
    [false, " ", false, "whitespace only falls back to LTR language"],
    [true, " ", true, "whitespace only falls back to RTL language"],

    // Numbers behavior (numbers are neutral, follow hasRTLCharacters detection)
    [false, "123", false, "regular digits are not RTL"],
    [false, "123", true, "regular digits are not RTL regardless of language"],
    [true, "١٢٣", false, "Arabic-Indic digits are detected as RTL"],
    [true, "١٢٣", true, "Arabic-Indic digits are detected as RTL"],
  ] as const;

  it.each(directionCases)(
    "should return %s for word '%s' with languageRTL=%s (%s)",
    (
      expected: boolean,
      word: string,
      languageRTL: boolean,
      _description: string,
    ) => {
      const outcome = Strings.isWordRightToLeft(word, languageRTL);
      expect(outcome[0]).toBe(expected);
    },
  );

  it("should return languageRTL for undefined word", () => {
    const inLtrLanguage = Strings.isWordRightToLeft(undefined, false);
    expect(inLtrLanguage[0]).toBe(false);
    const inRtlLanguage = Strings.isWordRightToLeft(undefined, true);
    expect(inRtlLanguage[0]).toBe(true);
  });

  // testing reverseDirection (third argument set to true)
  it("should return true for LTR word with reversed direction", () => {
    const inLtrLanguage = Strings.isWordRightToLeft("hello", false, true);
    expect(inLtrLanguage[0]).toBe(true);
    const inRtlLanguage = Strings.isWordRightToLeft("hello", true, true);
    expect(inRtlLanguage[0]).toBe(true);
  });

  it("should return false for RTL word with reversed direction", () => {
    const inRtlLanguage = Strings.isWordRightToLeft("مرحبا", true, true);
    expect(inRtlLanguage[0]).toBe(false);
    const inLtrLanguage = Strings.isWordRightToLeft("مرحبا", false, true);
    expect(inLtrLanguage[0]).toBe(false);
  });

  it("should return reverse of languageRTL for undefined word with reversed direction", () => {
    const inLtrLanguage = Strings.isWordRightToLeft(undefined, false, true);
    expect(inLtrLanguage[0]).toBe(true);
    const inRtlLanguage = Strings.isWordRightToLeft(undefined, true, true);
    expect(inRtlLanguage[0]).toBe(false);
  });

  describe("caching", () => {
    // The cache is observed indirectly by spying on Map.prototype, so every
    // Map get/set/clear made while a test runs is recorded by these spies.
    let mapGetSpy: ReturnType<typeof vi.spyOn>;
    let mapSetSpy: ReturnType<typeof vi.spyOn>;
    let mapClearSpy: ReturnType<typeof vi.spyOn>;

    // Drop the recorded get/set calls so that the assertions that follow only
    // see calls made after this point.
    const forgetGetAndSetCalls = (): void => {
      mapGetSpy.mockClear();
      mapSetSpy.mockClear();
    };

    beforeEach(() => {
      mapGetSpy = vi.spyOn(Map.prototype, "get");
      mapSetSpy = vi.spyOn(Map.prototype, "set");
      mapClearSpy = vi.spyOn(Map.prototype, "clear");
    });

    afterEach(() => {
      // Put the real Map methods back, in the same order they were replaced.
      for (const spy of [mapGetSpy, mapSetSpy, mapClearSpy]) {
        spy.mockRestore();
      }
    });

    it("should use cache for repeated calls", () => {
      // First call should cache the result (cache miss)
      const firstLookup = Strings.isWordRightToLeft("hello", false);
      expect(firstLookup[0]).toBe(false);
      expect(mapSetSpy).toHaveBeenCalledWith("hello", [false, 0]);

      // Later calls should be cache hits: a read of "hello" and no write.
      // The second pass uses an RTL language; the answer stays false because
      // "hello" is LTR regardless of language.
      for (const languageRTL of [false, true]) {
        forgetGetAndSetCalls();
        const repeatLookup = Strings.isWordRightToLeft("hello", languageRTL);
        expect(repeatLookup[0]).toBe(false);
        expect(mapGetSpy).toHaveBeenCalledWith("hello");
        expect(mapSetSpy).not.toHaveBeenCalled(); // Should not set again
      }
    });

    it("should cache based on core word without punctuation", () => {
      // First call should cache the result for core "hello"
      const firstLookup = Strings.isWordRightToLeft("hello", false);
      expect(firstLookup[0]).toBe(false);
      expect(mapSetSpy).toHaveBeenCalledWith("hello", [false, 0]);

      // These should all use the same cache entry since they have the same core
      for (const decorated of ["hello!", "!hello", "!hello!"]) {
        forgetGetAndSetCalls();
        const decoratedLookup = Strings.isWordRightToLeft(decorated, false);
        expect(decoratedLookup[0]).toBe(false);
        expect(mapGetSpy).toHaveBeenCalledWith("hello");
        expect(mapSetSpy).not.toHaveBeenCalled();
      }
    });

    it("should handle cache clearing", () => {
      // Cache a result
      Strings.isWordRightToLeft("test", false);
      expect(mapSetSpy).toHaveBeenCalledWith("test", [false, 0]);

      // Clear cache
      Strings.clearWordDirectionCache();
      expect(mapClearSpy).toHaveBeenCalled();

      forgetGetAndSetCalls();
      mapClearSpy.mockClear();

      // Should work normally after cache clear (cache miss again)
      const afterClear = Strings.isWordRightToLeft("test", false);
      expect(afterClear[0]).toBe(false);
      expect(mapSetSpy).toHaveBeenCalledWith("test", [false, 0]);
    });

    it("should demonstrate cache miss vs cache hit behavior", () => {
      // Test cache miss - first time seeing this word
      const missLookup = Strings.isWordRightToLeft("unique", false);
      expect(missLookup[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("unique");
      expect(mapSetSpy).toHaveBeenCalledWith("unique", [false, 0]);

      forgetGetAndSetCalls();

      // Test cache hit - same word again
      const hitLookup = Strings.isWordRightToLeft("unique", false);
      expect(hitLookup[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("unique");
      expect(mapSetSpy).not.toHaveBeenCalled(); // No cache set on hit

      forgetGetAndSetCalls();

      // Test cache miss - different word
      const otherLookup = Strings.isWordRightToLeft("different", false);
      expect(otherLookup[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("different");
      expect(mapSetSpy).toHaveBeenCalledWith("different", [false, 0]);
    });
  });
});
describe("isSpace", () => {
  // Rows are [char, expected code point (or null), description, expected].
  // Every non-ASCII space is written as a \u escape: invisible or look-alike
  // literals are easily replaced by a plain space (or dropped entirely) by
  // editors and web tooling, which silently changes what the row tests.
  it.each([
    // Should return true for directly typable spaces
    [" ", 0x0020, "regular space", true],
    ["\u2002", 0x2002, "en space", true],
    ["\u2003", 0x2003, "em space", true],
    ["\u2009", 0x2009, "thin space", true],
    ["\u3000", 0x3000, "ideographic space", true],
    ["\u00A0", 0x00a0, "non-breaking space", true],
    ["\u2007", 0x2007, "figure space", true],
    ["\u2008", 0x2008, "punctuation space", true],
    ["\u200A", 0x200a, "hair space", true],
    ["\u200B", 0x200b, "zero-width space", true],
    // Should return false for other characters
    ["\t", 0x0009, "tab", false],
    ["a", 0x0061, "letter a", false],
    ["A", 0x0041, "letter A", false],
    ["1", 0x0031, "digit 1", false],
    ["!", 0x0021, "exclamation mark", false],
    ["\n", 0x000a, "newline", false],
    ["\r", 0x000d, "carriage return", false],
    // Edge cases: anything that is not exactly one character is not a space
    ["", null, "empty string", false],
    ["\u0020\u0020", null, "two spaces", false],
    ["ab", null, "two letters", false],
  ])(
    // Placeholders are filled in argument order: char, code point (printed in
    // decimal), description, expected result.
    "should classify '%s' (code point %s, %s) as space: %s",
    (
      char: string,
      expectedCodePoint: number | null,
      _description: string,
      expected: boolean,
    ) => {
      // Sanity-check the fixture itself: when a code point is given, the
      // literal must really be that character. This is checked regardless of
      // the literal's length so that an emptied or substituted literal fails
      // loudly instead of being skipped.
      if (expectedCodePoint !== null) {
        expect(char.codePointAt(0)).toBe(expectedCodePoint);
      }
      expect(Strings.isSpace(char)).toBe(expected);
    },
  );
});
describe("areCharactersVisuallyEqual", () => {
  // Typographic variants are written as \u escapes so that each assertion
  // really compares two different code points; literal look-alikes are easily
  // flattened to ASCII (or dropped) by editors and web tooling.
  // NOTE(review): the escaped code points were reconstructed from the test
  // titles — confirm them against the equivalence groups in utils/strings.

  it("should return true for identical characters", () => {
    expect(Strings.areCharactersVisuallyEqual("a", "a")).toBe(true);
    expect(Strings.areCharactersVisuallyEqual("!", "!")).toBe(true);
  });

  it("should return false for different characters", () => {
    expect(Strings.areCharactersVisuallyEqual("a", "b")).toBe(false);
    expect(Strings.areCharactersVisuallyEqual("!", "?")).toBe(false);
  });

  it("should return true for equivalent apostrophe variants", () => {
    // U+2019 right single quotation mark
    expect(Strings.areCharactersVisuallyEqual("'", "\u2019")).toBe(true);
    // U+2018 left single quotation mark
    expect(Strings.areCharactersVisuallyEqual("'", "\u2018")).toBe(true);
    expect(Strings.areCharactersVisuallyEqual("'", "ʼ")).toBe(true);
  });

  it("should return true for equivalent quote variants", () => {
    // U+201C left double quotation mark
    expect(Strings.areCharactersVisuallyEqual('"', "\u201C")).toBe(true);
    // U+201D right double quotation mark
    expect(Strings.areCharactersVisuallyEqual('"', "\u201D")).toBe(true);
    expect(Strings.areCharactersVisuallyEqual('"', "„")).toBe(true);
  });

  it("should return true for equivalent dash variants", () => {
    // U+2013 en dash
    expect(Strings.areCharactersVisuallyEqual("-", "\u2013")).toBe(true);
    expect(Strings.areCharactersVisuallyEqual("-", "—")).toBe(true);
    // en dash against em dash: two non-ASCII members of the same group
    expect(Strings.areCharactersVisuallyEqual("\u2013", "—")).toBe(true);
  });

  it("should return true for equivalent comma variants", () => {
    // U+201A single low-9 quotation mark, which looks like a comma
    expect(Strings.areCharactersVisuallyEqual(",", "\u201A")).toBe(true);
  });

  it("should return false for characters from different equivalence groups", () => {
    expect(Strings.areCharactersVisuallyEqual("'", '"')).toBe(false);
    expect(Strings.areCharactersVisuallyEqual("-", "'")).toBe(false);
    expect(Strings.areCharactersVisuallyEqual(",", '"')).toBe(false);
  });

  describe("should check russian specific equivalences", () => {
    // These rows pass "russian" as the third argument. Several of the
    // characters are Cyrillic/Latin look-alikes, so the literals are kept
    // exactly as they were.
    it.each([
      {
        desc: "е and ё are equivalent",
        char1: "е",
        char2: "ё",
        expected: true,
      },
      {
        desc: "e and ё are equivalent",
        char1: "e",
        char2: "ё",
        expected: true,
      },
      {
        desc: "е and e are equivalent",
        char1: "е",
        char2: "e",
        expected: true,
      },
      {
        desc: "non-equivalent characters return false",
        char1: "а",
        char2: "б",
        expected: false,
      },
      {
        desc: "non-equivalent characters return false (2)",
        char1: "a",
        char2: "б",
        expected: false,
      },
    ])("$desc", ({ char1, char2, expected }) => {
      expect(
        Strings.areCharactersVisuallyEqual(char1, char2, "russian"),
      ).toBe(expected);
    });
  });
});
});