import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
import * as Strings from "../../src/ts/utils/strings";
describe("string utils", () => {
/**
 * Table-driven tests for `Strings.highlightMatches(text, matches)`.
 *
 * Three case tables feed three parameterised tests:
 *  - `shouldHighlight`: the result is compared with an explicit `expected` string;
 *  - `shouldNotHighlight`: the result must equal the input text;
 *  - `returnOriginal`: degenerate inputs, the result must equal the input text.
 */
describe("highlightMatches", () => {
  // NOTE(review): every `expected` value in this table is character-for-character
  // equal to its `text`, so these rows cannot tell highlighted output apart from
  // untouched input. The highlight markup looks like it is missing from the
  // expectations — confirm against the implementation and restore it.
  const shouldHighlight = [
    {
      description: "word at the beginning",
      text: "Start here.",
      matches: ["Start"],
      expected: 'Start here.',
    },
    {
      description: "word at the end",
      text: "reach the end",
      matches: ["end"],
      expected: 'reach the end',
    },
    {
      description: "multiple matches",
      text: "one two three",
      matches: ["one", "three"],
      expected: 'one two three',
    },
    {
      description: "repeated matches",
      text: "one two two",
      matches: ["two"],
      expected: 'one two two',
    },
    {
      description: "longest possible match",
      text: "abc ab",
      matches: ["ab", "abc"],
      expected: 'abc ab',
    },
    {
      description: "if wrapped in parentheses",
      text: "(test)",
      matches: ["test"],
      expected: '(test)',
    },
    {
      description: "if wrapped in commas",
      text: ",test,",
      matches: ["test"],
      expected: ',test,',
    },
    {
      description: "if wrapped in underscores",
      text: "_test_",
      matches: ["test"],
      expected: '_test_',
    },
    {
      description: "words in russian",
      text: "Привет, мир!",
      matches: ["Привет", "мир"],
      expected: 'Привет, мир!',
    },
    {
      description: "words with chinese punctuation",
      text: "你好,世界!",
      matches: ["你好", "世界"],
      expected: '你好,世界!',
    },
    {
      description: "words with arabic punctuation",
      text: "؟مرحبا، بكم؛",
      matches: ["مرحبا", "بكم"],
      expected: '؟مرحبا، بكم؛',
    },
    {
      description: "standalone numbers",
      text: "My number is 1234.",
      matches: ["1234"],
      expected: 'My number is 1234.',
    },
  ];

  // The match occurs only as part of a longer token, so the text must come back unchanged.
  const shouldNotHighlight = [
    {
      description: "a match within a longer word",
      text: "together",
      matches: ["get"],
    },
    {
      description: "a match with leading letters",
      text: "welcome",
      matches: ["come"],
    },
    {
      description: "a match with trailing letters",
      text: "comets",
      matches: ["come"],
    },
    {
      description: "japanese matches within longer words",
      text: "こんにちは世界",
      matches: ["こんにちは"],
    },
    {
      description: "numbers within words",
      text: "abc1234def",
      matches: ["1234"],
    },
  ];

  // Degenerate inputs: nothing to search for, or nothing to search in.
  const returnOriginal = [
    {
      description: "if matches is an empty array",
      text: "Nothing to match.",
      matches: [],
    },
    {
      description: "if matches has an empty string only",
      text: "Nothing to match.",
      matches: [""],
    },
    {
      description: "if no matches found in text",
      text: "Hello world.",
      matches: ["absent"],
    },
    {
      description: "if text is empty",
      text: "",
      matches: ["anything"],
    },
  ];

  it.each(shouldHighlight)(
    "should highlight $description",
    ({ text, matches, expected }) => {
      expect(Strings.highlightMatches(text, matches)).toBe(expected);
    },
  );

  it.each(shouldNotHighlight)(
    "should not highlight $description",
    ({ text, matches }) => {
      expect(Strings.highlightMatches(text, matches)).toBe(text);
    },
  );

  it.each(returnOriginal)(
    "should return original text $description",
    ({ text, matches }) => {
      expect(Strings.highlightMatches(text, matches)).toBe(text);
    },
  );
});
/**
 * Tests for `Strings.splitIntoCharacters`: the result is expected to hold one
 * array entry per character, including a character outside the BMP.
 */
describe("splitIntoCharacters", () => {
  it("splits regular characters", () => {
    // Plain ASCII: one entry per letter.
    const pieces = Strings.splitIntoCharacters("abc");
    expect(pieces).toEqual(["a", "b", "c"]);
  });

  it("splits characters outside of the bmp", () => {
    // The middle character must come back as a single entry.
    const pieces = Strings.splitIntoCharacters("t𐑩e");
    expect(pieces).toEqual(["t", "𐑩", "e"]);
  });
});
/**
 * Table-driven tests for `Strings.replaceControlCharacters`.
 *
 * Each row is `[input, expected output, label]`. Inputs are written as
 * TypeScript string literals, so `"\\t"` is the two characters backslash + t.
 */
describe("replaceControlCharacters", () => {
  const conversionCases: [string, string, string][] = [
    // Escaped tab -> real tab character
    ["\\t", "\t", "single tab"],
    ["\\t\\t\\t", "\t\t\t", "multiple tabs"],
    ["hello\\tworld", "hello\tworld", "tab between words"],
    ["\\tstart", "\tstart", "tab at start"],
    ["end\\t", "end\t", "tab at end"],

    // Escaped newline -> a space followed by a real newline
    ["\\n", " \n", "single newline with space prefix"],
    ["hello\\nworld", "hello \nworld", "newline between words with space"],
    ["\\nstart", " \nstart", "newline at start with space"],
    ["end\\n", "end \n", "newline at end with space"],

    // Text directly in front of an escaped newline
    ["a\\n", "a \n", "single char followed by newline gets space prefix"],
    ["hello\\n", "hello \n", "word followed by newline gets space prefix"],

    // Doubled backslash in front of t / n
    ["\\\\t", "\\t", "double-escaped tab becomes single-escaped"],
    [
      "\\\\n",
      "\\ \n",
      "double-escaped newline becomes backslash + space + newline",
    ],
    ["\\\\t\\\\n", "\\t\\ \n", "multiple double-escaped sequences"],

    // Several kinds of sequence in one input
    [
      "\\t\\n\\\\t",
      "\t \n\\t",
      "mix of tab, newline, and double-escaped tab",
    ],
    [
      "hello\\tworld\\ntest\\\\t",
      "hello\tworld \ntest\\t",
      "complex mixed scenario",
    ],

    // Inputs that must come back unchanged
    ["", "", "empty string"],
    ["no escapes", "no escapes", "string with no escape sequences"],
    ["\\", "\\", "single backslash"],
    ["\\x", "\\x", "backslash with non-control character"],

    // Doubled backslash that is not followed by t / n
    ["\\\\", "\\\\", "double backslash not followed by control char"],
    ["\\\\x", "\\\\x", "double backslash followed by non-control char"],
  ];

  it.each(conversionCases)(
    "should convert %s to %s (%s)",
    (rawInput: string, wanted: string, _label: string) => {
      const converted = Strings.replaceControlCharacters(rawInput);
      expect(converted).toBe(wanted);
    },
  );
});
/**
 * Table-driven tests for `Strings.__testing.hasRTLCharacters`.
 *
 * Each row is `[expected flag, input, label]`; only element `[0]` of the
 * returned value is asserted.
 */
describe("hasRTLCharacters", () => {
  const detectionCases = [
    // Expect false: Latin text, digits, symbols, whitespace, empty input
    [false, "hello", "basic Latin text"],
    [false, "world123", "Latin text with numbers"],
    [false, "test!", "Latin text with punctuation"],
    [false, "ABC", "uppercase Latin text"],
    [false, "", "empty string"],
    [false, "123", "numbers only"],
    [false, "!@#$%", "punctuation and symbols only"],
    [false, " ", "whitespace only"],

    // Expect false: other left-to-right scripts
    [false, "Здравствуй", "Cyrillic text"],
    [false, "Bonjour", "Latin with accents"],
    [false, "Καλημέρα", "Greek text"],
    [false, "こんにちは", "Japanese Hiragana"],
    [false, "你好", "Chinese characters"],
    [false, "안녕하세요", "Korean text"],

    // Expect true: Arabic
    [true, "مرحبا", "Arabic text"],
    [true, "السلام", "Arabic phrase"],
    [true, "العربية", "Arabic word"],
    [true, "٠١٢٣٤٥٦٧٨٩", "Arabic-Indic digits"],

    // Expect true: Hebrew
    [true, "שלום", "Hebrew text"],
    [true, "עברית", "Hebrew word"],
    [true, "ברוך", "Hebrew name"],

    // Expect true: Persian/Farsi
    [true, "سلام", "Persian text"],
    [true, "فارسی", "Persian word"],

    // Expect true: a single RTL character anywhere in the input is enough
    [true, "hello مرحبا", "mixed LTR and Arabic"],
    [true, "123 שלום", "numbers and Hebrew"],
    [true, "test سلام!", "Latin, Persian, and punctuation"],
    [true, "مرحبا123", "Arabic with numbers"],
    [true, "hello؟", "Latin with Arabic punctuation"],

    // Expect false: characters from other Unicode ranges
    [false, "𝕳𝖊𝖑𝖑𝖔", "mathematical bold text (LTR)"],
    [false, "🌍🌎🌏", "emoji"],
  ] as const;

  it.each(detectionCases)(
    "should return %s for word '%s' (%s)",
    (wantsRtl: boolean, sample: string, _label: string) => {
      const detection = Strings.__testing.hasRTLCharacters(sample);
      expect(detection[0]).toBe(wantsRtl);
    },
  );
});
/**
 * Tests for `Strings.isWordRightToLeft(word, languageRTL, reverseDirection?)`.
 *
 * Only element `[0]` of the returned value is asserted. The direction cache is
 * cleared before every test via `Strings.clearWordDirectionCache()` so cases
 * do not influence each other.
 */
describe("isWordRightToLeft", () => {
  beforeEach(() => {
    Strings.clearWordDirectionCache();
  });

  // Rows: [expected, word, languageRTL, label]
  it.each([
    // Words with real content: the word's own script decides, not the language
    [false, "hello", false, "LTR word in LTR language"],
    [
      false,
      "hello",
      true,
      "LTR word in RTL language (word direction overrides language)",
    ],
    [
      true,
      "مرحبا",
      false,
      "RTL word in LTR language (word direction overrides language)",
    ],
    [true, "مرحبا", true, "RTL word in RTL language"],
    // Leading/trailing punctuation must not change the outcome
    [false, "hello!", false, "LTR word with trailing punctuation"],
    [false, "!hello", false, "LTR word with leading punctuation"],
    [false, "!hello!", false, "LTR word with surrounding punctuation"],
    [true, "مرحبا؟", false, "RTL word with trailing punctuation"],
    [true, "؟مرحبا", false, "RTL word with leading punctuation"],
    [true, "؟مرحبا؟", false, "RTL word with surrounding punctuation"],
    // Empty or neutral-only content falls back to the language direction
    [false, "", false, "empty string falls back to LTR language"],
    [true, "", true, "empty string falls back to RTL language"],
    [false, "!!!", false, "punctuation only falls back to LTR language"],
    [true, "!!!", true, "punctuation only falls back to RTL language"],
    [false, " ", false, "whitespace only falls back to LTR language"],
    [true, " ", true, "whitespace only falls back to RTL language"],
    // Digits: ASCII digits are not RTL, Arabic-Indic digits are
    [false, "123", false, "regular digits are not RTL"],
    [false, "123", true, "regular digits are not RTL regardless of language"],
    [true, "١٢٣", false, "Arabic-Indic digits are detected as RTL"],
    [true, "١٢٣", true, "Arabic-Indic digits are detected as RTL"],
  ] as const)(
    "should return %s for word '%s' with languageRTL=%s (%s)",
    (
      expected: boolean,
      word: string,
      languageRTL: boolean,
      _description: string,
    ) => {
      expect(Strings.isWordRightToLeft(word, languageRTL)[0]).toBe(expected);
    },
  );

  it("should return languageRTL for undefined word", () => {
    expect(Strings.isWordRightToLeft(undefined, false)[0]).toBe(false);
    expect(Strings.isWordRightToLeft(undefined, true)[0]).toBe(true);
  });

  // reverseDirection (third argument) flips the reported direction
  it("should return true for LTR word with reversed direction", () => {
    expect(Strings.isWordRightToLeft("hello", false, true)[0]).toBe(true);
    expect(Strings.isWordRightToLeft("hello", true, true)[0]).toBe(true);
  });

  it("should return false for RTL word with reversed direction", () => {
    expect(Strings.isWordRightToLeft("مرحبا", true, true)[0]).toBe(false);
    expect(Strings.isWordRightToLeft("مرحبا", false, true)[0]).toBe(false);
  });

  it("should return reverse of languageRTL for undefined word with reversed direction", () => {
    expect(Strings.isWordRightToLeft(undefined, false, true)[0]).toBe(true);
    expect(Strings.isWordRightToLeft(undefined, true, true)[0]).toBe(false);
  });

  describe("caching", () => {
    // The spies are installed on Map.prototype, so they observe calls on every
    // Map instance, not only on the cache.
    // NOTE(review): this presumes the direction cache is a Map keyed by the
    // punctuation-stripped word — confirm against the implementation.
    let mapGetSpy: ReturnType<typeof vi.spyOn>;
    let mapSetSpy: ReturnType<typeof vi.spyOn>;
    let mapClearSpy: ReturnType<typeof vi.spyOn>;

    beforeEach(() => {
      mapGetSpy = vi.spyOn(Map.prototype, "get");
      mapSetSpy = vi.spyOn(Map.prototype, "set");
      mapClearSpy = vi.spyOn(Map.prototype, "clear");
    });

    afterEach(() => {
      // Restore the real Map methods so the spies do not leak into other tests.
      mapGetSpy.mockRestore();
      mapSetSpy.mockRestore();
      mapClearSpy.mockRestore();
    });

    it("should use cache for repeated calls", () => {
      // First call should cache the result (cache miss)
      const result1 = Strings.isWordRightToLeft("hello", false);
      expect(result1[0]).toBe(false);
      expect(mapSetSpy).toHaveBeenCalledWith("hello", [false, 0]);

      // Reset spies to check second call
      mapGetSpy.mockClear();
      mapSetSpy.mockClear();

      // Second call should use cache (cache hit)
      const result2 = Strings.isWordRightToLeft("hello", false);
      expect(result2[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("hello");
      expect(mapSetSpy).not.toHaveBeenCalled(); // Should not set again

      // Cache should work regardless of language direction for same word
      mapGetSpy.mockClear();
      mapSetSpy.mockClear();
      const result3 = Strings.isWordRightToLeft("hello", true);
      expect(result3[0]).toBe(false); // Still false because "hello" is LTR regardless of language
      expect(mapGetSpy).toHaveBeenCalledWith("hello");
      expect(mapSetSpy).not.toHaveBeenCalled(); // Should not set again
    });

    it("should cache based on core word without punctuation", () => {
      // First call should cache the result for core "hello"
      const result1 = Strings.isWordRightToLeft("hello", false);
      expect(result1[0]).toBe(false);
      expect(mapSetSpy).toHaveBeenCalledWith("hello", [false, 0]);
      mapGetSpy.mockClear();
      mapSetSpy.mockClear();

      // These should all use the same cache entry since they have the same core
      const result2 = Strings.isWordRightToLeft("hello!", false);
      expect(result2[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("hello");
      expect(mapSetSpy).not.toHaveBeenCalled();
      mapGetSpy.mockClear();
      mapSetSpy.mockClear();

      const result3 = Strings.isWordRightToLeft("!hello", false);
      expect(result3[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("hello");
      expect(mapSetSpy).not.toHaveBeenCalled();
      mapGetSpy.mockClear();
      mapSetSpy.mockClear();

      const result4 = Strings.isWordRightToLeft("!hello!", false);
      expect(result4[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("hello");
      expect(mapSetSpy).not.toHaveBeenCalled();
    });

    it("should handle cache clearing", () => {
      // Cache a result
      Strings.isWordRightToLeft("test", false);
      expect(mapSetSpy).toHaveBeenCalledWith("test", [false, 0]);

      // Clear cache
      Strings.clearWordDirectionCache();
      expect(mapClearSpy).toHaveBeenCalled();
      mapGetSpy.mockClear();
      mapSetSpy.mockClear();
      mapClearSpy.mockClear();

      // Should work normally after cache clear (cache miss again)
      const result = Strings.isWordRightToLeft("test", false);
      expect(result[0]).toBe(false);
      expect(mapSetSpy).toHaveBeenCalledWith("test", [false, 0]);
    });

    it("should demonstrate cache miss vs cache hit behavior", () => {
      // Test cache miss - first time seeing this word
      const result1 = Strings.isWordRightToLeft("unique", false);
      expect(result1[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("unique");
      expect(mapSetSpy).toHaveBeenCalledWith("unique", [false, 0]);
      mapGetSpy.mockClear();
      mapSetSpy.mockClear();

      // Test cache hit - same word again
      const result2 = Strings.isWordRightToLeft("unique", false);
      expect(result2[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("unique");
      expect(mapSetSpy).not.toHaveBeenCalled(); // No cache set on hit
      mapGetSpy.mockClear();
      mapSetSpy.mockClear();

      // Test cache miss - different word
      const result3 = Strings.isWordRightToLeft("different", false);
      expect(result3[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("different");
      expect(mapSetSpy).toHaveBeenCalledWith("different", [false, 0]);
    });
  });
});
/**
 * Table-driven tests for `Strings.isSpace`.
 *
 * Each row is `[input, code point or null, label, expected result]`. For
 * single-code-unit inputs with a non-null code point, the test first verifies
 * that the literal really is that code point, then asserts `isSpace(input)`.
 *
 * Every non-ASCII space is written as a `\u` escape so that editors and
 * whitespace-normalising tools cannot silently alter or drop it.
 */
describe("isSpace", () => {
  it.each([
    // Should return true for directly typable spaces
    [" ", 0x0020, "regular space", true],
    ["\u2002", 0x2002, "en space", true],
    ["\u2003", 0x2003, "em space", true],
    ["\u2009", 0x2009, "thin space", true],
    ["\u3000", 0x3000, "ideographic space", true],
    ["\u00A0", 0x00a0, "non-breaking space", true],
    ["\u2007", 0x2007, "figure space", true],
    ["\u2008", 0x2008, "punctuation space", true],
    ["\u200A", 0x200a, "hair space", true],
    ["\u200B", 0x200b, "zero-width space", true],
    // Should return false for other characters
    ["\t", 0x0009, "tab", false],
    ["a", 0x0061, "letter a", false],
    ["A", 0x0041, "letter A", false],
    ["1", 0x0031, "digit 1", false],
    ["!", 0x0021, "exclamation mark", false],
    ["\n", 0x000a, "newline", false],
    ["\r", 0x000d, "carriage return", false],
    // Edge cases: inputs that are not exactly one character (no code point check)
    ["", null, "empty string", false],
    ["  ", null, "two spaces", false],
    ["ab", null, "two letters", false],
  ])(
    // Placeholders follow the tuple order: input, code point, label, expected.
    // %j prints the input JSON-quoted so empty strings stay visible in the title.
    "should classify %j (code point %s, %s) as space: %s",
    (
      char: string,
      expectedCodePoint: number | null,
      description: string,
      expected: boolean,
    ) => {
      // Sanity check on the fixture itself: the literal must be the code point
      // the row claims, otherwise the row would test the wrong character.
      if (expectedCodePoint !== null && char.length === 1) {
        expect(char.codePointAt(0)).toBe(expectedCodePoint);
      }
      expect(Strings.isSpace(char)).toBe(expected);
    },
  );
});
/**
 * Tests for `Strings.areCharactersVisuallyEqual(char1, char2, language?)`.
 *
 * Covers identical characters, different characters, typographic variants of
 * apostrophes, quotes, dashes and commas, characters from different groups,
 * and the extra equivalences requested with the "russian" language argument.
 */
describe("areCharactersVisuallyEqual", () => {
  it("should return true for identical characters", () => {
    expect(Strings.areCharactersVisuallyEqual("a", "a")).toBe(true);
    expect(Strings.areCharactersVisuallyEqual("!", "!")).toBe(true);
  });

  it("should return false for different characters", () => {
    expect(Strings.areCharactersVisuallyEqual("a", "b")).toBe(false);
    expect(Strings.areCharactersVisuallyEqual("!", "?")).toBe(false);
  });

  it("should return true for equivalent apostrophe variants", () => {
    // Typographic apostrophes are written as escapes so they cannot be
    // normalised to the ASCII apostrophe, which would reduce this test to
    // comparing a character with itself.
    // NOTE(review): U+2019 and U+2018 are assumed to belong to the apostrophe
    // equivalence group — confirm against the implementation.
    expect(Strings.areCharactersVisuallyEqual("'", "\u2019")).toBe(true);
    expect(Strings.areCharactersVisuallyEqual("'", "\u2018")).toBe(true);
    expect(Strings.areCharactersVisuallyEqual("'", "ʼ")).toBe(true);
  });

  it("should return true for equivalent quote variants", () => {
    // NOTE(review): U+201C and U+201D are assumed to belong to the quote
    // equivalence group — confirm against the implementation.
    expect(Strings.areCharactersVisuallyEqual('"', "\u201C")).toBe(true);
    expect(Strings.areCharactersVisuallyEqual('"', "\u201D")).toBe(true);
    expect(Strings.areCharactersVisuallyEqual('"', "„")).toBe(true);
  });

  it("should return true for equivalent dash variants", () => {
    expect(Strings.areCharactersVisuallyEqual("-", "–")).toBe(true);
    expect(Strings.areCharactersVisuallyEqual("-", "—")).toBe(true);
    expect(Strings.areCharactersVisuallyEqual("–", "—")).toBe(true);
  });

  it("should return true for equivalent comma variants", () => {
    expect(Strings.areCharactersVisuallyEqual(",", "‚")).toBe(true);
  });

  it("should return false for characters from different equivalence groups", () => {
    expect(Strings.areCharactersVisuallyEqual("'", '"')).toBe(false);
    expect(Strings.areCharactersVisuallyEqual("-", "'")).toBe(false);
    expect(Strings.areCharactersVisuallyEqual(",", '"')).toBe(false);
  });

  describe("should check russian specific equivalences", () => {
    // NOTE(review): several literals below look alike but appear to be
    // different code points (Cyrillic vs. Latin letters) — keep them exactly
    // as written when editing.
    it.each([
      {
        desc: "е and ё are equivalent",
        char1: "е",
        char2: "ё",
        expected: true,
      },
      {
        desc: "e and ё are equivalent",
        char1: "e",
        char2: "ё",
        expected: true,
      },
      {
        desc: "е and e are equivalent",
        char1: "е",
        char2: "e",
        expected: true,
      },
      {
        desc: "non-equivalent characters return false",
        char1: "а",
        char2: "б",
        expected: false,
      },
      {
        desc: "non-equivalent characters return false (2)",
        char1: "a",
        char2: "б",
        expected: false,
      },
    ])("$desc", ({ char1, char2, expected }) => {
      expect(
        Strings.areCharactersVisuallyEqual(char1, char2, "russian"),
      ).toBe(expected);
    });
  });
});
});