591 lines
21 KiB
TypeScript
591 lines
21 KiB
TypeScript
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
||
import * as Strings from "../../src/ts/utils/strings";
|
||
|
||
describe("string utils", () => {
|
||
// Suite for Strings.highlightMatches(text, matches).
// Judging by the expectations below, each whole-word occurrence of an entry in
// `matches` is wrapped in <span class="highlight">…</span>, and the text is
// returned unchanged when nothing qualifies.
describe("highlightMatches", () => {
  // Inputs where at least one match must be wrapped; `expected` is the exact
  // markup the function has to return.
  const shouldHighlight = [
    {
      description: "word at the beginning",
      text: "Start here.",
      matches: ["Start"],
      expected: '<span class="highlight">Start</span> here.',
    },
    {
      description: "word at the end",
      text: "reach the end",
      matches: ["end"],
      expected: 'reach the <span class="highlight">end</span>',
    },
    {
      description: "multiple matches",
      text: "one two three",
      matches: ["one", "three"],
      expected:
        '<span class="highlight">one</span> two <span class="highlight">three</span>',
    },
    {
      description: "repeated matches",
      text: "one two two",
      matches: ["two"],
      expected:
        'one <span class="highlight">two</span> <span class="highlight">two</span>',
    },
    {
      // "abc" must win over its prefix "ab" even though "ab" is listed first.
      description: "longest possible match",
      text: "abc ab",
      matches: ["ab", "abc"],
      expected:
        '<span class="highlight">abc</span> <span class="highlight">ab</span>',
    },
    {
      description: "if wrapped in parenthesis",
      text: "(test)",
      matches: ["test"],
      expected: '(<span class="highlight">test</span>)',
    },
    {
      description: "if wrapped in commas",
      text: ",test,",
      matches: ["test"],
      expected: ',<span class="highlight">test</span>,',
    },
    {
      description: "if wrapped in underscores",
      text: "_test_",
      matches: ["test"],
      expected: '_<span class="highlight">test</span>_',
    },
    {
      description: "words in russian",
      text: "Привет, мир!",
      matches: ["Привет", "мир"],
      expected:
        '<span class="highlight">Привет</span>, <span class="highlight">мир</span>!',
    },
    {
      // Full-width comma (U+FF0C) and exclamation mark (U+FF01) are written as
      // escapes so that editors or copy/paste cannot silently normalise them
      // to their ASCII look-alikes.
      description: "words with chinese punctuation",
      text: "你好\uFF0C世界\uFF01",
      matches: ["你好", "世界"],
      expected:
        '<span class="highlight">你好</span>\uFF0C<span class="highlight">世界</span>\uFF01',
    },
    {
      description: "words with arabic punctuation",
      text: "؟مرحبا، بكم؛",
      matches: ["مرحبا", "بكم"],
      expected:
        '؟<span class="highlight">مرحبا</span>، <span class="highlight">بكم</span>؛',
    },
    {
      description: "standalone numbers",
      text: "My number is 1234.",
      matches: ["1234"],
      expected: 'My number is <span class="highlight">1234</span>.',
    },
  ];

  // Inputs where the match only occurs inside a longer token, so the text must
  // come back untouched.
  const shouldNotHighlight = [
    {
      description: "a match within a longer word",
      text: "together",
      matches: ["get"],
    },
    {
      description: "a match with leading letters",
      text: "welcome",
      matches: ["come"],
    },
    {
      description: "a match with trailing letters",
      text: "comets",
      matches: ["come"],
    },
    {
      description: "japanese matches within longer words",
      text: "こんにちは世界",
      matches: ["こんにちは"],
    },
    {
      description: "numbers within words",
      text: "abc1234def",
      matches: ["1234"],
    },
  ];

  // Degenerate inputs: nothing to search for, or nothing to search in.
  const returnOriginal = [
    {
      description: "if matches is an empty array",
      text: "Nothing to match.",
      matches: [],
    },
    {
      description: "if matches has an empty string only",
      text: "Nothing to match.",
      matches: [""],
    },
    {
      description: "if no matches found in text",
      text: "Hello world.",
      matches: ["absent"],
    },
    {
      description: "if text is empty",
      text: "",
      matches: ["anything"],
    },
  ];

  it.each(shouldHighlight)(
    "should highlight $description",
    ({ text, matches, expected }) => {
      expect(Strings.highlightMatches(text, matches)).toBe(expected);
    },
  );

  it.each(shouldNotHighlight)(
    "should not highlight $description",
    ({ text, matches }) => {
      expect(Strings.highlightMatches(text, matches)).toBe(text);
    },
  );

  it.each(returnOriginal)(
    "should return original text $description",
    ({ text, matches }) => {
      expect(Strings.highlightMatches(text, matches)).toBe(text);
    },
  );
});
|
||
|
||
// Suite for Strings.splitIntoCharacters: the result must contain exactly one
// array element per character of the input.
describe("splitIntoCharacters", () => {
  it("splits regular characters", () => {
    const pieces = Strings.splitIntoCharacters("abc");

    expect(pieces).toEqual(["a", "b", "c"]);
  });

  it("splits characters outside of the bmp", () => {
    // The middle character has to come back as ONE element, not as two
    // separate halves.
    const pieces = Strings.splitIntoCharacters("t𐑩e");

    expect(pieces).toEqual(["t", "𐑩", "e"]);
  });
});
|
||
// Suite for Strings.replaceControlCharacters.
// Each row is [input, expected output, label]. Inputs contain the literal
// two-character sequences backslash+t / backslash+n (written "\\t" / "\\n" in
// source); expected outputs contain real tab / newline characters.
describe("replaceControlCharacters", () => {
  const conversions: Array<[string, string, string]> = [
    // Escaped tab -> real tab
    ["\\t", "\t", "single tab"],
    ["\\t\\t\\t", "\t\t\t", "multiple tabs"],
    ["hello\\tworld", "hello\tworld", "tab between words"],
    ["\\tstart", "\tstart", "tab at start"],
    ["end\\t", "end\t", "tab at end"],

    // Escaped newline -> space followed by a real newline
    ["\\n", " \n", "single newline with space prefix"],
    ["hello\\nworld", "hello \nworld", "newline between words with space"],
    ["\\nstart", " \nstart", "newline at start with space"],
    ["end\\n", "end \n", "newline at end with space"],

    // Newline immediately after preceding text
    ["a\\n", "a \n", "single char followed by newline gets space prefix"],
    ["hello\\n", "hello \n", "word followed by newline gets space prefix"],

    // Double-escaped sequences lose one level of escaping
    ["\\\\t", "\\t", "double-escaped tab becomes single-escaped"],
    [
      "\\\\n",
      "\\ \n",
      "double-escaped newline becomes backslash + space + newline",
    ],
    ["\\\\t\\\\n", "\\t\\ \n", "multiple double-escaped sequences"],

    // Several kinds of sequence in one input
    [
      "\\t\\n\\\\t",
      "\t \n\\t",
      "mix of tab, newline, and double-escaped tab",
    ],
    [
      "hello\\tworld\\ntest\\\\t",
      "hello\tworld \ntest\\t",
      "complex mixed scenario",
    ],

    // Inputs that must pass through unchanged
    ["", "", "empty string"],
    ["no escapes", "no escapes", "string with no escape sequences"],
    ["\\", "\\", "single backslash"],
    ["\\x", "\\x", "backslash with non-control character"],

    // Backslash pairs that are not followed by t / n stay as they are
    ["\\\\", "\\\\", "double backslash not followed by control char"],
    ["\\\\x", "\\\\x", "double backslash followed by non-control char"],
  ];

  it.each(conversions)(
    "should convert %s to %s (%s)",
    (raw: string, converted: string, _label: string) => {
      const actual = Strings.replaceControlCharacters(raw);

      expect(actual).toBe(converted);
    },
  );
});
|
||
|
||
// Suite for the internal helper Strings.__testing.hasRTLCharacters.
// Each row is [expected flag, input, label]; only element [0] of the helper's
// return value is checked.
describe("hasRTLCharacters", () => {
  const detectionCases = [
    // Latin text, digits, symbols and blanks -> false
    [false, "hello", "basic Latin text"],
    [false, "world123", "Latin text with numbers"],
    [false, "test!", "Latin text with punctuation"],
    [false, "ABC", "uppercase Latin text"],
    [false, "", "empty string"],
    [false, "123", "numbers only"],
    [false, "!@#$%", "punctuation and symbols only"],
    [false, " ", "whitespace only"],

    // Other left-to-right scripts -> false
    [false, "Здравствуй", "Cyrillic text"],
    [false, "Bonjour", "Latin with accents"],
    [false, "Καλημέρα", "Greek text"],
    [false, "こんにちは", "Japanese Hiragana"],
    [false, "你好", "Chinese characters"],
    [false, "안녕하세요", "Korean text"],

    // Arabic -> true
    [true, "مرحبا", "Arabic text"],
    [true, "السلام", "Arabic phrase"],
    [true, "العربية", "Arabic word"],
    [true, "٠١٢٣٤٥٦٧٨٩", "Arabic-Indic digits"],

    // Hebrew -> true
    [true, "שלום", "Hebrew text"],
    [true, "עברית", "Hebrew word"],
    [true, "ברוך", "Hebrew name"],

    // Persian/Farsi -> true
    [true, "سلام", "Persian text"],
    [true, "فارسی", "Persian word"],

    // Mixed input: a single RTL character anywhere is enough -> true
    [true, "hello مرحبا", "mixed LTR and Arabic"],
    [true, "123 שלום", "numbers and Hebrew"],
    [true, "test سلام!", "Latin, Persian, and punctuation"],
    [true, "مرحبا123", "Arabic with numbers"],
    [true, "hello؟", "Latin with Arabic punctuation"],

    // Characters from less common Unicode ranges -> false
    [false, "𝕳𝖊𝖑𝖑𝖔", "mathematical bold text (LTR)"],
    [false, "🌍🌎🌏", "emoji"],
  ] as const;

  it.each(detectionCases)(
    "should return %s for word '%s' (%s)",
    (expected: boolean, word: string, _description: string) => {
      const detection = Strings.__testing.hasRTLCharacters(word);

      expect(detection[0]).toBe(expected);
    },
  );
});
|
||
|
||
// Suite for Strings.isWordRightToLeft(word, languageRTL, reverseDirection?).
// All assertions compare element [0] of the returned value with the expected
// boolean.
describe("isWordRightToLeft", () => {
  // Every test starts from an empty direction cache so results cannot leak
  // from one test into the next.
  beforeEach(() => {
    Strings.clearWordDirectionCache();
  });

  // Rows are [expected flag, word, languageRTL, label].
  const directionCases = [
    // A word with real content decides its own direction
    [false, "hello", false, "LTR word in LTR language"],
    [
      false,
      "hello",
      true,
      "LTR word in RTL language (word direction overrides language)",
    ],
    [
      true,
      "مرحبا",
      false,
      "RTL word in LTR language (word direction overrides language)",
    ],
    [true, "مرحبا", true, "RTL word in RTL language"],

    // Surrounding punctuation does not influence the outcome
    [false, "hello!", false, "LTR word with trailing punctuation"],
    [false, "!hello", false, "LTR word with leading punctuation"],
    [false, "!hello!", false, "LTR word with surrounding punctuation"],
    [true, "مرحبا؟", false, "RTL word with trailing punctuation"],
    [true, "؟مرحبا", false, "RTL word with leading punctuation"],
    [true, "؟مرحبا؟", false, "RTL word with surrounding punctuation"],

    // Nothing but neutral content: the language direction is used instead
    [false, "", false, "empty string falls back to LTR language"],
    [true, "", true, "empty string falls back to RTL language"],
    [false, "!!!", false, "punctuation only falls back to LTR language"],
    [true, "!!!", true, "punctuation only falls back to RTL language"],
    [false, " ", false, "whitespace only falls back to LTR language"],
    [true, " ", true, "whitespace only falls back to RTL language"],

    // Digits: ASCII digits are never RTL, Arabic-Indic digits always are
    [false, "123", false, "regular digits are not RTL"],
    [false, "123", true, "regular digits are not RTL regardless of language"],
    [true, "١٢٣", false, "Arabic-Indic digits are detected as RTL"],
    [true, "١٢٣", true, "Arabic-Indic digits are detected as RTL"],
  ] as const;

  it.each(directionCases)(
    "should return %s for word '%s' with languageRTL=%s (%s)",
    (
      expected: boolean,
      word: string,
      languageRTL: boolean,
      _description: string,
    ) => {
      const outcome = Strings.isWordRightToLeft(word, languageRTL);

      expect(outcome[0]).toBe(expected);
    },
  );

  it("should return languageRTL for undefined word", () => {
    for (const languageRTL of [false, true]) {
      const outcome = Strings.isWordRightToLeft(undefined, languageRTL);

      expect(outcome[0]).toBe(languageRTL);
    }
  });

  // Third argument (reverseDirection) set to true
  it("should return true for LTR word with reversed direction", () => {
    for (const languageRTL of [false, true]) {
      const outcome = Strings.isWordRightToLeft("hello", languageRTL, true);

      expect(outcome[0]).toBe(true);
    }
  });

  it("should return false for RTL word with reversed direction", () => {
    for (const languageRTL of [true, false]) {
      const outcome = Strings.isWordRightToLeft("مرحبا", languageRTL, true);

      expect(outcome[0]).toBe(false);
    }
  });

  it("should return reverse of languageRTL for undefined word with reversed direction", () => {
    for (const languageRTL of [false, true]) {
      const outcome = Strings.isWordRightToLeft(undefined, languageRTL, true);

      expect(outcome[0]).toBe(!languageRTL);
    }
  });

  // The cache is observed indirectly by spying on Map.prototype get/set/clear.
  describe("caching", () => {
    let mapGetSpy: ReturnType<typeof vi.spyOn>;
    let mapSetSpy: ReturnType<typeof vi.spyOn>;
    let mapClearSpy: ReturnType<typeof vi.spyOn>;

    // Drops the recorded get/set calls so the next assertions only see what
    // the following call to isWordRightToLeft did.
    const forgetGetAndSetCalls = (): void => {
      mapGetSpy.mockClear();
      mapSetSpy.mockClear();
    };

    beforeEach(() => {
      mapGetSpy = vi.spyOn(Map.prototype, "get");
      mapSetSpy = vi.spyOn(Map.prototype, "set");
      mapClearSpy = vi.spyOn(Map.prototype, "clear");
    });

    afterEach(() => {
      mapGetSpy.mockRestore();
      mapSetSpy.mockRestore();
      mapClearSpy.mockRestore();
    });

    it("should use cache for repeated calls", () => {
      // Miss: the first lookup stores the computed entry.
      const firstLookup = Strings.isWordRightToLeft("hello", false);
      expect(firstLookup[0]).toBe(false);
      expect(mapSetSpy).toHaveBeenCalledWith("hello", [false, 0]);

      forgetGetAndSetCalls();

      // Hit: the same word is read back and nothing is written.
      const secondLookup = Strings.isWordRightToLeft("hello", false);
      expect(secondLookup[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("hello");
      expect(mapSetSpy).not.toHaveBeenCalled();

      forgetGetAndSetCalls();

      // Hit again with the other language direction: the entry is keyed by
      // the word alone, and "hello" stays LTR.
      const otherLanguageLookup = Strings.isWordRightToLeft("hello", true);
      expect(otherLanguageLookup[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("hello");
      expect(mapSetSpy).not.toHaveBeenCalled();
    });

    it("should cache based on core word without punctuation", () => {
      // Populate the cache with the bare word.
      const bareLookup = Strings.isWordRightToLeft("hello", false);
      expect(bareLookup[0]).toBe(false);
      expect(mapSetSpy).toHaveBeenCalledWith("hello", [false, 0]);

      // Each decorated variant must be served from the entry stored under
      // "hello" without writing a new one.
      for (const decorated of ["hello!", "!hello", "!hello!"]) {
        forgetGetAndSetCalls();

        const decoratedLookup = Strings.isWordRightToLeft(decorated, false);
        expect(decoratedLookup[0]).toBe(false);
        expect(mapGetSpy).toHaveBeenCalledWith("hello");
        expect(mapSetSpy).not.toHaveBeenCalled();
      }
    });

    it("should handle cache clearing", () => {
      // Store an entry.
      Strings.isWordRightToLeft("test", false);
      expect(mapSetSpy).toHaveBeenCalledWith("test", [false, 0]);

      // Clearing must reach the underlying Map.
      Strings.clearWordDirectionCache();
      expect(mapClearSpy).toHaveBeenCalled();

      forgetGetAndSetCalls();
      mapClearSpy.mockClear();

      // After the clear the same word is a miss again and gets re-stored.
      const afterClear = Strings.isWordRightToLeft("test", false);
      expect(afterClear[0]).toBe(false);
      expect(mapSetSpy).toHaveBeenCalledWith("test", [false, 0]);
    });

    it("should demonstrate cache miss vs cache hit behavior", () => {
      // Miss: a word that has not been seen yet is looked up and then stored.
      const missLookup = Strings.isWordRightToLeft("unique", false);
      expect(missLookup[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("unique");
      expect(mapSetSpy).toHaveBeenCalledWith("unique", [false, 0]);

      forgetGetAndSetCalls();

      // Hit: the same word is looked up but not stored again.
      const hitLookup = Strings.isWordRightToLeft("unique", false);
      expect(hitLookup[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("unique");
      expect(mapSetSpy).not.toHaveBeenCalled();

      forgetGetAndSetCalls();

      // Miss: a different word goes through lookup + store once more.
      const otherWordLookup = Strings.isWordRightToLeft("different", false);
      expect(otherWordLookup[0]).toBe(false);
      expect(mapGetSpy).toHaveBeenCalledWith("different");
      expect(mapSetSpy).toHaveBeenCalledWith("different", [false, 0]);
    });
  });
});
|
||
|
||
// Suite for Strings.isSpace(char).
// Rows are [expected result, input, expected code point | null, label].
// The column order matches the placeholders of the test title, so the
// generated names read e.g.
//   should return true for " " (code point 32 - regular space)
// Every non-ASCII or invisible space is written as a \u escape: literal
// invisible characters are easily lost or normalised to U+0020 by editors and
// copy/paste, which would silently turn a row into a different test.
describe("isSpace", () => {
  it.each([
    // Space characters that must be accepted
    [true, " ", 0x0020, "regular space"],
    [true, "\u2002", 0x2002, "en space"],
    [true, "\u2003", 0x2003, "em space"],
    [true, "\u2009", 0x2009, "thin space"],
    [true, "\u3000", 0x3000, "ideographic space"],
    [true, "\u00A0", 0x00a0, "non-breaking space"],
    [true, "\u2007", 0x2007, "figure space"],
    [true, "\u2008", 0x2008, "punctuation space"],
    [true, "\u200A", 0x200a, "hair space"],
    [true, "\u200B", 0x200b, "zero-width space"],

    // Other single characters, including non-space whitespace, are rejected
    [false, "\t", 0x0009, "tab"],
    [false, "a", 0x0061, "letter a"],
    [false, "A", 0x0041, "letter A"],
    [false, "1", 0x0031, "digit 1"],
    [false, "!", 0x0021, "exclamation mark"],
    [false, "\n", 0x000a, "newline"],
    [false, "\r", 0x000d, "carriage return"],

    // Inputs that are not exactly one character (no code point to verify)
    [false, "", null, "empty string"],
    [false, "  ", null, "two spaces"],
    [false, "ab", null, "two letters"],
  ])(
    "should return %s for %j (code point %s - %s)",
    (
      expected: boolean,
      char: string,
      expectedCodePoint: number | null,
      _description: string,
    ) => {
      // Guard against a mistyped fixture: the input really has to be the
      // character the row claims to test.
      if (expectedCodePoint !== null && char.length === 1) {
        expect(char.codePointAt(0)).toBe(expectedCodePoint);
      }

      expect(Strings.isSpace(char)).toBe(expected);
    },
  );
});
|
||
|
||
// Suite for Strings.areCharactersVisuallyEqual(char1, char2, language?).
// Typographic variants and look-alike letters are written as \u escapes with
// the character named in a trailing comment: the literal glyphs are either
// indistinguishable on screen (Latin "e" vs Cyrillic "е") or get normalised to
// plain ASCII by editors and copy/paste, which would reduce a variant test to
// comparing a character with itself.
describe("areCharactersVisuallyEqual", () => {
  it("should return true for identical characters", () => {
    expect(Strings.areCharactersVisuallyEqual("a", "a")).toBe(true);
    expect(Strings.areCharactersVisuallyEqual("!", "!")).toBe(true);
  });

  it("should return false for different characters", () => {
    expect(Strings.areCharactersVisuallyEqual("a", "b")).toBe(false);
    expect(Strings.areCharactersVisuallyEqual("!", "?")).toBe(false);
  });

  it("should return true for equivalent apostrophe variants", () => {
    expect(Strings.areCharactersVisuallyEqual("'", "\u2019")).toBe(true); // right single quotation mark
    expect(Strings.areCharactersVisuallyEqual("'", "\u2018")).toBe(true); // left single quotation mark
    expect(Strings.areCharactersVisuallyEqual("'", "\u02BC")).toBe(true); // modifier letter apostrophe
  });

  it("should return true for equivalent quote variants", () => {
    expect(Strings.areCharactersVisuallyEqual('"', "\u201D")).toBe(true); // right double quotation mark
    expect(Strings.areCharactersVisuallyEqual('"', "\u201C")).toBe(true); // left double quotation mark
    expect(Strings.areCharactersVisuallyEqual('"', "\u201E")).toBe(true); // double low-9 quotation mark
  });

  it("should return true for equivalent dash variants", () => {
    expect(Strings.areCharactersVisuallyEqual("-", "\u2013")).toBe(true); // en dash
    expect(Strings.areCharactersVisuallyEqual("-", "\u2014")).toBe(true); // em dash
    expect(Strings.areCharactersVisuallyEqual("\u2013", "\u2014")).toBe(true); // en dash vs em dash
  });

  it("should return true for equivalent comma variants", () => {
    expect(Strings.areCharactersVisuallyEqual(",", "\u201A")).toBe(true); // single low-9 quotation mark
  });

  it("should return false for characters from different equivalence groups", () => {
    expect(Strings.areCharactersVisuallyEqual("'", '"')).toBe(false);
    expect(Strings.areCharactersVisuallyEqual("-", "'")).toBe(false);
    expect(Strings.areCharactersVisuallyEqual(",", '"')).toBe(false);
  });

  // Equivalences that are only checked when the language argument is "russian".
  describe("should check russian specific equivalences", () => {
    it.each([
      {
        desc: "е and ё are equivalent",
        char1: "\u0435", // Cyrillic small ie
        char2: "\u0451", // Cyrillic small io
        expected: true,
      },
      {
        desc: "e and ё are equivalent",
        char1: "e", // Latin small e
        char2: "\u0451", // Cyrillic small io
        expected: true,
      },
      {
        desc: "е and e are equivalent",
        char1: "\u0435", // Cyrillic small ie
        char2: "e", // Latin small e
        expected: true,
      },
      {
        desc: "non-equivalent characters return false",
        char1: "\u0430", // Cyrillic small a
        char2: "\u0431", // Cyrillic small be
        expected: false,
      },
      {
        desc: "non-equivalent characters return false (2)",
        char1: "a", // Latin small a
        char2: "\u0431", // Cyrillic small be
        expected: false,
      },
    ])("$desc", ({ char1, char2, expected }) => {
      expect(
        Strings.areCharactersVisuallyEqual(char1, char2, "russian"),
      ).toBe(expected);
    });
  });
});
|
||
});
|