Fix wrong regex for CJK character detection #157231 (#161825)

This commit is contained in:
Wonseop Kim 2023-07-25 02:05:27 +09:00 committed by GitHub
parent 6bae659a1d
commit ca2cdc9037
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 7 deletions

View file

@ -11,14 +11,17 @@ describe('getFont', () => {
it(`returns 'noto-cjk' when matching cjk characters`, () => {
const cjkStrings = [
'vi-Hani: 关',
'ko: 全',
'ja: 入',
'ko: 김',
'ja-katakana: カタカナ',
'ja-hiragana: ひらがな',
'ja-han: 入',
'zh-Hant-HK: 免',
'zh-Hant: 令',
'zh-Hans: 令',
'random: おあいい 漢字 あい 抵 令',
String.fromCharCode(0x4ee4),
String.fromCodePoint(0x9aa8),
'random: おあいい 漢字 あい 抵 令 대시보드',
String.fromCharCode(0x4ee4), // 令
String.fromCharCode(0xd574), // 해
String.fromCodePoint(0x9aa8), // 骨
];
for (const cjkString of cjkStrings) {

View file

@ -6,10 +6,12 @@
*/
export function getFont(text: string) {
// We are matching Han characters which is one of the supported unicode scripts
// We are matching Han/Hangul/Hiragana/Katakana characters, which are among the supported Unicode scripts
// (you can see the full list of supported scripts here: http://www.unicode.org/standard/supported.html).
// This will match Chinese, Japanese, Korean and some other Asian languages.
const isCKJ = /\p{Script=Han}/gu.test(text);
const isCKJ = /\p{Script=Han}|\p{Script=Hangul}|\p{Script=Hiragana}|\p{Script=Katakana}/gu.test(
text
);
if (isCKJ) {
return 'noto-cjk';
} else {