-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathindex.js
More file actions
163 lines (139 loc) · 4.85 KB
/
index.js
File metadata and controls
163 lines (139 loc) · 4.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
'use strict'
// UTF-16 surrogate ranges. A high (lead) surrogate followed by a low (trail)
// surrogate encodes a single code point above U+FFFF.
const HIGH_SURROGATE_START = 0xd800
const HIGH_SURROGATE_END = 0xdbff

const LOW_SURROGATE_START = 0xdc00

// Regional indicator symbols; two in a row make up a country flag.
const REGIONAL_INDICATOR_START = 0x1f1e6
const REGIONAL_INDICATOR_END = 0x1f1ff

// Fitzpatrick skin-tone modifiers for emoji.
const FITZPATRICK_MODIFIER_START = 0x1f3fb
const FITZPATRICK_MODIFIER_END = 0x1f3ff

// Variation selectors VS1..VS16.
const VARIATION_MODIFIER_START = 0xfe00
const VARIATION_MODIFIER_END = 0xfe0f

// Combining diacritical marks for symbols (e.g. the enclosing keycap).
const DIACRITICAL_MARKS_START = 0x20d0
const DIACRITICAL_MARKS_END = 0x20ff

// ZERO WIDTH JOINER: glues the emoji on either side into one sequence.
const ZWJ = 0x200d

// BMP code units that are kept attached to the character preceding them.
// Every entry is listed exactly once; membership is all that matters.
const GRAPHEMS = [
  0x0308, // ( ◌̈ ) COMBINING DIAERESIS
  0x0937, // ( ष ) DEVANAGARI LETTER SSA
  0x093F, // ( ि ) DEVANAGARI VOWEL SIGN I
  0x0BA8, // ( ந ) TAMIL LETTER NA
  0x0BBF, // ( ி ) TAMIL VOWEL SIGN I
  0x0BCD, // ( ◌் ) TAMIL SIGN VIRAMA
  0x0E31, // ( ◌ั ) THAI CHARACTER MAI HAN-AKAT
  0x0E33, // ( ำ ) THAI CHARACTER SARA AM
  0x0E40, // ( เ ) THAI CHARACTER SARA E
  0x0E49, // ( ◌้ ) THAI CHARACTER MAI THO
  0x1100, // ( ᄀ ) HANGUL CHOSEONG KIYEOK
  0x1161, // ( ᅡ ) HANGUL JUNGSEONG A
  0x11A8 // ( ᆨ ) HANGUL JONGSEONG KIYEOK
]
/**
 * Split a string into an array of user-perceived characters.
 *
 * Each element starts with the code units reported by `nextUnits`, then
 * absorbs at most one following grapheme extender, one variation selector and
 * one diacritical mark (checked in that order). A zero width joiner after
 * that keeps the element open so the next character is appended to it.
 *
 * @param {string} string - The text to split.
 * @returns {string[]} The pieces of `string`, in order.
 * @throws {Error} When `string` is not a string.
 */
function runes (string) {
  if (typeof string !== 'string') {
    throw new Error('string cannot be undefined or null')
  }

  // Each of these may extend the current piece by one code unit, once.
  const trailingMarkChecks = [isGraphem, isVariationSelector, isDiacriticalMark]
  const pieces = []
  let start = 0
  let width = 0

  while (start < string.length) {
    width += nextUnits(start + width, string)

    for (const isTrailingMark of trailingMarkChecks) {
      if (isTrailingMark(string[start + width])) {
        width += 1
      }
    }

    // A joiner means the piece is not finished: keep `start` where it is and
    // go around again to pull in whatever follows the joiner.
    if (isZeroWidthJoiner(string[start + width])) {
      width += 1
      continue
    }

    pieces.push(string.slice(start, start + width))
    start += width
    width = 0
  }

  return pieces
}
/**
 * Decide how many UTF-16 code units make up the character starting at `i`.
 *
 *   - BMP character, unpaired surrogate, or last code unit of the string: 1
 *   - Non-BMP character (one surrogate pair): 2
 *   - Surrogate pair followed by a Fitzpatrick skin-tone modifier: 4
 *   - Two regional indicator symbols in a row (a country flag): 4
 *
 * @param {number} i - Index of the code unit the character starts at.
 * @param {string} string - The string being split.
 * @returns {number} 1, 2 or 4.
 */
function nextUnits (i, string) {
  const current = string[i]

  // If we don't have a value that is part of a surrogate pair, or we're at
  // the end, only take the value at i
  if (!isFirstOfSurrogatePair(current) || i === string.length - 1) {
    return 1
  }

  // A high surrogate only forms a pair with a low surrogate
  // (U+DC00..U+DFFF). If anything else follows, the high surrogate is
  // unpaired and must not swallow the next character.
  if (!betweenInclusive(string.charCodeAt(i + 1), LOW_SURROGATE_START, 0xdfff)) {
    return 1
  }

  const currentPair = current + string[i + 1]
  // The two code units after the current pair (shorter near the end of the
  // string, in which case neither check below matches).
  const nextPair = string.substring(i + 2, i + 4)

  // Country flags are comprised of two regional indicator symbols,
  // each represented by a surrogate pair.
  // See http://emojipedia.org/flags/
  // If both pairs are regional indicator symbols, take 4
  if (isRegionalIndicator(currentPair) && isRegionalIndicator(nextPair)) {
    return 4
  }

  // If the next pair makes a Fitzpatrick skin tone modifier, take 4
  // See http://emojipedia.org/modifiers/
  // Technically, only some code points are meant to be combined with the
  // skin tone modifiers. This function does not check the current pair to
  // see if it is one of them.
  if (isFitzpatrickModifier(nextPair)) {
    return 4
  }

  return 2
}
/**
 * Tell whether the first code unit of `string` is a high (lead) surrogate,
 * i.e. the first half of a surrogate pair.
 *
 * @param {string|undefined} string - A code unit, or undefined when the
 *   caller indexed past the end of its string.
 * @returns {boolean} Always a strict boolean; false for non-strings and for
 *   the empty string.
 */
function isFirstOfSurrogatePair (string) {
  return typeof string === 'string' && betweenInclusive(string.charCodeAt(0), HIGH_SURROGATE_START, HIGH_SURROGATE_END)
}
/**
 * Tell whether a surrogate pair encodes a regional indicator symbol.
 *
 * @param {string} string - The two code units to decode as a surrogate pair.
 * @returns {boolean} True when the decoded code point lies in the regional
 *   indicator range (bounds included).
 */
function isRegionalIndicator (string) {
  const codePoint = codePointFromSurrogatePair(string)
  return codePoint >= REGIONAL_INDICATOR_START && codePoint <= REGIONAL_INDICATOR_END
}
/**
 * Tell whether a surrogate pair encodes a Fitzpatrick skin-tone modifier.
 *
 * @param {string} string - The two code units to decode as a surrogate pair.
 * @returns {boolean} True when the decoded code point lies in the Fitzpatrick
 *   modifier range (bounds included).
 */
function isFitzpatrickModifier (string) {
  const codePoint = codePointFromSurrogatePair(string)
  return codePoint >= FITZPATRICK_MODIFIER_START && codePoint <= FITZPATRICK_MODIFIER_END
}
/**
 * Tell whether the first code unit of `string` is a variation selector.
 *
 * @param {string|undefined} string - A code unit, or undefined when the
 *   caller indexed past the end of its string.
 * @returns {boolean} False for anything that is not a string.
 */
function isVariationSelector (string) {
  if (typeof string !== 'string') {
    return false
  }
  const unit = string.charCodeAt(0)
  return betweenInclusive(unit, VARIATION_MODIFIER_START, VARIATION_MODIFIER_END)
}
/**
 * Tell whether the first code unit of `string` falls in the diacritical
 * marks range.
 *
 * @param {string|undefined} string - A code unit, or undefined when the
 *   caller indexed past the end of its string.
 * @returns {boolean} False for anything that is not a string.
 */
function isDiacriticalMark (string) {
  if (typeof string !== 'string') {
    return false
  }
  const unit = string.charCodeAt(0)
  return betweenInclusive(unit, DIACRITICAL_MARKS_START, DIACRITICAL_MARKS_END)
}
/**
 * Tell whether the first code unit of `string` is one of the code units
 * listed in GRAPHEMS.
 *
 * @param {string|undefined} string - A code unit, or undefined when the
 *   caller indexed past the end of its string.
 * @returns {boolean} False for anything that is not a string.
 */
function isGraphem (string) {
  if (typeof string !== 'string') {
    return false
  }
  return GRAPHEMS.includes(string.charCodeAt(0))
}
/**
 * Tell whether the first code unit of `string` is the zero width joiner.
 *
 * @param {string|undefined} string - A code unit, or undefined when the
 *   caller indexed past the end of its string.
 * @returns {boolean} False for anything that is not a string.
 */
function isZeroWidthJoiner (string) {
  if (typeof string !== 'string') {
    return false
  }
  return string.charCodeAt(0) === ZWJ
}
/**
 * Decode the first two code units of `pair` as a UTF-16 surrogate pair.
 *
 * The code units are not validated: input that is not a real surrogate pair
 * gives a meaningless number, and input shorter than two code units gives
 * NaN.
 *
 * @param {string} pair - High surrogate followed by low surrogate.
 * @returns {number} The decoded code point.
 */
function codePointFromSurrogatePair (pair) {
  const lead = pair.charCodeAt(0) - HIGH_SURROGATE_START
  const trail = pair.charCodeAt(1) - LOW_SURROGATE_START
  // Supplementary planes start at U+10000; the lead surrogate carries the
  // upper 10 bits of the offset and the trail surrogate the lower 10.
  return 0x10000 + (lead << 10) + trail
}
/**
 * Closed-interval range check.
 *
 * @param {number} value - The number to test.
 * @param {number} lower - Smallest accepted value.
 * @param {number} upper - Largest accepted value.
 * @returns {boolean} True when lower <= value <= upper; false for NaN.
 */
function betweenInclusive (value, lower, upper) {
  return lower <= value && value <= upper
}
/**
 * Take part of a string, counting in runes (as produced by `runes`) rather
 * than in UTF-16 code units.
 *
 * @param {string} string - The text to cut from.
 * @param {number} [start] - Index of the first rune to keep. When omitted the
 *   whole string is returned unchanged.
 * @param {number} [width] - How many runes to keep. When omitted, or when it
 *   reaches past the end, everything from `start` onwards is kept.
 * @returns {string} The selected runes joined back together; '' when `start`
 *   is at or beyond the number of runes.
 */
function substring (string, start, width) {
  // Split first, even when `start` is missing, so that invalid input is
  // rejected by runes() in every case.
  const chars = runes(string)

  if (start === undefined) {
    return string
  }
  if (start >= chars.length) {
    return ''
  }

  const remaining = chars.length - start
  const span = width === undefined ? remaining : width
  const requestedEnd = start + span
  // Past the last rune: let slice() run to the end of the array.
  const sliceEnd = requestedEnd > (start + remaining) ? undefined : requestedEnd

  return chars.slice(start, sliceEnd).join('')
}
// Public API: the module itself is the `runes` function, and the
// rune-aware `substring` helper is attached to it under the name `substr`.
module.exports = runes
module.exports.substr = substring