forked from dotcypress/runes
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.js
More file actions
168 lines (134 loc) · 4.82 KB
/
index.js
File metadata and controls
168 lines (134 loc) · 4.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
'use strict';
// UTF-16 surrogate ranges: a high surrogate (0xD800-0xDBFF) followed by a
// low surrogate (starting at 0xDC00) encodes one code point above 0xFFFF.
var HIGH_SURROGATE_START = 0xd800;
var HIGH_SURROGATE_END = 0xdbff;
var LOW_SURROGATE_START = 0xdc00;
// Regional indicator symbols; two of them in a row form a country flag.
var REGIONAL_INDICATOR_START = 0x1f1e6;
var REGIONAL_INDICATOR_END = 0x1f1ff;
// Emoji skin-tone (Fitzpatrick) modifiers.
var FITZPATRICK_MODIFIER_START = 0x1f3fb;
var FITZPATRICK_MODIFIER_END = 0x1f3ff;
// Variation selectors VS1-VS16.
var VARIATION_MODIFIER_START = 0xfe00;
var VARIATION_MODIFIER_END = 0xfe0f;
// Combining Diacritical Marks for Symbols block.
var DIACRITICAL_MARKS_START = 0x20d0;
var DIACRITICAL_MARKS_END = 0x20ff;
// Zero width joiner, used to glue several emoji into one sequence.
var ZWJ = 0x200d;
// Code units that are kept attached to the character preceding them.
// Every entry is listed exactly once; the table is only used for membership
// checks, so duplicates would add nothing.
var GRAPHEMS = [
  0x0308, // ( ◌̈ ) COMBINING DIAERESIS
  0x0937, // ( ष ) DEVANAGARI LETTER SSA
  0x093F, // ( ि ) DEVANAGARI VOWEL SIGN I
  0x0BA8, // ( ந ) TAMIL LETTER NA
  0x0BBF, // ( ி ) TAMIL VOWEL SIGN I
  0x0BCD, // ( ◌்) TAMIL SIGN VIRAMA
  0x0E31, // ( ◌ั ) THAI CHARACTER MAI HAN-AKAT
  0x0E33, // ( ำ ) THAI CHARACTER SARA AM
  0x0E40, // ( เ ) THAI CHARACTER SARA E
  0x0E49, // ( ◌้ ) THAI CHARACTER MAI THO
  0x1100, // ( ᄀ ) HANGUL CHOSEONG KIYEOK
  0x1161, // ( ᅡ ) HANGUL JUNGSEONG A
  0x11A8 // ( ᆨ ) HANGUL JONGSEONG KIYEOK
];
// Splits `string` into an array of substrings, one per visible character.
// Each piece is the character measured by nextUnits, optionally followed by
// one GRAPHEMS entry, one variation selector and one diacritical mark (tested
// once each, in that order). A zero width joiner after that keeps the piece
// open so the next character is appended to it.
// Throws an Error when `string` is not a string.
function runes(string) {
  if (typeof string !== 'string') {
    throw new Error('string cannot be undefined or null');
  }
  var trailingMarkTests = [isGraphem, isVariationSelector, isDiacriticalMark];
  var pieces = [];
  var start = 0; // first code unit of the piece being built
  var end = 0; // one past the last code unit consumed so far
  while (start < string.length) {
    end += nextUnits(end, string);
    for (var t = 0; t < trailingMarkTests.length; t++) {
      if (trailingMarkTests[t](string[end])) {
        end++;
      }
    }
    if (isZeroWidthJoiner(string[end])) {
      // Consume the joiner and keep extending the same piece.
      end++;
      continue;
    }
    pieces.push(string.substring(start, end));
    start = end;
  }
  return pieces;
}
// Decide how many code units make up the current character.
// BMP characters: 1 code unit
// Non-BMP characters (represented by surrogate pairs): 2 code units
// Emoji with skin-tone modifiers: 4 code units (2 code points)
// Country flags: 4 code units (2 code points)
// Variations: 2 code units
// Returns the number of UTF-16 code units (1, 2 or 4) taken by the character
// that starts at index `i` of `string`.
function nextUnits(i, string) {
  var LOW_SURROGATE_END = 0xdfff;
  var current = string[i];
  // If we don't have a value that is part of a surrogate pair, or we're at
  // the end, only take the value at i
  if (!isFirstOfSurrogatePair(current) || i === string.length - 1) {
    return 1;
  }
  // A high surrogate that is not followed by a low surrogate is unpaired.
  // Take it on its own so it does not swallow the character after it.
  if (!betweenInclusive(string.charCodeAt(i + 1), LOW_SURROGATE_START, LOW_SURROGATE_END)) {
    return 1;
  }
  var currentPair = current + string[i + 1];
  // Only the two code units following the current pair are inspected.
  var nextPair = string.substring(i + 2, i + 4);
  // Country flags are comprised of two regional indicator symbols,
  // each represented by a surrogate pair.
  // See http://emojipedia.org/flags/
  // If both pairs are regional indicator symbols, take 4
  if (isRegionalIndicator(currentPair) && isRegionalIndicator(nextPair)) {
    return 4;
  }
  // If the next pair make a Fitzpatrick skin tone
  // modifier, take 4
  // See http://emojipedia.org/modifiers/
  // Technically, only some code points are meant to be
  // combined with the skin tone modifiers. This function
  // does not check the current pair to see if it is
  // one of them.
  if (isFitzpatrickModifier(nextPair)) {
    return 4;
  }
  return 2;
}
// Tells whether the first code unit of `string` is a high surrogate.
// Falsy input (undefined or '') is handed back unchanged, so the result is
// falsy but not necessarily the boolean `false`.
function isFirstOfSurrogatePair(string) {
  if (!string) {
    return string;
  }
  var leadUnit = string[0].charCodeAt(0);
  return betweenInclusive(leadUnit, HIGH_SURROGATE_START, HIGH_SURROGATE_END);
}
// True when the surrogate pair at the start of `string` decodes to a code
// point inside the regional indicator range.
function isRegionalIndicator(string) {
  var codePoint = codePointFromSurrogatePair(string);
  return betweenInclusive(codePoint, REGIONAL_INDICATOR_START, REGIONAL_INDICATOR_END);
}
// True when the surrogate pair at the start of `string` decodes to a code
// point inside the Fitzpatrick (skin tone) modifier range.
function isFitzpatrickModifier(string) {
  var codePoint = codePointFromSurrogatePair(string);
  return betweenInclusive(codePoint, FITZPATRICK_MODIFIER_START, FITZPATRICK_MODIFIER_END);
}
// True when `string` is a string whose first code unit lies in the variation
// selector range; any non-string value yields false.
function isVariationSelector(string) {
  if (typeof string !== 'string') {
    return false;
  }
  var unit = string.charCodeAt(0);
  return betweenInclusive(unit, VARIATION_MODIFIER_START, VARIATION_MODIFIER_END);
}
// True when `string` is a string whose first code unit lies in the
// diacritical marks range; any non-string value yields false.
function isDiacriticalMark(string) {
  if (typeof string !== 'string') {
    return false;
  }
  var unit = string.charCodeAt(0);
  return betweenInclusive(unit, DIACRITICAL_MARKS_START, DIACRITICAL_MARKS_END);
}
// True when `string` is a string whose first code unit is listed in
// GRAPHEMS; any non-string value yields false.
function isGraphem(string) {
  if (typeof string !== 'string') {
    return false;
  }
  var unit = string.charCodeAt(0);
  return GRAPHEMS.indexOf(unit) >= 0;
}
// True when `string` is a string whose first code unit is the zero width
// joiner; any non-string value yields false.
function isZeroWidthJoiner(string) {
  if (typeof string !== 'string') {
    return false;
  }
  return ZWJ === string.charCodeAt(0);
}
// Combines the first two code units of `pair` (high surrogate, then low
// surrogate) into the code point they encode: the high offset supplies the
// upper bits, the low offset the lower 10 bits, shifted up by 0x10000.
function codePointFromSurrogatePair(pair) {
  var upperBits = (pair.charCodeAt(0) - HIGH_SURROGATE_START) << 10;
  var lowerBits = pair.charCodeAt(1) - LOW_SURROGATE_START;
  return upperBits + lowerBits + 0x10000;
}
// Range test with both bounds included: true only when
// lower <= value <= upper. A NaN value is never in range.
function betweenInclusive(value, lower, upper) {
  var reachesLower = value >= lower;
  if (!reachesLower) {
    return false;
  }
  return value <= upper;
}
// Rune-aware counterpart of String.prototype.substr: `start` and `width` are
// counted in runes (as produced by runes()) rather than UTF-16 code units.
//   string - the text to cut; runes() throws when it is not a string
//   start  - index of the first rune to keep; a negative value counts back
//            from the end; when undefined the whole string is returned
//   width  - number of runes to keep; when undefined everything up to the
//            end is kept; zero or a negative value yields ''
// Returns the selected runes joined back into a string.
function substring(string, start, width) {
  // Split first so that invalid input is rejected even when start is omitted.
  var chars = runes(string);
  if (start === undefined) {
    return string;
  }
  if (start >= chars.length) {
    return '';
  }
  // Turn a negative start into an absolute index, clamped at the first rune,
  // so that adding the width can never wrap around to the front of the array.
  var from = start < 0 ? Math.max(chars.length + start, 0) : start;
  if (width === undefined) {
    return chars.slice(from).join('');
  }
  if (width <= 0) {
    return '';
  }
  // slice() clamps the end index to the array length on its own.
  return chars.slice(from, from + width).join('');
}
// The module's export is the `runes` function itself.
module.exports = runes;
// The rune-aware substring helper is attached to it as the `substr` property.
module.exports.substr = substring;