Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
e556a1f
initial code
dilijev Jan 2, 2017
5521cbb
don't use readline
dilijev Jan 2, 2017
9a2df79
convert to LF
dilijev Jan 2, 2017
12771f9
Tried using Lazy module.
dilijev Jan 2, 2017
7cb6a6e
Produce full list of equivalence classes; try to render to file.
dilijev Jan 2, 2017
cfab014
Fix bug, render to file.
dilijev Jan 2, 2017
07b4326
mappings.ts (UnicodeData.txt to source table)
dilijev Jan 3, 2017
baa700b
Create mapping from UnicodeData.txt
dilijev Jan 3, 2017
4f8c8f5
Arguments; initial CaseFoldingRecord
dilijev Jan 3, 2017
e2aff0d
updated args parsing
dilijev Jan 3, 2017
8eb28f1
canonicalizeDeltas; createFromCaseFoldingRecord; various cleanup
dilijev Jan 3, 2017
c6a5736
Cleanup
dilijev Jan 3, 2017
95ac38a
Sort and insertion order of Row objects.
dilijev Jan 4, 2017
7e579d7
comments cleanup
dilijev Jan 4, 2017
583b69f
Refactored and moved files around to make this more maintainable. Out…
dilijev Jan 4, 2017
ade76b6
Tried to use modules properly, gave up and back to generating a singl…
dilijev Jan 4, 2017
4997ee9
Refactored tests and some cleanup.
dilijev Jan 6, 2017
4471d31
Try to convert to modules and reuse code again.
dilijev Jan 7, 2017
7f09964
Update to use external modules properly.
dilijev Jan 8, 2017
33ea95a
Extend prototypes in a more encapsulated way.
dilijev Jan 8, 2017
8da62fb
Restore TableToEquiv to working order.
dilijev Jan 9, 2017
2d6c0fa
Added EquivClass.ts
dilijev Jan 9, 2017
1365a64
Update to use 'export default' and be more es6-modules-like.
dilijev Jan 9, 2017
cebe669
Use EquivClass to generate the table of UnicodeData mappings.
dilijev Jan 10, 2017
4f52af1
createFromCaseFoldingEntry
dilijev Jan 10, 2017
19d7135
Add package.json
dilijev Jan 10, 2017
d7ffefb
Folding
dilijev Jan 10, 2017
60dbdcd
Transitive closure finished, things looking mostly good. Now working …
dilijev Jan 11, 2017
47c5b28
format normalization
dilijev Jan 11, 2017
60054ea
Row.expandRows
dilijev Jan 11, 2017
c7a2722
regression-suite
dilijev Jan 11, 2017
48e1524
Update regression suite generation and notes.
dilijev Jan 12, 2017
ffe0005
Add UCD 7.0
dilijev Jan 12, 2017
8eb0e71
update regression generation
dilijev Jan 12, 2017
ea73457
Add UCD 6.2 and 6.3
dilijev Jan 12, 2017
197bf9f
Generate non-unicode-flag tests and check which ones should not fold …
dilijev Jan 12, 2017
51597f7
Update build settings.
dilijev Mar 27, 2017
6e35ad1
Add missing newlines at EOF
dilijev Mar 27, 2017
5ae0ed7
Fix deprecation warning
dilijev Mar 27, 2017
2368d72
WIP
dilijev May 10, 2017
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions tools/Unicode/CaseInsensitive/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
node_modules/
typings/
*.js
*.map
*.txt
out/
16 changes: 16 additions & 0 deletions tools/Unicode/CaseInsensitive/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Place your settings in this file to overwrite default and user settings.
{
"files.exclude": {
"**/.svn": true,
"**/.hg": true,
"**/.git": true,

".gitignore": false,
"node_modules/": true,
"**/*.js": true,
"**/*.js.map": true,

".vscode/": false,
"**/.DS_Store": true
}
}
10 changes: 10 additions & 0 deletions tools/Unicode/CaseInsensitive/.vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "0.1.0",
"command": "tsc",
"isShellCommand": true,
"args": ["-p", "."],
"showOutput": "silent",
"problemMatcher": "$tsc"
}
239 changes: 239 additions & 0 deletions tools/Unicode/CaseInsensitive/EquivClass.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
/// <reference path="protos.d.ts" />

const _ = require('lodash');

import Row from './row';
import MappingSource from './MappingSource';
import * as Utils from './utils';
import { ExtendStringProto, ExtendNumberProto } from './prototypes';
ExtendStringProto(String.prototype);
ExtendNumberProto(Number.prototype);

/**
 * A set of code points that are treated as equivalent to one another,
 * together with the data source the equivalence came from and the category
 * field read from that source.
 *
 * `codePoints` is re-normalized (filtered, sorted, de-duplicated) after every
 * mutation performed through this class, so `codePoints[0]` acts as the key.
 */
class EquivClass {
    codePoints: number[]; // ensure that this is always in sorted order
    mappingSource: MappingSource;
    category: string;

    /** Key selectors comparing classes by their first four code points, in order. */
    static SortBy = [
        (x: EquivClass) => x.codePoints[0],
        (x: EquivClass) => x.codePoints[1],
        (x: EquivClass) => x.codePoints[2],
        (x: EquivClass) => x.codePoints[3],
        // 'mappingSource'
    ];

    // TODO is it useful to define Order? Do we need to sort a collection of these?
    // static Order() {
    // }

    /**
     * Builds a class from one `;`-separated UnicodeData.txt line.
     *
     * Field 0 is the hexadecimal code point, field 2 the category, and fields
     * 12/13/14 the uppercase/lowercase/titlecase mappings. Missing mapping
     * fields are passed to `toCodepoint()` as the empty string.
     * NOTE(review): `toCodepoint()` is a project String extension; it
     * presumably yields a non-number for an empty field, which `normalize()`
     * then discards -- confirm against prototypes.ts.
     */
    static createFromUnicodeDataEntry(line: string): EquivClass {
        const fields = line.trim().split(/\s*;\s*/);

        const codePoint = parseInt(fields[0], 16);
        const category = fields[2];

        const uppercase: number = (fields[12] || "").toCodepoint();
        const lowercase: number = (fields[13] || "").toCodepoint();
        const titlecase: number = (fields[14] || "").toCodepoint();

        return new EquivClass(codePoint, MappingSource.UnicodeData, [uppercase, lowercase, titlecase], category);
    }

    /**
     * Builds a class from one CaseFolding.txt line of the form
     * `code; category; mapping; # comment`.
     *
     * @returns the parsed class, or `undefined` when the line is empty or
     * holds only a comment -- callers must check for that.
     *
     * NOTE(review): `parseInt` stops at the first non-hex character, so a
     * mapping field containing several space-separated code points
     * contributes only its first one. A missing mapping field parses to NaN,
     * which `normalize()` drops.
     */
    static createFromCaseFoldingEntry(line: string): EquivClass {
        const sanitizedLine = line.trim().replace(/(; )?#.*$/, "");
        if (!sanitizedLine) {
            // handle empty lines
            return undefined;
        }

        const fields = sanitizedLine.split(/;\s*/);

        const codePoint = parseInt(fields[0], 16);
        const category = fields[1];
        const mapping = parseInt(fields[2], 16);

        return new EquivClass(codePoint, MappingSource.CaseFolding, [mapping], category);
    }

    /**
     * @param codePoint       the code point the class is created for
     * @param mappingSource   which data file produced the class
     * @param equivCodePoints further members; invalid entries are discarded
     * @param category        category field taken from the source line
     */
    constructor(codePoint: number, mappingSource: MappingSource, equivCodePoints: number[] = [], category: string = undefined) {
        this.codePoints = [codePoint];
        this.mappingSource = mappingSource;
        this.category = category;
        this.addCodepoints(equivCodePoints);
    }

    /**
     * Restores the invariants of `codePoints`: only real numbers, no Turkish
     * mappings (U+0130, U+0131), sorted by `Utils.NumericOrder`, no
     * duplicates.
     */
    private normalize(): void {
        this.codePoints = this.codePoints
            // `typeof NaN` is "number", so NaN (e.g. from a failed parseInt)
            // has to be rejected explicitly before it reaches the comparator.
            .filter(x => typeof x === "number" && isFinite(x) && x !== 0x131 && x !== 0x130) // explicitly remove Turkish mappings
            .sort(Utils.NumericOrder)
            // keep the first occurrence of each value
            .filter((x, i, sorted) => sorted.indexOf(x) === i);
    }

    /*
    Retrieves the key for this EquivClass, which is the smallest codePoint of the equivalence class.
    This codepoint is guaranteed to be the first in the list because that list is sorted by Utils.NumericOrder
    */
    getKey(): number {
        return this.codePoints[0];
    }

    /** True when the class has exactly one member. */
    isSingleton(): boolean {
        return this.codePoints.length === 1;
    }

    /** Adds one member and re-normalizes. */
    addCodepoint(codePoint: number): void {
        this.codePoints.push(codePoint);
        this.normalize();
    }

    /** Adds several members and re-normalizes. */
    addCodepoints(codePoints: number[]): void {
        this.codePoints = this.codePoints.concat(codePoints);
        this.normalize();
    }

    setMappingSource(mappingSource: MappingSource): void {
        this.mappingSource = mappingSource;
    }

    /**
     * Converts the class into `Row` objects: one row per member whose deltas
     * are the offsets of every member relative to it, or a single
     * `skipCount === 2` row for the special adjacent-pair form.
     */
    toRows(): Row[] {
        // special case to produce rows with skipCount == 2
        if (this.isSpecialPairFormat()) {
            // TODO this is probably the wrong place to make this decision (needs to be done when folding rows)
            const deltas = [-1, 1]; // special value for deltas for skipCount === 2
            const skipCount = 2;

            const row = new Row(this.mappingSource, this.codePoints[0], deltas, skipCount);
            // this Row now represents 2 entries to adjust the range so folding works correctly
            row.endRange = row.beginRange + 1;

            return [row];
        }

        const rows: Row[] = [];
        for (const x of this.codePoints) {
            rows.push(new Row(this.mappingSource, x, this.createDeltas(x)));
        }
        return rows;
    }

    toString(): string {
        return `${this.category}, ${Utils.MappingSourceToString(this.mappingSource)}, ${this.render()}`;
    }

    /** Renders every member via `toCppUnicodeHexString()`, each followed by a comma. */
    render(): string {
        let s = "";
        for (const codePoint of this.codePoints) {
            s += codePoint.toCppUnicodeHexString() + ",";
        }
        return s;
    }

    /**
     * Emits `assertMatches(...)` lines for every ordered pair of distinct
     * members. The `/iu` assertion is always emitted; the non-unicode `/i`
     * assertion only when both rendered operands are at most 4 characters.
     */
    renderRegressionTests(): string {
        let s = "";
        for (const codePoint of this.codePoints) {
            for (const testPoint of this.codePoints) {
                if (codePoint === testPoint) {
                    continue;
                }

                const codePointString = Utils.maybeWrapInBrackets(codePoint.toUnicodeHexString());
                const testPointString = Utils.maybeWrapInBrackets(testPoint.toUnicodeHexString());
                if (codePointString.length <= 4 && testPointString.length <= 4) {
                    s += `assertMatches(/\\u${codePointString}/i, 0x${testPoint.toUnicodeHexString()}, '\\u${testPointString}');\n`;
                }
                s += `assertMatches(/\\u${codePointString}/iu, 0x${testPoint.toUnicodeHexString()}, '\\u${testPointString}');\n`;
            }
        }
        return s;
    }

    /**
     * True when the class is exactly two members whose values differ by one.
     *
     * The deltas used for this decision are relative to `codePoints[0]`, so
     * the first delta is always 0; the only reachable shape is therefore
     * `[0, 1]`, which is what is tested here directly.
     */
    private isSpecialPairFormat(): boolean {
        return this.codePoints.length === 2 &&
            this.codePoints[1] - this.codePoints[0] === 1;
    }

    /** Offsets of every member relative to `baseCodePoint`, in member order. */
    private createDeltas(baseCodePoint: number): number[] {
        return this.codePoints.map(x => x - baseCodePoint);
    }

    /** Element-wise comparison of both member lists. */
    private codePointsEqual(other: EquivClass): boolean {
        return this.codePoints.length === other.codePoints.length &&
            this.codePoints.every((x, i) => x === other.codePoints[i]);
    }

    /** Removes every occurrence of `codePoint` from the class. */
    private dropCodePoint(codePoint: number): void {
        this.codePoints = this.codePoints.filter(x => x !== codePoint);
    }

    /**
     * Merges the members of `other` into this class.
     *
     * @returns true when at least one new member was added; otherwise the
     * previous member list is restored and false is returned.
     */
    extendSet(other: EquivClass): boolean {
        const startLength = this.codePoints.length;
        const origSet = this.codePoints;
        this.codePoints = this.codePoints.concat(other.codePoints);
        this.normalize();
        const endLength = this.codePoints.length;

        if (endLength > startLength) {
            // if it was extended then go ahead and update the mapping source
            // this.mappingSource = Utils.chooseMappingSource(this, other); // TODO REVIEW does this make sense here?
            return true;
        } else {
            this.codePoints = origSet;
            return false;
        }
    }

    /** Two classes can be folded only when they share the same key. */
    private isCompatibleWith(other: EquivClass): boolean {
        return this.codePoints[0] === other.codePoints[0];
    }

    /**
     * Attempts to absorb `other` into this class.
     *
     * @returns true when `other` is now represented by this class (identical
     * members, a dropped Turkish mapping, or a successful extension); false
     * when the keys differ or nothing new could be added.
     */
    fold(other: EquivClass): boolean {
        if (!this.isCompatibleWith(other)) {
            return false;
        }

        // if codepoints equal, fold trivially by ignoring other
        if (this.codePointsEqual(other)) {
            return true;
        }

        // Drop the Turkish mapping from the set when folding
        // NOTE(review): normalize() already strips U+0130/U+0131 from `other`,
        // so `other.codePoints[1]` may be undefined here, in which case
        // nothing is dropped -- confirm this is the intended outcome.
        if (other.category === "T") {
            this.dropCodePoint(other.codePoints[1]);
            return true;
        }

        if (this.extendSet(other)) {
            this.mappingSource = Utils.chooseMappingSource(this, other); // TODO REVIEW does this make sense here?
            return true;
        }

        return false;
    }

}

export default EquivClass;
Loading