Skip to content

Refactor text utils #2144

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import {
normalizePath,
resolvePath,
CharCode,
isTrivialAlphanum
isAlphaOrDecimal
} from "./util";

import {
Expand Down Expand Up @@ -2347,7 +2347,7 @@ export function mangleInternalPath(path: string): string {
if (pos >= 0 && len - pos >= 2) { // at least one char plus dot
let cur = pos;
while (++cur < len) {
if (!isTrivialAlphanum(path.charCodeAt(cur))) {
if (!isAlphaOrDecimal(path.charCodeAt(cur))) {
assert(false); // not a valid external path
return path;
}
Expand Down
14 changes: 7 additions & 7 deletions src/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ import {
isWhiteSpace,
isIdentifierStart,
isIdentifierPart,
isDecimalDigit,
isOctalDigit
isDecimal,
isOctal
} from "./util";

/** Named token types. */
Expand Down Expand Up @@ -703,7 +703,7 @@ export class Tokenizer extends DiagnosticEmitter {
++pos;
if (maxTokenLength > 1 && pos < end) {
let chr = text.charCodeAt(pos);
if (isDecimalDigit(chr)) {
if (isDecimal(chr)) {
this.pos = pos - 1;
return Token.FLOATLITERAL; // expects a call to readFloat
}
Expand Down Expand Up @@ -1177,7 +1177,7 @@ export class Tokenizer extends DiagnosticEmitter {
var c = text.charCodeAt(this.pos++);
switch (c) {
case CharCode._0: {
if (isTaggedTemplate && this.pos < end && isDecimalDigit(text.charCodeAt(this.pos))) {
if (isTaggedTemplate && this.pos < end && isDecimal(text.charCodeAt(this.pos))) {
++this.pos;
return text.substring(start, this.pos);
}
Expand Down Expand Up @@ -1331,7 +1331,7 @@ export class Tokenizer extends DiagnosticEmitter {
return this.readOctalInteger();
}
}
if (isOctalDigit(text.charCodeAt(pos + 1))) {
if (isOctal(text.charCodeAt(pos + 1))) {
let start = pos;
this.pos = pos + 1;
let value = this.readOctalInteger();
Expand Down Expand Up @@ -1578,7 +1578,7 @@ export class Tokenizer extends DiagnosticEmitter {
if (
++this.pos < end &&
(c = text.charCodeAt(this.pos)) == CharCode.MINUS || c == CharCode.PLUS &&
isDecimalDigit(text.charCodeAt(this.pos + 1))
isDecimal(text.charCodeAt(this.pos + 1))
) {
++this.pos;
}
Expand Down Expand Up @@ -1618,7 +1618,7 @@ export class Tokenizer extends DiagnosticEmitter {
}
sepEnd = pos + 1;
++sepCount;
} else if (!isDecimalDigit(c)) {
} else if (!isDecimal(c)) {
break;
}
++pos;
Expand Down
51 changes: 21 additions & 30 deletions src/util/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ export const enum CharCode {
}

/** Tests if the specified character code is some sort of line break. */
export function isLineBreak(c: CharCode): bool {
export function isLineBreak(c: i32): bool {
switch (c) {
case CharCode.LINEFEED:
case CharCode.CARRIAGERETURN:
Expand Down Expand Up @@ -175,45 +175,48 @@ export function isWhiteSpace(c: i32): bool {
}
}

/** Tests if the specified character code is a basic Latin letter (A-Z or a-z). */
export function isAlpha(c: i32): bool {
  // Setting bit 5 (0x20) maps an uppercase letter onto its lowercase
  // counterpart, so a single range check covers both cases.
  const lowered = c | 0x20;
  return CharCode.a <= lowered && lowered <= CharCode.z;
}

/** Tests if the specified character code is a valid decimal digit. */
export function isDecimalDigit(c: i32): bool {
export function isDecimal(c: i32): bool {
return c >= CharCode._0 && c <= CharCode._9;
}

/** Tests if the specified character code is a valid octal digit. */
export function isOctalDigit(c: i32): bool {
export function isOctal(c: i32): bool {
return c >= CharCode._0 && c <= CharCode._7;
}

/** Tests if the specified character code is a valid hexadecimal digit. */
export function isHexDigit(c: i32): bool {
return isDecimalDigit(c) || ((c | 32) >= CharCode.a && (c | 32) <= CharCode.f);
export function isHex(c: i32): bool {
let c0 = c | 32; // unify uppercases and lowercases a|A - f|F
return isDecimal(c) || (c0 >= CharCode.a && c0 <= CharCode.f);
}

/** Tests if the specified character code is trivially alphanumeric. */
export function isTrivialAlphanum(code: i32): bool {
return code >= CharCode.a && code <= CharCode.z
|| code >= CharCode.A && code <= CharCode.Z
|| code >= CharCode._0 && code <= CharCode._9;
export function isAlphaOrDecimal(c: i32): bool {
return isAlpha(c) || isDecimal(c);
}

/** Tests if the specified character code is a valid start of an identifier. */
export function isIdentifierStart(c: i32): bool {
let c0 = c | 32; // unify uppercases and lowercases a|A - z|Z
return c0 >= CharCode.a && c0 <= CharCode.z
return isAlpha(c)
|| c == CharCode._
|| c == CharCode.DOLLAR
|| c > 0x7F && isUnicodeIdentifierStart(c);
|| c >= 170 && c <= 65500
&& lookupInUnicodeMap(c as u16, unicodeIdentifierStart);
}

/** Tests if the specified character code is a valid part of an identifier. */
export function isIdentifierPart(c: i32): bool {
const c0 = c | 32; // unify uppercases and lowercases a|A - z|Z
return c0 >= CharCode.a && c0 <= CharCode.z
|| c >= CharCode._0 && c <= CharCode._9
return isAlphaOrDecimal(c)
|| c == CharCode._
|| c == CharCode.DOLLAR
|| c > 0x7F && isUnicodeIdentifierPart(c);
|| c >= 170 && c <= 65500
&& lookupInUnicodeMap(c as u16, unicodeIdentifierPart);
}

// storing as u16 to save memory
Expand Down Expand Up @@ -354,15 +357,13 @@ const unicodeIdentifierPart: u16[] = [
];

function lookupInUnicodeMap(code: u16, map: u16[]): bool {
if (code < map[0]) return false;

var lo = 0;
var hi = map.length;
var mid: i32;
var mid: u32;
var midVal: u16;

while (lo + 1 < hi) {
mid = lo + ((hi - lo) >> 1);
mid = lo + ((hi - lo) >>> 1);
mid -= (mid & 1);
midVal = map[mid];
if (midVal <= code && code <= map[mid + 1]) {
Expand All @@ -377,16 +378,6 @@ function lookupInUnicodeMap(code: u16, map: u16[]): bool {
return false;
}

function isUnicodeIdentifierStart(code: i32): bool {
return code < 170 || code > 65500 ? false :
lookupInUnicodeMap(code as u16, unicodeIdentifierStart);
}

function isUnicodeIdentifierPart(code: i32): bool {
return code < 170 || code > 65500 ? false :
lookupInUnicodeMap(code as u16, unicodeIdentifierPart);
}

const indentX1 = " ";
const indentX2 = " ";
const indentX4 = " ";
Expand Down