package.rewrite-pattern.js Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of regexpu-core Show documentation
Show all versions of regexpu-core Show documentation
regexpu’s core functionality (i.e. `rewritePattern(pattern, flag)`), capable of translating ES6 Unicode regular expressions to ES5.
The newest version!
'use strict';
const generate = require('regjsgen').generate;
const parse = require('regjsparser').parse;
const regenerate = require('regenerate');
const unicodeMatchProperty = require('unicode-match-property-ecmascript');
const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');
const iuMappings = require('./data/iu-mappings.js');
const iBMPMappings = require('./data/i-bmp-mappings.js');
const iuFoldings = require('./data/iu-foldings.js');
const ESCAPE_SETS = require('./data/character-class-escape-sets.js');
const { UNICODE_SET, UNICODE_IV_SET } = require('./data/all-characters.js');
function flatMap(array, callback) {
const result = [];
array.forEach(item => {
const res = callback(item);
if (Array.isArray(res)) {
result.push.apply(result, res);
} else {
result.push(res);
}
});
return result;
}
function regenerateContainsAstral(regenerateData) {
const data = regenerateData.data;
return data.length >= 1 && data[data.length - 1] >= 0x10000;
}
// https://tc39.es/ecma262/#prod-SyntaxCharacter
const SYNTAX_CHARS = /[\\^$.*+?()[\]{}|]/g;
const ASTRAL_SET = regenerate().addRange(0x10000, 0x10FFFF);
const NEWLINE_SET = regenerate().add(
// `LineTerminator`s (https://mths.be/es6#sec-line-terminators):
0x000A, // Line Feed
0x000D, // Carriage Return
0x2028, // Line Separator
0x2029 // Paragraph Separator
);
// Prepare a Regenerate set containing all code points that are supposed to be
// matched by `/./u`. https://mths.be/es6#sec-atom
const DOT_SET_UNICODE = UNICODE_SET.clone() // all Unicode code points
.remove(NEWLINE_SET);
const getCharacterClassEscapeSet = (character, unicode, ignoreCase, shouldApplySCF) => {
if (unicode) {
if (ignoreCase) {
const result = ESCAPE_SETS.UNICODE_IGNORE_CASE.get(character);
if (shouldApplySCF) {
return ESCAPE_SETS.UNICODESET_IGNORE_CASE.get(character);
} else {
return result;
}
}
return ESCAPE_SETS.UNICODE.get(character);
}
return ESCAPE_SETS.REGULAR.get(character);
};
const getUnicodeDotSet = (dotAll) => {
return dotAll ? UNICODE_SET : DOT_SET_UNICODE;
};
const getUnicodePropertyValueSet = (property, value) => {
const path = value ?
`${ property }/${ value }` :
`Binary_Property/${ property }`;
try {
return require(`regenerate-unicode-properties/${ path }.js`);
} catch (exception) {
throw new Error(
`Failed to recognize value \`${ value }\` for property ` +
`\`${ property }\`.`
);
}
};
const handleLoneUnicodePropertyNameOrValue = (value) => {
// It could be a `General_Category` value or a binary property.
// Note: `unicodeMatchPropertyValue` throws on invalid values.
try {
const property = 'General_Category';
const category = unicodeMatchPropertyValue(property, value);
return getUnicodePropertyValueSet(property, category);
} catch (exception) {}
// It’s not a `General_Category` value, so check if it’s a property
// of strings.
try {
return getUnicodePropertyValueSet('Property_of_Strings', value);
} catch (exception) {}
// Lastly, check if it’s a binary property of single code points.
// Note: `unicodeMatchProperty` throws on invalid properties.
const property = unicodeMatchProperty(value);
return getUnicodePropertyValueSet(property);
};
const getUnicodePropertyEscapeSet = (value, isNegative, isUnicodeSetIgnoreCase) => {
const parts = value.split('=');
const firstPart = parts[0];
let set;
if (parts.length == 1) {
set = handleLoneUnicodePropertyNameOrValue(firstPart);
} else {
// The pattern consists of two parts, i.e. `Property=Value`.
const property = unicodeMatchProperty(firstPart);
const value = unicodeMatchPropertyValue(property, parts[1]);
set = getUnicodePropertyValueSet(property, value);
}
if (isNegative) {
if (set.strings) {
throw new Error('Cannot negate Unicode property of strings');
}
return {
characters: (isUnicodeSetIgnoreCase ? UNICODE_IV_SET : UNICODE_SET).clone().remove(set.characters),
strings: new Set()
};
}
return {
characters: set.characters.clone(),
strings: set.strings
// We need to escape strings like *️⃣ to make sure that they can be safely used in unions.
? new Set(set.strings.map(str => str.replace(SYNTAX_CHARS, '\\$&')))
: new Set()
};
};
const getUnicodePropertyEscapeCharacterClassData = (property, isNegative, isUnicodeSetIgnoreCase, shouldApplySCF) => {
const set = getUnicodePropertyEscapeSet(property, isNegative, isUnicodeSetIgnoreCase);
const data = getCharacterClassEmptyData();
const singleChars = shouldApplySCF ? regenerate(set.characters.toArray().map(ch => simpleCaseFolding(ch))) : set.characters;
const caseEqFlags = configGetCaseEqFlags();
if (caseEqFlags) {
for (const codepoint of singleChars.toArray()) {
const list = getCaseEquivalents(codepoint, caseEqFlags);
if (list) {
singleChars.add(list);
}
}
}
data.singleChars = singleChars;
if (set.strings.size > 0) {
data.longStrings = set.strings;
data.maybeIncludesStrings = true;
}
return data;
};
const CASE_EQ_FLAG_NONE = 0b00;
const CASE_EQ_FLAG_BMP = 0b01;
const CASE_EQ_FLAG_UNICODE = 0b10;
function configGetCaseEqFlags() {
let flags = CASE_EQ_FLAG_NONE;
if (config.modifiersData.i === true) {
if (config.transform.modifiers) {
flags |= CASE_EQ_FLAG_BMP;
if (config.flags.unicode || config.flags.unicodeSets) {
flags |= CASE_EQ_FLAG_UNICODE;
}
}
} else if (config.modifiersData.i === undefined) {
if (config.transform.unicodeFlag && config.flags.ignoreCase) {
flags |= CASE_EQ_FLAG_UNICODE;
}
}
return flags;
}
// Given a range of code points, add any case-equivalent code points in that range
// to a set.
regenerate.prototype.iuAddRange = function(min, max, caseEqFlags) {
const $this = this;
do {
const list = getCaseEquivalents(min, caseEqFlags);
if (list) {
$this.add(list);
}
} while (++min <= max);
return $this;
};
regenerate.prototype.iuRemoveRange = function(min, max, caseEqFlags) {
const $this = this;
do {
const list = getCaseEquivalents(min, caseEqFlags);
if (list) {
$this.remove(list);
}
} while (++min <= max);
return $this;
};
const update = (item, pattern) => {
let tree = parse(pattern, config.useUnicodeFlag ? 'u' : '', {
lookbehind: true,
namedGroups: true,
unicodePropertyEscape: true,
unicodeSet: true,
modifiers: true,
});
switch (tree.type) {
case 'characterClass':
case 'group':
case 'value':
// No wrapping needed.
break;
default:
// Wrap the pattern in a non-capturing group.
tree = wrap(tree, pattern);
}
Object.assign(item, tree);
};
const wrap = (tree, pattern) => {
// Wrap the pattern in a non-capturing group.
return {
'type': 'group',
'behavior': 'ignore',
'body': [tree],
'raw': `(?:${ pattern })`
};
};
/**
* Given any codepoint ch, returns false or an array of characters,
* such that for every c in the array,
* c != ch and Canonicalize(~, c) == Canonicalize(~, ch)
*
* where Canonicalize is defined in
* https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch
* @param {number} codePoint input code point
* @param {number} flags bitwise flags composed of CASE_EQ_FLAG_*
* @returns false | number[]
*/
const getCaseEquivalents = (codePoint, flags) => {
if (flags === CASE_EQ_FLAG_NONE) {
return false;
}
let result = ((flags & CASE_EQ_FLAG_UNICODE) ? iuMappings.get(codePoint) : undefined) || [];
if (typeof result === "number") result = [result];
if (flags & CASE_EQ_FLAG_BMP) {
for (const cp of [codePoint].concat(result)) {
// Fast path for ASCII characters
if (cp >= 0x41 && cp <= 0x5a) {
result.push(cp + 0x20);
} else if (cp >= 0x61 && cp <= 0x7a) {
result.push(cp - 0x20);
} else {
result = result.concat(iBMPMappings.get(cp) || []);
}
}
}
return result.length == 0 ? false : result;
};
// https://tc39.es/ecma262/#sec-maybesimplecasefolding
const simpleCaseFolding = (codePoint) => {
// Fast path for ASCII characters
if (codePoint <= 0x7F) {
if (codePoint >= 0x41 && codePoint <= 0x5A) {
return codePoint + 0x20;
}
return codePoint;
}
return iuFoldings.get(codePoint) || codePoint;
}
const buildHandler = (action) => {
switch (action) {
case 'union':
return {
single: (data, cp) => {
data.singleChars.add(cp);
},
regSet: (data, set2) => {
data.singleChars.add(set2);
},
range: (data, start, end) => {
data.singleChars.addRange(start, end);
},
iuRange: (data, start, end, caseEqFlags) => {
data.singleChars.iuAddRange(start, end, caseEqFlags);
},
nested: (data, nestedData) => {
data.singleChars.add(nestedData.singleChars);
for (const str of nestedData.longStrings) data.longStrings.add(str);
if (nestedData.maybeIncludesStrings) data.maybeIncludesStrings = true;
}
};
case 'union-negative': {
const regSet = (data, set2) => {
data.singleChars = UNICODE_SET.clone().remove(set2).add(data.singleChars);
};
return {
single: (data, cp) => {
const unicode = UNICODE_SET.clone();
data.singleChars = data.singleChars.contains(cp) ? unicode : unicode.remove(cp);
},
regSet: regSet,
range: (data, start, end) => {
data.singleChars = UNICODE_SET.clone().removeRange(start, end).add(data.singleChars);
},
iuRange: (data, start, end, caseEqFlags) => {
data.singleChars = UNICODE_SET.clone().iuRemoveRange(start, end, caseEqFlags).add(data.singleChars);
},
nested: (data, nestedData) => {
regSet(data, nestedData.singleChars);
if (nestedData.maybeIncludesStrings) throw new Error('ASSERTION ERROR');
}
};
}
case 'intersection': {
const regSet = (data, set2) => {
if (data.first) data.singleChars = set2;
else data.singleChars.intersection(set2);
};
return {
single: (data, cp) => {
data.singleChars = data.first || data.singleChars.contains(cp) ? regenerate(cp) : regenerate();
data.longStrings.clear();
data.maybeIncludesStrings = false;
},
regSet: (data, set) => {
regSet(data, set);
data.longStrings.clear();
data.maybeIncludesStrings = false;
},
range: (data, start, end) => {
if (data.first) data.singleChars.addRange(start, end);
else data.singleChars.intersection(regenerate().addRange(start, end));
data.longStrings.clear();
data.maybeIncludesStrings = false;
},
iuRange: (data, start, end, caseEqFlags) => {
if (data.first) data.singleChars.iuAddRange(start, end, caseEqFlags);
else data.singleChars.intersection(regenerate().iuAddRange(start, end, caseEqFlags));
data.longStrings.clear();
data.maybeIncludesStrings = false;
},
nested: (data, nestedData) => {
regSet(data, nestedData.singleChars);
if (data.first) {
data.longStrings = nestedData.longStrings;
data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
} else {
for (const str of data.longStrings) {
if (!nestedData.longStrings.has(str)) data.longStrings.delete(str);
}
if (!nestedData.maybeIncludesStrings) data.maybeIncludesStrings = false;
}
}
};
}
case 'subtraction': {
const regSet = (data, set2) => {
if (data.first) data.singleChars.add(set2);
else data.singleChars.remove(set2);
};
return {
single: (data, cp) => {
if (data.first) data.singleChars.add(cp);
else data.singleChars.remove(cp);
},
regSet: regSet,
range: (data, start, end) => {
if (data.first) data.singleChars.addRange(start, end);
else data.singleChars.removeRange(start, end);
},
iuRange: (data, start, end, caseEqFlags) => {
if (data.first) data.singleChars.iuAddRange(start, end, caseEqFlags);
else data.singleChars.iuRemoveRange(start, end, caseEqFlags);
},
nested: (data, nestedData) => {
regSet(data, nestedData.singleChars);
if (data.first) {
data.longStrings = nestedData.longStrings;
data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
} else {
for (const str of data.longStrings) {
if (nestedData.longStrings.has(str)) data.longStrings.delete(str);
}
}
}
};
}
// The `default` clause is only here as a safeguard; it should never be
// reached. Code coverage tools should ignore it.
/* node:coverage ignore next */
default:
throw new Error(`Unknown set action: ${ characterClassItem.kind }`);
}
};
const getCharacterClassEmptyData = () => ({
transformed: config.transform.unicodeFlag,
singleChars: regenerate(),
longStrings: new Set(),
hasEmptyString: false,
first: true,
maybeIncludesStrings: false
});
const concatCaseEquivalents = (codePoint, caseEqFlags) => {
const caseEquivalents = getCaseEquivalents(codePoint, caseEqFlags);
if (caseEquivalents) {
return [codePoint, ...caseEquivalents];
}
return [codePoint];
};
const computeClassStrings = (classStrings, regenerateOptions, caseEqFlags, shouldApplySCF) => {
let data = getCharacterClassEmptyData();
for (const string of classStrings.strings) {
if (string.characters.length === 1) {
const codePoint = shouldApplySCF ? simpleCaseFolding(string.characters[0].codePoint) : string.characters[0].codePoint
concatCaseEquivalents(codePoint, caseEqFlags).forEach((cp) => {
data.singleChars.add(cp);
});
} else {
let stringifiedString = '';
if (caseEqFlags) {
for (const ch of string.characters) {
const codePoint = shouldApplySCF ? simpleCaseFolding(ch.codePoint) : ch.codePoint;
const set = regenerate(concatCaseEquivalents(codePoint, caseEqFlags));
stringifiedString += set.toString(regenerateOptions);
}
} else {
for (const ch of string.characters) {
const codePoint = shouldApplySCF ? simpleCaseFolding(ch.codePoint) : ch.codePoint;
if (codePoint !== ch.codePoint) {
stringifiedString += regenerate(codePoint).toString(regenerateOptions);
} else {
stringifiedString += generate(ch);
}
}
}
data.longStrings.add(stringifiedString);
data.maybeIncludesStrings = true;
}
}
return data;
}
const computeCharacterClass = (characterClassItem, regenerateOptions, shouldApplySCF) => {
let data = getCharacterClassEmptyData();
let handlePositive;
let handleNegative;
let caseEqFlags = configGetCaseEqFlags();
switch (characterClassItem.kind) {
case 'union':
handlePositive = buildHandler('union');
handleNegative = buildHandler('union-negative');
break;
case 'intersection':
handlePositive = buildHandler('intersection');
handleNegative = buildHandler('subtraction');
if (config.transform.unicodeSetsFlag) data.transformed = true;
if (config.isIgnoreCaseMode) {
shouldApplySCF = true;
}
break;
case 'subtraction':
handlePositive = buildHandler('subtraction');
handleNegative = buildHandler('intersection');
if (config.transform.unicodeSetsFlag) data.transformed = true;
if (config.isIgnoreCaseMode) {
shouldApplySCF = true;
}
break;
// The `default` clause is only here as a safeguard; it should never be
// reached. Code coverage tools should ignore it.
/* node:coverage ignore next */
default:
throw new Error(`Unknown character class kind: ${ characterClassItem.kind }`);
}
for (const item of characterClassItem.body) {
switch (item.type) {
case 'value':
const codePoint = shouldApplySCF ? simpleCaseFolding(item.codePoint) : item.codePoint;
const list = concatCaseEquivalents(codePoint, caseEqFlags);
handlePositive.regSet(data, regenerate(list));
if (list.length > 1) {
data.transformed = true;
}
break;
case 'characterClassRange':
const min = item.min.codePoint;
const max = item.max.codePoint;
if (shouldApplySCF) {
let list = [];
for (let cp = min; cp <= max; cp++) {
list.push(simpleCaseFolding(cp));
}
handlePositive.regSet(data, regenerate(list));
} else {
handlePositive.range(data, min, max);
}
if (caseEqFlags) {
// If shouldApplySCF is true, it is still ok to call iuRange because
// the set [min, max] shares the same case equivalents with scf([min, max])
handlePositive.iuRange(data, min, max, caseEqFlags);
data.transformed = true;
}
break;
case 'characterClassEscape':
handlePositive.regSet(data, getCharacterClassEscapeSet(
item.value,
config.flags.unicode || config.flags.unicodeSets,
config.flags.ignoreCase,
shouldApplySCF
));
break;
case 'unicodePropertyEscape':
const nestedData = getUnicodePropertyEscapeCharacterClassData(
item.value,
item.negative,
config.flags.unicodeSets && config.isIgnoreCaseMode,
shouldApplySCF
);
handlePositive.nested(data, nestedData);
data.transformed =
data.transformed ||
config.transform.unicodePropertyEscapes ||
(config.transform.unicodeSetsFlag && (nestedData.maybeIncludesStrings || characterClassItem.kind !== "union" || item.negative));
break;
case 'characterClass':
const handler = item.negative ? handleNegative : handlePositive;
const res = computeCharacterClass(item, regenerateOptions, shouldApplySCF);
handler.nested(data, res);
data.transformed = true;
break;
case 'classStrings':
handlePositive.nested(data, computeClassStrings(item, regenerateOptions, caseEqFlags, shouldApplySCF));
data.transformed = true;
break;
// The `default` clause is only here as a safeguard; it should never be
// reached. Code coverage tools should ignore it.
/* node:coverage ignore next */
default:
throw new Error(`Unknown term type: ${ item.type }`);
}
data.first = false;
}
if (characterClassItem.negative && data.maybeIncludesStrings) {
throw new SyntaxError('Cannot negate set containing strings');
}
return data;
}
const processCharacterClass = (
characterClassItem,
regenerateOptions,
computed = computeCharacterClass(characterClassItem, regenerateOptions)
) => {
const negative = characterClassItem.negative;
const { singleChars, transformed, longStrings } = computed;
if (transformed) {
// If single chars already contains some astral character, regenerate (bmpOnly: true) will create valid regex strings
const bmpOnly = regenerateContainsAstral(singleChars);
const setStr = singleChars.toString(Object.assign({}, regenerateOptions, { bmpOnly: bmpOnly }));
if (negative) {
if (config.useUnicodeFlag) {
update(characterClassItem, `[^${setStr[0] === '[' ? setStr.slice(1, -1) : setStr}]`)
} else {
if (config.flags.unicode || config.flags.unicodeSets) {
if (config.flags.ignoreCase) {
const astralCharsSet = singleChars.clone().intersection(ASTRAL_SET);
// Assumption: singleChars do not contain lone surrogates.
// Regex like /[^\ud800]/u is not supported
const surrogateOrBMPSetStr = singleChars
.clone()
.remove(astralCharsSet)
.addRange(0xd800, 0xdfff)
.toString({ bmpOnly: true });
// Don't generate negative lookahead for astral characters
// because the case folding is not working anyway as we break
// code points into surrogate pairs.
const astralNegativeSetStr = ASTRAL_SET
.clone()
.remove(astralCharsSet)
.toString(regenerateOptions);
// The transform here does not support lone surrogates.
update(
characterClassItem,
`(?!${surrogateOrBMPSetStr})[^]|${astralNegativeSetStr}`
);
} else {
// Generate negative set directly when case folding is not involved.
const negativeSet = UNICODE_SET.clone().remove(singleChars);
update(characterClassItem, negativeSet.toString(regenerateOptions));
}
} else {
update(characterClassItem, `(?!${setStr})[^]`);
}
}
} else {
const hasEmptyString = longStrings.has('');
const pieces = Array.from(longStrings).sort((a, b) => b.length - a.length);
if (setStr !== '[]' || longStrings.size === 0) {
pieces.splice(pieces.length - (hasEmptyString ? 1 : 0), 0, setStr);
}
update(characterClassItem, pieces.join('|'));
}
}
return characterClassItem;
};
const assertNoUnmatchedReferences = (groups) => {
const unmatchedReferencesNames = Object.keys(groups.unmatchedReferences);
if (unmatchedReferencesNames.length > 0) {
throw new Error(`Unknown group names: ${unmatchedReferencesNames}`);
}
};
const processModifiers = (item, regenerateOptions, groups) => {
const enabling = item.modifierFlags.enabling;
const disabling = item.modifierFlags.disabling;
const oldData = Object.assign({}, config.modifiersData);
for (const flag of enabling) {
config.modifiersData[flag] = true;
}
for (const flag of disabling) {
config.modifiersData[flag] = false;
}
if (config.transform.modifiers) {
delete item.modifierFlags;
item.behavior = 'ignore';
}
item.body = item.body.map(term => {
return processTerm(term, regenerateOptions, groups);
});
config.modifiersData = oldData;
return item;
}
const processTerm = (item, regenerateOptions, groups) => {
switch (item.type) {
case 'dot':
if (config.transform.unicodeFlag) {
update(
item,
getUnicodeDotSet(config.isDotAllMode).toString(regenerateOptions)
);
} else if ((config.modifiersData.s != null ? config.modifiersData.s && config.transform.modifiers : config.transform.dotAllFlag)) {
// TODO: consider changing this at the regenerate level.
update(item, '[^]');
}
break;
case 'characterClass':
item = processCharacterClass(item, regenerateOptions);
break;
case 'unicodePropertyEscape':
const data = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative, config.flags.unicodeSets && config.isIgnoreCaseMode);
if (data.maybeIncludesStrings) {
if (!config.flags.unicodeSets) {
throw new Error(
'Properties of strings are only supported when using the unicodeSets (v) flag.'
);
}
if (config.transform.unicodeSetsFlag) {
data.transformed = true;
item = processCharacterClass(item, regenerateOptions, data);
}
} else if (config.transform.unicodePropertyEscapes || configGetCaseEqFlags()) {
update(
item,
data.singleChars.toString(regenerateOptions)
);
}
break;
case 'characterClassEscape':
if (config.transform.unicodeFlag) {
update(
item,
getCharacterClassEscapeSet(
item.value,
/* config.transform.unicodeFlag implies config.flags.unicode */ true,
config.flags.ignoreCase
).toString(regenerateOptions)
);
}
break;
case 'group':
if (item.behavior == 'normal') {
groups.lastIndex++;
}
if (item.name) {
const name = item.name.value;
if (groups.namesConflicts[name]) {
throw new Error(
`Group '${ name }' has already been defined in this context.`
);
}
groups.namesConflicts[name] = true;
if (config.transform.namedGroups) {
delete item.name;
}
const index = groups.lastIndex;
if (!groups.names[name]) {
groups.names[name] = [];
}
groups.names[name].push(index);
if (groups.onNamedGroup) {
groups.onNamedGroup.call(null, name, index);
}
if (groups.unmatchedReferences[name]) {
delete groups.unmatchedReferences[name];
}
}
if (item.modifierFlags) {
return processModifiers(item, regenerateOptions, groups);
}
/* falls through */
case 'quantifier':
item.body = item.body.map(term => {
return processTerm(term, regenerateOptions, groups);
});
break;
case 'disjunction':
const outerNamesConflicts = groups.namesConflicts;
item.body = item.body.map(term => {
groups.namesConflicts = Object.create(outerNamesConflicts);
return processTerm(term, regenerateOptions, groups);
});
break;
case 'alternative':
item.body = flatMap(item.body, term => {
const res = processTerm(term, regenerateOptions, groups);
// Alternatives cannot contain alternatives; flatten them.
return res.type === 'alternative' ? res.body : res;
});
break;
case 'value':
const codePoint = item.codePoint;
const caseEqFlags = configGetCaseEqFlags();
const list = concatCaseEquivalents(codePoint, caseEqFlags);
if (list.length === 1 && item.kind === "symbol" && codePoint >= 0x20 && codePoint <= 0x7E) {
// skip regenerate when it is a printable ASCII symbol
break;
}
const set = regenerate(list);
update(item, set.toString(regenerateOptions));
break;
case 'reference':
if (item.name) {
const name = item.name.value;
const indexes = groups.names[name];
if (!indexes) {
groups.unmatchedReferences[name] = true;
}
if (config.transform.namedGroups) {
if (indexes) {
const body = indexes.map(index => ({
'type': 'reference',
'matchIndex': index,
'raw': '\\' + index,
}));
if (body.length === 1) {
return body[0];
}
return {
'type': 'alternative',
'body': body,
'raw': body.map(term => term.raw).join(''),
};
}
// This named reference comes before the group where it’s defined,
// so it’s always an empty match.
return {
'type': 'group',
'behavior': 'ignore',
'body': [],
'raw': '(?:)',
};
}
}
break;
case 'anchor':
if (config.modifiersData.m && config.transform.modifiers) {
if (item.kind == 'start') {
update(item, `(?:^|(?<=${NEWLINE_SET.toString()}))`);
} else if (item.kind == 'end') {
update(item, `(?:$|(?=${NEWLINE_SET.toString()}))`);
}
}
case 'empty':
// Nothing to do here.
break;
// The `default` clause is only here as a safeguard; it should never be
// reached. Code coverage tools should ignore it.
/* node:coverage ignore next */
default:
throw new Error(`Unknown term type: ${ item.type }`);
}
return item;
};
const config = {
'flags': {
'ignoreCase': false,
'unicode': false,
'unicodeSets': false,
'dotAll': false,
'multiline': false,
},
'transform': {
'dotAllFlag': false,
'unicodeFlag': false,
'unicodeSetsFlag': false,
'unicodePropertyEscapes': false,
'namedGroups': false,
'modifiers': false,
},
'modifiersData': {
'i': undefined,
's': undefined,
'm': undefined,
},
get useUnicodeFlag() {
return (this.flags.unicode || this.flags.unicodeSets) && !this.transform.unicodeFlag;
},
get isDotAllMode() {
return (this.modifiersData.s !== undefined ? this.modifiersData.s : this.flags.dotAll);
},
get isIgnoreCaseMode() {
return (this.modifiersData.i !== undefined ? this.modifiersData.i : this.flags.ignoreCase);
}
};
const validateOptions = (options) => {
if (!options) return;
for (const key of Object.keys(options)) {
const value = options[key];
switch (key) {
case 'dotAllFlag':
case 'unicodeFlag':
case 'unicodePropertyEscapes':
case 'unicodeSetsFlag':
case 'namedGroups':
if (value != null && value !== false && value !== 'transform') {
throw new Error(`.${key} must be false (default) or 'transform'.`);
}
break;
// todo: remove modifiers: 'parse' in regexpu-core v7
case 'modifiers':
if (value != null && value !== false && value !== 'parse' && value !== 'transform') {
throw new Error(`.${key} must be false (default), 'parse' or 'transform'.`);
}
break;
case 'onNamedGroup':
case 'onNewFlags':
if (value != null && typeof value !== 'function') {
throw new Error(`.${key} must be a function.`);
}
break;
default:
throw new Error(`.${key} is not a valid regexpu-core option.`);
}
}
};
const hasFlag = (flags, flag) => flags ? flags.includes(flag) : false;
const transform = (options, name) => options ? options[name] === 'transform' : false;
const rewritePattern = (pattern, flags, options) => {
validateOptions(options);
config.flags.unicode = hasFlag(flags, 'u');
config.flags.unicodeSets = hasFlag(flags, 'v');
config.flags.ignoreCase = hasFlag(flags, 'i');
config.flags.dotAll = hasFlag(flags, 's');
config.flags.multiline = hasFlag(flags, 'm');
config.transform.dotAllFlag = config.flags.dotAll && transform(options, 'dotAllFlag');
config.transform.unicodeFlag = (config.flags.unicode || config.flags.unicodeSets) && transform(options, 'unicodeFlag');
config.transform.unicodeSetsFlag = config.flags.unicodeSets && transform(options, 'unicodeSetsFlag');
// unicodeFlag: 'transform' implies unicodePropertyEscapes: 'transform'
config.transform.unicodePropertyEscapes = (config.flags.unicode || config.flags.unicodeSets) && (
transform(options, 'unicodeFlag') || transform(options, 'unicodePropertyEscapes')
);
config.transform.namedGroups = transform(options, 'namedGroups');
config.transform.modifiers = transform(options, 'modifiers');
config.modifiersData.i = undefined;
config.modifiersData.s = undefined;
config.modifiersData.m = undefined;
const regjsparserFeatures = {
// Enable every stable RegExp feature by default
'modifiers': true,
'unicodePropertyEscape': true,
'unicodeSet': true,
'namedGroups': true,
'lookbehind': true,
};
const regenerateOptions = {
'hasUnicodeFlag': config.useUnicodeFlag,
'bmpOnly': !config.flags.unicode && !config.flags.unicodeSets
};
const groups = {
'onNamedGroup': options && options.onNamedGroup,
'lastIndex': 0,
'names': Object.create(null), // { [name]: Array }
'namesConflicts': Object.create(null), // { [name]: true }
'unmatchedReferences': Object.create(null) // { [name]: true }
};
const tree = parse(pattern, flags, regjsparserFeatures);
if (config.transform.modifiers) {
if (/\(\?[a-z]*-[a-z]+:/.test(pattern)) {
// the pattern _likely_ contain inline disabled modifiers
// we need to traverse to make sure that they are actually modifiers and to collect them
const allDisabledModifiers = Object.create(null)
const itemStack = [tree];
let node;
while (node = itemStack.pop(), node != undefined) {
if (Array.isArray(node)) {
Array.prototype.push.apply(itemStack, node);
} else if (typeof node == 'object' && node != null) {
for (const key of Object.keys(node)) {
const value = node[key];
if (key == 'modifierFlags') {
for (const flag of value.disabling) {
allDisabledModifiers[flag] = true;
}
} else if (typeof value == 'object' && value != null) {
itemStack.push(value);
}
}
}
}
if (allDisabledModifiers.i) {
config.modifiersData.i = config.flags.ignoreCase;
}
if (allDisabledModifiers.m) {
config.modifiersData.m = config.flags.multiline;
}
if (allDisabledModifiers.s) {
config.modifiersData.s = config.flags.dotAll;
}
}
}
// Note: `processTerm` mutates `tree` and `groups`.
processTerm(tree, regenerateOptions, groups);
assertNoUnmatchedReferences(groups);
const onNewFlags = options && options.onNewFlags;
if (onNewFlags) {
let newFlags = flags.split('').filter((flag) => !config.modifiersData[flag]).join('');
if (config.transform.unicodeSetsFlag) {
newFlags = newFlags.replace('v', 'u');
}
if (config.transform.unicodeFlag) {
newFlags = newFlags.replace('u', '');
}
if (config.transform.dotAllFlag) {
newFlags = newFlags.replace('s', '');
}
onNewFlags(newFlags);
}
return generate(tree);
};
module.exports = rewritePattern;
© 2015 - 2024 Weber Informatics LLC | Privacy Policy