// ml-modules.root.data-hub.third-party.fast-xml-parser.src.validator.js
// Part of marklogic-data-hub: Library for Creating an Operational Data Hub on MarkLogic.
'use strict';
const util = require('./util');
const defaultOptions = {
allowBooleanAttributes: false, //A tag can have attributes without any value
};
const props = ['allowBooleanAttributes'];
//const tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g");
exports.validate = function (xmlData, options) {
options = util.buildOptions(options, defaultOptions, props);
//xmlData = xmlData.replace(/(\r\n|\n|\r)/gm,"");//make it single line
//xmlData = xmlData.replace(/(^\s*<\?xml.*?\?>)/g,"");//Remove XML starting tag
//xmlData = xmlData.replace(/()/g,"");//Remove DOCTYPE
const tags = [];
let tagFound = false;
//indicates that the root tag has been closed (aka. depth 0 has been reached)
let reachedRoot = false;
if (xmlData[0] === '\ufeff') {
// check for byte order mark (BOM)
xmlData = xmlData.substr(1);
}
for (let i = 0; i < xmlData.length; i++) {
if (xmlData[i] === '<') {
//starting of tag
//read until you reach to '>' avoiding any '>' in attribute value
i++;
if (xmlData[i] === '?') {
i = readPI(xmlData, ++i);
if (i.err) {
return i;
}
} else if (xmlData[i] === '!') {
i = readCommentAndCDATA(xmlData, i);
continue;
} else {
let closingTag = false;
if (xmlData[i] === '/') {
//closing tag
closingTag = true;
i++;
}
//read tagname
let tagName = '';
for (; i < xmlData.length &&
xmlData[i] !== '>' &&
xmlData[i] !== ' ' &&
xmlData[i] !== '\t' &&
xmlData[i] !== '\n' &&
xmlData[i] !== '\r'; i++
) {
tagName += xmlData[i];
}
tagName = tagName.trim();
//console.log(tagName);
if (tagName[tagName.length - 1] === '/') {
//self closing tag without attributes
tagName = tagName.substring(0, tagName.length - 1);
//continue;
i--;
}
if (!validateTagName(tagName)) {
let msg;
if (tagName.trim().length === 0) {
msg = "There is an unnecessary space between tag name and backward slash ' ..'.";
} else {
msg = "Tag '"+tagName+"' is an invalid name.";
}
return getErrorObject('InvalidTag', msg, getLineNumberForPosition(xmlData, i));
}
const result = readAttributeStr(xmlData, i);
if (result === false) {
return getErrorObject('InvalidAttr', "Attributes for '"+tagName+"' have open quote.", getLineNumberForPosition(xmlData, i));
}
let attrStr = result.value;
i = result.index;
if (attrStr[attrStr.length - 1] === '/') {
//self closing tag
attrStr = attrStr.substring(0, attrStr.length - 1);
const isValid = validateAttributeString(attrStr, options);
if (isValid === true) {
tagFound = true;
//continue; //text may presents after self closing tag
} else {
//the result from the nested function returns the position of the error within the attribute
//in order to get the 'true' error line, we need to calculate the position where the attribute begins (i - attrStr.length) and then add the position within the attribute
//this gives us the absolute index in the entire xml, which we can use to find the line at last
return getErrorObject(isValid.err.code, isValid.err.msg, getLineNumberForPosition(xmlData, i - attrStr.length + isValid.err.line));
}
} else if (closingTag) {
if (!result.tagClosed) {
return getErrorObject('InvalidTag', "Closing tag '"+tagName+"' doesn't have proper closing.", getLineNumberForPosition(xmlData, i));
} else if (attrStr.trim().length > 0) {
return getErrorObject('InvalidTag', "Closing tag '"+tagName+"' can't have attributes or invalid starting.", getLineNumberForPosition(xmlData, i));
} else {
const otg = tags.pop();
if (tagName !== otg) {
return getErrorObject('InvalidTag', "Closing tag '"+otg+"' is expected inplace of '"+tagName+"'.", getLineNumberForPosition(xmlData, i));
}
//when there are no more tags, we reached the root level.
if (tags.length == 0) {
reachedRoot = true;
}
}
} else {
const isValid = validateAttributeString(attrStr, options);
if (isValid !== true) {
//the result from the nested function returns the position of the error within the attribute
//in order to get the 'true' error line, we need to calculate the position where the attribute begins (i - attrStr.length) and then add the position within the attribute
//this gives us the absolute index in the entire xml, which we can use to find the line at last
return getErrorObject(isValid.err.code, isValid.err.msg, getLineNumberForPosition(xmlData, i - attrStr.length + isValid.err.line));
}
//if the root level has been reached before ...
if (reachedRoot === true) {
return getErrorObject('InvalidXml', 'Multiple possible root nodes found.', getLineNumberForPosition(xmlData, i));
} else {
tags.push(tagName);
}
tagFound = true;
}
//skip tag text value
//It may include comments and CDATA value
for (i++; i < xmlData.length; i++) {
if (xmlData[i] === '<') {
if (xmlData[i + 1] === '!') {
//comment or CADATA
i++;
i = readCommentAndCDATA(xmlData, i);
continue;
} else {
break;
}
} else if (xmlData[i] === '&') {
const afterAmp = validateAmpersand(xmlData, i);
if (afterAmp == -1)
return getErrorObject('InvalidChar', "char '&' is not expected.", getLineNumberForPosition(xmlData, i));
i = afterAmp;
}
} //end of reading tag text value
if (xmlData[i] === '<') {
i--;
}
}
} else {
if (xmlData[i] === ' ' || xmlData[i] === '\t' || xmlData[i] === '\n' || xmlData[i] === '\r') {
continue;
}
return getErrorObject('InvalidChar', "char '"+xmlData[i]+"' is not expected.", getLineNumberForPosition(xmlData, i));
}
}
if (!tagFound) {
return getErrorObject('InvalidXml', 'Start tag expected.', 1);
} else if (tags.length > 0) {
return getErrorObject('InvalidXml', "Invalid '"+JSON.stringify(tags, null, 4).replace(/\r?\n/g, '')+"' found.", 1);
}
return true;
};
/**
* Read Processing insstructions and skip
* @param {*} xmlData
* @param {*} i
*/
function readPI(xmlData, i) {
var start = i;
for (; i < xmlData.length; i++) {
if (xmlData[i] == '?' || xmlData[i] == ' ') {
//tagname
var tagname = xmlData.substr(start, i - start);
if (i > 5 && tagname === 'xml') {
return getErrorObject('InvalidXml', 'XML declaration allowed only at the start of the document.', getLineNumberForPosition(xmlData, i));
} else if (xmlData[i] == '?' && xmlData[i + 1] == '>') {
//check if valid attribut string
i++;
break;
} else {
continue;
}
}
}
return i;
}
function readCommentAndCDATA(xmlData, i) {
if (xmlData.length > i + 5 && xmlData[i + 1] === '-' && xmlData[i + 2] === '-') {
//comment
for (i += 3; i < xmlData.length; i++) {
if (xmlData[i] === '-' && xmlData[i + 1] === '-' && xmlData[i + 2] === '>') {
i += 2;
break;
}
}
} else if (
xmlData.length > i + 8 &&
xmlData[i + 1] === 'D' &&
xmlData[i + 2] === 'O' &&
xmlData[i + 3] === 'C' &&
xmlData[i + 4] === 'T' &&
xmlData[i + 5] === 'Y' &&
xmlData[i + 6] === 'P' &&
xmlData[i + 7] === 'E'
) {
let angleBracketsCount = 1;
for (i += 8; i < xmlData.length; i++) {
if (xmlData[i] === '<') {
angleBracketsCount++;
} else if (xmlData[i] === '>') {
angleBracketsCount--;
if (angleBracketsCount === 0) {
break;
}
}
}
} else if (
xmlData.length > i + 9 &&
xmlData[i + 1] === '[' &&
xmlData[i + 2] === 'C' &&
xmlData[i + 3] === 'D' &&
xmlData[i + 4] === 'A' &&
xmlData[i + 5] === 'T' &&
xmlData[i + 6] === 'A' &&
xmlData[i + 7] === '['
) {
for (i += 8; i < xmlData.length; i++) {
if (xmlData[i] === ']' && xmlData[i + 1] === ']' && xmlData[i + 2] === '>') {
i += 2;
break;
}
}
}
return i;
}
var doubleQuote = '"';
var singleQuote = "'";
/**
* Keep reading xmlData until '<' is found outside the attribute value.
* @param {string} xmlData
* @param {number} i
*/
function readAttributeStr(xmlData, i) {
let attrStr = '';
let startChar = '';
let tagClosed = false;
for (; i < xmlData.length; i++) {
if (xmlData[i] === doubleQuote || xmlData[i] === singleQuote) {
if (startChar === '') {
startChar = xmlData[i];
} else if (startChar !== xmlData[i]) {
//if vaue is enclosed with double quote then single quotes are allowed inside the value and vice versa
continue;
} else {
startChar = '';
}
} else if (xmlData[i] === '>') {
if (startChar === '') {
tagClosed = true;
break;
}
}
attrStr += xmlData[i];
}
if (startChar !== '') {
return false;
}
return {
value: attrStr,
index: i,
tagClosed: tagClosed
};
}
/**
* Select all the attributes whether valid or invalid.
*/
const validAttrStrRegxp = new RegExp('(\\s*)([^\\s=]+)(\\s*=)?(\\s*([\'"])(([\\s\\S])*?)\\5)?', 'g');
//attr, ="sd", a="amit's", a="sd"b="saf", ab cd=""
function validateAttributeString(attrStr, options) {
//console.log("start:"+attrStr+":end");
//if(attrStr.trim().length === 0) return true; //empty string
const matches = util.getAllMatches(attrStr, validAttrStrRegxp);
const attrNames = {};
for (let i = 0; i < matches.length; i++) {
if (matches[i][1].length === 0) {
//nospace before attribute name: a="sd"b="saf"
return getErrorObject('InvalidAttr', "Attribute '"+matches[i][2]+"' has no space in starting.", getPositionFromMatch(attrStr, matches[i][0]))
} else if (matches[i][3] === undefined && !options.allowBooleanAttributes) {
//independent attribute: ab
return getErrorObject('InvalidAttr', "boolean attribute '"+matches[i][2]+"' is not allowed.", getPositionFromMatch(attrStr, matches[i][0]));
}
/* else if(matches[i][6] === undefined){//attribute without value: ab=
return { err: { code:"InvalidAttr",msg:"attribute " + matches[i][2] + " has no value assigned."}};
} */
const attrName = matches[i][2];
if (!validateAttrName(attrName)) {
return getErrorObject('InvalidAttr', "Attribute '"+attrName+"' is an invalid name.", getPositionFromMatch(attrStr, matches[i][0]));
}
if (!attrNames.hasOwnProperty(attrName)) {
//check for duplicate attribute.
attrNames[attrName] = 1;
} else {
return getErrorObject('InvalidAttr', "Attribute '"+attrName+"' is repeated.", getPositionFromMatch(attrStr, matches[i][0]));
}
}
return true;
}
function validateNumberAmpersand(xmlData, i) {
let re = /\d/;
if (xmlData[i] === 'x') {
i++;
re = /[\da-fA-F]/;
}
for (; i < xmlData.length; i++) {
if (xmlData[i] === ';')
return i;
if (!xmlData[i].match(re))
break;
}
return -1;
}
function validateAmpersand(xmlData, i) {
// https://www.w3.org/TR/xml/#dt-charref
i++;
if (xmlData[i] === ';')
return -1;
if (xmlData[i] === '#') {
i++;
return validateNumberAmpersand(xmlData, i);
}
let count = 0;
for (; i < xmlData.length; i++, count++) {
if (xmlData[i].match(/\w/) && count < 20)
continue;
if (xmlData[i] === ';')
break;
return -1;
}
return i;
}
function getErrorObject(code, message, lineNumber) {
return {
err: {
code: code,
msg: message,
line: lineNumber,
},
};
}
function validateAttrName(attrName) {
return util.isName(attrName);
}
// const startsWithXML = /^xml/i;
function validateTagName(tagname) {
return util.isName(tagname) /* && !tagname.match(startsWithXML) */;
}
//this function returns the line number for the character at the given index
function getLineNumberForPosition(xmlData, index) {
var lines = xmlData.substring(0, index).split(/\r?\n/);
return lines.length;
}
//this function returns the position of the last character of match within attrStr
function getPositionFromMatch(attrStr, match) {
return attrStr.indexOf(match) + match.length;
}
// Footer of the page this listing was copied from: © 2015 - 2024 Weber Informatics LLC | Privacy Policy