Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
org.owasp.html.HtmlElementTables Maven / Gradle / Ivy
package org.owasp.html;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
/**
* Metadata about HTML elements.
*/
public final class HtmlElementTables {
/** Pseudo element index for text nodes. */
public static final int TEXT_NODE = -1;
/** Maps between element indices and element names. */
private final HtmlElementNames elementNames;
/** Relates elements and the elements that can contain them. */
private final DenseElementBinaryMatrix canContain;
/**
 * Relates element names and the elements that are closed when that close
 * tag appears.
 */
private final DenseElementBinaryMatrix closedOnClose;
/**
 * Relates element names and the elements that are closed when that open
 * tag appears.
 */
private final DenseElementBinaryMatrix closedOnOpen;
/**
 * Close tags besides the tag itself which close the tag.
 */
private final SparseElementToElements explicitClosers;
/**
 * Elements in order which are implicitly opened when a descendant tag is
 * lexically nested within an ancestor.
 */
private final SparseElementMultitable impliedElements;
/**
 * The kind of character data that can appear in an element.
 */
private final TextContentModel textContentModel;
/** The elements that can be resumed after misnested inline tags. */
private final DenseElementSet resumable;
// Element indices for specific tags.  Each one is resolved once, by name,
// in the constructor via indexForName.
private final int DIR_TAG;
private final int OL_TAG;
private final int UL_TAG;
private final int LI_TAG;
private final int SELECT_TAG;
private final int OPTION_TAG;
private final int OPTGROUP_TAG;
private final int SCRIPT_TAG;
private final int STYLE_TAG;
private final int TABLE_TAG;
private final int TBODY_TAG;
private final int TFOOT_TAG;
private final int THEAD_TAG;
private final int TR_TAG;
private final int TD_TAG;
private final int TH_TAG;
private final int CAPTION_TAG;
private final int COL_TAG;
private final int COLGROUP_TAG;
private final int IFRAME_TAG;
/**
 * The hard-coded {@link FreeWrapper} entries built by the constructor, stored
 * at the position given by each entry's {@code desc} index.  Positions with
 * no entry are left {@code null}.
 */
private final FreeWrapper[] FREE_WRAPPERS;
/** A single-element array holding {@link #LI_TAG}. */
private final int[] LI_TAG_ARR;
/** A single-element array holding {@link #OPTION_TAG}. */
private final int[] OPTION_TAG_ARR;
/** {@code <noscript>}, {@code <noframes>}, etc. */
private final DenseElementSet nofeatureElements;
/**
 * Stores the supplied element relationship tables and derives the hard-coded
 * tag indices, free-wrapper table and no-feature element set from them.
 *
 * @param elementNames maps between element indices and element names.
 * @param canContain relates elements and the elements that can contain them.
 * @param closedOnClose relates element names and the elements that are
 *     closed when that close tag appears.
 * @param closedOnOpen relates element names and the elements that are closed
 *     when that open tag appears.
 * @param explicitClosers close tags besides the tag itself which close the
 *     tag.
 * @param impliedElements elements in order which are implicitly opened when
 *     a descendant tag is lexically nested within an ancestor.
 * @param textContentModel the kind of character data that can appear in an
 *     element.
 * @param resumable the elements that can be resumed after misnested inline
 *     tags.
 */
public HtmlElementTables(
    HtmlElementNames elementNames,
    DenseElementBinaryMatrix canContain,
    DenseElementBinaryMatrix closedOnClose,
    DenseElementBinaryMatrix closedOnOpen,
    SparseElementToElements explicitClosers,
    SparseElementMultitable impliedElements,
    TextContentModel textContentModel,
    DenseElementSet resumable
    ) {
  this.elementNames = elementNames;
  this.canContain = canContain;
  this.closedOnClose = closedOnClose;
  this.closedOnOpen = closedOnOpen;
  this.explicitClosers = explicitClosers;
  this.impliedElements = impliedElements;
  this.textContentModel = textContentModel;
  this.resumable = resumable;

  // Most of the information above is extracted by interrogating a browser
  // via html-containment.html
  // That does a good job of extracting relationships between elements.
  // It doesn't do such a good job with understanding scoping relationships
  // between elements, so we hard-code some tables needed to allow embedding
  // regardless of element scoping relationships that are extracted from the
  // HTML 5 spec.
  DIR_TAG = indexForName("dir");
  OL_TAG = indexForName("ol");
  UL_TAG = indexForName("ul");
  LI_TAG = indexForName("li");
  SELECT_TAG = indexForName("select");
  OPTION_TAG = indexForName("option");
  // The element is spelled "optgroup"; a misspelled name here would leave
  // the OPTGROUP rules below attached to the wrong element index.
  OPTGROUP_TAG = indexForName("optgroup");
  SCRIPT_TAG = indexForName("script");
  STYLE_TAG = indexForName("style");
  TABLE_TAG = indexForName("table");
  TBODY_TAG = indexForName("tbody");
  TFOOT_TAG = indexForName("tfoot");
  THEAD_TAG = indexForName("thead");
  TR_TAG = indexForName("tr");
  TD_TAG = indexForName("td");
  TH_TAG = indexForName("th");
  CAPTION_TAG = indexForName("caption");
  COL_TAG = indexForName("col");
  COLGROUP_TAG = indexForName("colgroup");
  IFRAME_TAG = indexForName("iframe");

  // Each entry lists: the descendant element, the containers it may appear
  // in directly, and the elements stored as its "implied" list.
  ImmutableList<FreeWrapper> freeWrappers = ImmutableList.of(
      new FreeWrapper(
          LI_TAG,
          // LI_TAG is allowed here since an LI can appear when an LI is on
          // top of the stack.  It will be popped and the new LI will be
          // opened.
          new int[] { DIR_TAG, OL_TAG, UL_TAG, LI_TAG },
          new int[] { UL_TAG }),
      new FreeWrapper(
          OPTION_TAG, new int[] { SELECT_TAG, OPTGROUP_TAG, OPTION_TAG },
          new int[] { SELECT_TAG }),
      new FreeWrapper(
          OPTGROUP_TAG, new int[] { SELECT_TAG, OPTGROUP_TAG },
          new int[] { SELECT_TAG }),
      new FreeWrapper(
          TD_TAG, new int[] { TR_TAG, TD_TAG, TH_TAG },
          new int[] { TABLE_TAG, TBODY_TAG, TR_TAG }),
      new FreeWrapper(
          TH_TAG, new int[] { TR_TAG, TD_TAG, TH_TAG },
          new int[] { TABLE_TAG, TBODY_TAG, TR_TAG }),
      new FreeWrapper(
          TR_TAG, new int[] { TBODY_TAG, THEAD_TAG, TFOOT_TAG, TR_TAG, TD_TAG, TH_TAG },
          new int[] { TABLE_TAG, TBODY_TAG }),
      new FreeWrapper(
          TBODY_TAG, new int[] { TABLE_TAG, THEAD_TAG, TBODY_TAG, TFOOT_TAG },
          new int[] { TABLE_TAG }),
      new FreeWrapper(
          THEAD_TAG, new int[] { TABLE_TAG, THEAD_TAG, TBODY_TAG, TFOOT_TAG },
          new int[] { TABLE_TAG }),
      new FreeWrapper(
          TFOOT_TAG, new int[] { TABLE_TAG, THEAD_TAG, TBODY_TAG, TFOOT_TAG },
          new int[] { TABLE_TAG }),
      new FreeWrapper(
          CAPTION_TAG,
          new int[] { TABLE_TAG },
          new int[] { TABLE_TAG }),
      new FreeWrapper(
          COL_TAG, new int[] { COLGROUP_TAG },
          new int[] { TABLE_TAG, COLGROUP_TAG }),
      new FreeWrapper(
          COLGROUP_TAG, new int[] { TABLE_TAG }, new int[] { TABLE_TAG })
      );

  // Lay the wrappers out in an array addressed by descendant index so a
  // wrapper can be found by direct indexing.  Unused slots stay null.
  int maxDescIdx = -1;
  for (FreeWrapper freeWrapper : freeWrappers) {
    maxDescIdx = Math.max(freeWrapper.desc, maxDescIdx);
  }
  FreeWrapper[] freeWrapperArr = new FreeWrapper[maxDescIdx + 1];
  for (FreeWrapper freeWrapper : freeWrappers) {
    freeWrapperArr[freeWrapper.desc] = freeWrapper;
  }
  FREE_WRAPPERS = freeWrapperArr;

  LI_TAG_ARR = new int[] { LI_TAG };
  OPTION_TAG_ARR = new int[] { OPTION_TAG };

  // One flag per element type; the chained assignment sets the flags for
  // noscript, noframes and noembed to true.
  boolean[] nofeatureBits = new boolean[this.nElementTypes()];
  nofeatureBits[indexForName("noscript")] =
      nofeatureBits[indexForName("noframes")] =
      nofeatureBits[indexForName("noembed")] = true;
  this.nofeatureElements = new DenseElementSet(nofeatureBits);
}
/**
 * Reports whether {@code parent} can directly contain {@code child}.
 *
 * @param parent the element index of the candidate container.
 * @param child the element index of the candidate content, or
 *     {@link #TEXT_NODE} for a text node.
 * @return true if parent can directly contain child.
 */
public boolean canContain(int parent, int child) {
  if (!nofeatureElements.get(parent)) {
    if (child == TEXT_NODE) {
      return canContainText(parent);
    }
    return canContain.get(parent, child);
  }
  // The parent is one of the elements registered in nofeatureElements by the
  // constructor (noscript, noframes, noembed).  It's hard to interrogate a
  // browser about the behavior of such an element in scriptless mode using
  // JavaScript, and its behavior is more dangerous when in that mode, so
  // that mode is hardcoded here as a worst case assumption.
  return true;
}
/**
 * Looks up the element index for the element with the given name.
 *
 * @param canonName the canonical element name to look up.
 * @return the index reported by {@code elementNames} for that name.
 */
public int indexForName(String canonName) {
  final int elementIndex = elementNames.getElementNameIndex(canonName);
  return elementIndex;
}
/**
 * Looks up the canonical element name for the element with the given index.
 *
 * @param index the element index to look up.
 * @return the entry at that position in {@code elementNames.canonNames}.
 */
public String canonNameForIndex(int index) {
  final String canonName = elementNames.canonNames.get(index);
  return canonName;
}
/**
 * Reports whether the element with the given index is one of the elements
 * that can be resumed after misnested inline tags.
 *
 * @param index the element index to test.
 * @return true if the index is a member of the resumable set.
 */
public boolean resumable(int index) {
  final boolean isResumable = resumable.get(index);
  return isResumable;
}
/**
 * Reports whether parsing can produce an element with the given index that
 * contains a text node.
 *
 * @param index the element index to test.
 * @return the answer given by the text content model for that index.
 */
public boolean canContainText(int index) {
  final boolean textAllowed = textContentModel.canContainText(index);
  return textAllowed;
}
/**
 * Reports whether parsing can produce an element with the given index that
 * contains a text node holding human readable text, as opposed to script or
 * style source code.
 *
 * @param index the element index to test.
 * @return true if the text content model allows plain text for the element
 *     and the element is not {@code iframe}.
 */
public boolean canContainPlainText(int index) {
  if (!textContentModel.canContainPlainText(index)) {
    return false;
  }
  // The iframe's content is specified in very odd ways
  // https://dev.w3.org/html5/pf-summary/Overview.html#the-iframe-element
  // """
  // When used in HTML documents, the allowed content model of iframe
  // elements is text, except that invoking the HTML fragment parsing
  // algorithm with the iframe element as the context element and the text
  // contents as the input must result in a list of nodes that are all
  // phrasing content, with no parse errors having occurred, with no
  // script elements being anywhere in the list or as descendants of
  // elements in the list, and with all the elements in the list
  // (including their descendants) being themselves conforming.
  //
  // The iframe element must be empty in XML documents.
  // """
  //
  // So an iframe can contain text (canContain is true for it), but that
  // text is not freeform plain text.  Excluding it here has the effect of
  // making HtmlPolicyBuilder built policies disallow text inside iframe
  // elements.
  return index != IFRAME_TAG;
}
/**
 * Asks the text content model whether the element with the given index can
 * contain a comment.
 *
 * @param ix the element index to test.
 * @return the text content model's answer for that index.
 */
boolean canContainComment(int ix) {
  final boolean commentAllowed = textContentModel.canContainComment(ix);
  return commentAllowed;
}
/**
 * Asks the text content model whether the element with the given index can
 * contain entities (character references).
 *
 * @param ix the element index to test.
 * @return the text content model's {@code canContainEntities} answer for
 *     that index.
 */
boolean canContainCharacterReference(int ix) {
  final boolean entitiesAllowed = textContentModel.canContainEntities(ix);
  return entitiesAllowed;
}
/**
 * Asks the text content model whether the element with the given index is
 * flagged as raw.
 *
 * @param ix the element index to test.
 * @return the text content model's {@code isRaw} answer for that index.
 */
boolean isTextContentRaw(int ix) {
  final boolean raw = textContentModel.isRaw(ix);
  return raw;
}
/**
 * Asks the text content model whether the element with the given index is
 * flagged as unended.
 *
 * @param ix the element index to test.
 * @return the text content model's {@code isUnended} answer for that index.
 */
boolean isUnended(int ix) {
  final boolean unended = textContentModel.isUnended(ix);
  return unended;
}
/**
 * Reports whether {@code closeTag} is recorded in the explicit closers table
 * as closing {@code openElement}.
 *
 * @param closeTag the element index of the close tag encountered.
 * @param openElement the element index of the currently open element.
 * @return the explicit closers entry for (openElement, closeTag).
 */
boolean isAlternateCloserFor(int closeTag, int openElement) {
  // Note the argument order: the table is keyed by the open element first.
  final boolean alternateCloser = explicitClosers.get(openElement, closeTag);
  return alternateCloser;
}
/**
 * Reports whether the closed-on-open table relates an already open element
 * to an open tag.
 *
 * @param alreadyOpenElement the element index of the element already open.
 * @param openTag the element index of the open tag encountered.
 * @return the closed-on-open entry for (alreadyOpenElement, openTag).
 */
boolean closedOnOpen(int alreadyOpenElement, int openTag) {
  final boolean closesOpenElement =
      closedOnOpen.get(alreadyOpenElement, openTag);
  return closesOpenElement;
}
/**
 * Reports whether the closed-on-close table relates an already open element
 * to a close tag.
 *
 * @param alreadyOpenElement the element index of the element already open.
 * @param closeTag the element index of the close tag encountered.
 * @return the closed-on-close entry for (alreadyOpenElement, closeTag).
 */
boolean closedOnClose(int alreadyOpenElement, int closeTag) {
  final boolean closesOpenElement =
      closedOnClose.get(alreadyOpenElement, closeTag);
  return closesOpenElement;
}
/**
 * Returns the number of element types, which is also the exclusive upper
 * bound on element indices.
 *
 * @return the size of {@code elementNames.canonNames}.
 */
public int nElementTypes() {
  final int elementTypeCount = elementNames.canonNames.size();
  return elementTypeCount;
}
/**
 * One hard-coded rule tying an element index to the containers it may appear
 * in and to a list of implied element indices.
 */
private static final class FreeWrapper {
  /** The element index this rule is filed under. */
  final int desc;
  /**
   * Lookup table addressed by element index: true at every index that was
   * listed as an allowed container.  Its length is one more than the largest
   * listed index, so larger indices are outside the table.
   */
  final boolean[] allowedContainers;
  /** The implied element indices, stored exactly as passed in (no copy). */
  final int[] implied;

  /**
   * @param desc the element index this rule is filed under.
   * @param allowedContainers the element indices to mark as allowed
   *     containers.
   * @param implied the implied element indices; the array is kept by
   *     reference.
   */
  FreeWrapper(int desc, int[] allowedContainers, int[] implied) {
    this.desc = desc;
    this.implied = implied;

    // Find the largest listed index so the table is just big enough.
    int highestContainer = -1;
    for (int i = 0; i < allowedContainers.length; ++i) {
      if (allowedContainers[i] > highestContainer) {
        highestContainer = allowedContainers[i];
      }
    }

    boolean[] containerTable = new boolean[highestContainer + 1];
    for (int i = 0; i < allowedContainers.length; ++i) {
      containerTable[allowedContainers[i]] = true;
    }
    this.allowedContainers = containerTable;
  }
}
/** An empty {@code int} array constant. */
static final int[] ZERO_INTS = {};
/**
* Elements in order which are implicitly opened when a descendant tag is
* lexically nested within an ancestor.
*/
int[] impliedElements(int anc, int desc) {
//