// com.intellij.lang.html.HtmlParsing (Maven / Gradle / Ivy)
// Go to download
// Show more of this group. Show more artifacts with this name.
// Show all versions of xml-psi-impl. Show documentation.
// A packaging of the IntelliJ Community Edition xml-psi-impl library.
// This is release number 1 of trunk branch 142.
// The newest version!
/*
* Copyright 2000-2009 JetBrains s.r.o.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* @author max
*/
package com.intellij.lang.html;
import com.intellij.codeInsight.completion.CompletionUtilCore;
import com.intellij.codeInsight.daemon.XmlErrorMessages;
import com.intellij.lang.PsiBuilder;
import com.intellij.openapi.util.text.StringUtil;
import com.intellij.psi.tree.CustomParsingType;
import com.intellij.psi.tree.IElementType;
import com.intellij.psi.tree.ILazyParseableElementType;
import com.intellij.psi.xml.XmlElementType;
import com.intellij.psi.xml.XmlTokenType;
import com.intellij.util.containers.Stack;
import com.intellij.xml.util.HtmlUtil;
import org.jetbrains.annotations.NonNls;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
public class HtmlParsing {
// Lower-case tag names used by the table nesting checks in childTerminatesParentInStack.
@NonNls private static final String TR_TAG = "tr";
@NonNls private static final String TD_TAG = "td";
@NonNls private static final String TH_TAG = "th";
@NonNls private static final String TABLE_TAG = "table";

/** Builder that supplies the tokens and receives the markers. */
private final PsiBuilder myBuilder;

// The three stacks below are pushed and popped together (see parseTag and closeTag),
// so index i in each of them describes the same open tag; the innermost tag is on top.

/** Lower-cased names of the currently open tags. */
private final Stack<String> myTagNamesStack = new Stack<String>();
/** Names of the currently open tags exactly as they were read from the builder. */
private final Stack<String> myOriginalTagNamesStack = new Stack<String>();
/** Markers of the currently open tags. */
private final Stack<PsiBuilder.Marker> myTagMarkersStack = new Stack<PsiBuilder.Marker>();

/**
 * Lower-cased completion placeholder. It is lowered with the same helper as the end-tag names it is
 * compared against in parseTag, so both sides of that comparison follow identical case rules.
 */
@NonNls private static final String COMPLETION_NAME = StringUtil.toLowerCase(CompletionUtilCore.DUMMY_IDENTIFIER_TRIMMED);
/**
 * Creates a parser bound to the given builder; every token read and every marker placed by this
 * class goes through it.
 *
 * @param builder the builder this parser drives
 */
public HtmlParsing(final PsiBuilder builder) {
  this.myBuilder = builder;
}
/**
 * Parses the whole input as an HTML document: leading comments, the prolog, and then the
 * top-level content, finishing with an HTML_DOCUMENT marker around everything.
 * <p>
 * Tokens that are not allowed at the top level are collected under a single pending error marker,
 * which is closed as soon as a tag, comment, processing instruction or text token follows.
 */
public void parseDocument() {
  final PsiBuilder.Marker documentMarker = mark();

  while (token() == XmlTokenType.XML_COMMENT_START) {
    parseComment();
  }
  parseProlog();

  PsiBuilder.Marker pendingError = null;
  while (!eof()) {
    final IElementType type = token();
    final boolean tagStart = type == XmlTokenType.XML_START_TAG_START;
    final boolean commentStart = type == XmlTokenType.XML_COMMENT_START;
    final boolean piStart = type == XmlTokenType.XML_PI_START;
    final boolean plainText = type == XmlTokenType.XML_REAL_WHITE_SPACE || type == XmlTokenType.XML_DATA_CHARACTERS;

    if (tagStart || commentStart || piStart || plainText) {
      // All of these end a pending run of unexpected tokens before being consumed.
      pendingError = flushError(pendingError);
      if (tagStart) {
        parseTag();
        myTagMarkersStack.clear();
        myTagNamesStack.clear();
      }
      else if (commentStart) {
        parseComment();
      }
      else if (piStart) {
        parseProcessingInstruction();
      }
      else {
        advance();
      }
    }
    else if (type == XmlTokenType.XML_CHAR_ENTITY_REF || type == XmlTokenType.XML_ENTITY_REF_TOKEN) {
      // References leave a pending error marker open.
      parseReference();
    }
    else if (type == XmlTokenType.XML_END_TAG_START) {
      reportUnmatchedEndTag();
    }
    else {
      // Anything else is unexpected: open the error run if necessary and swallow the token.
      if (pendingError == null) {
        pendingError = mark();
      }
      advance();
    }
  }

  if (pendingError != null) {
    pendingError.error(XmlErrorMessages.message("top.level.element.is.not.completed"));
  }
  documentMarker.done(XmlElementType.HTML_DOCUMENT);
}

/**
 * Consumes an end tag found at the top level (the end-tag start token, an optional name and an
 * optional tag-end token) and marks the consumed tokens as an error.
 */
private void reportUnmatchedEndTag() {
  final PsiBuilder.Marker unmatched = myBuilder.mark();
  advance();
  if (token() == XmlTokenType.XML_NAME) {
    advance();
    if (token() == XmlTokenType.XML_TAG_END) {
      advance();
    }
  }
  unmatched.error(XmlErrorMessages.message("xml.parsing.closing.tag.matches.nothing"));
}
/**
 * Closes a pending error marker, if there is one, with the "unexpected tokens" message.
 *
 * @param error the pending error marker, or {@code null} when none is open
 * @return always {@code null}, so the caller can reset its variable with the result
 */
@Nullable
private static PsiBuilder.Marker flushError(PsiBuilder.Marker error) {
  if (error == null) {
    return null;
  }
  error.error(XmlErrorMessages.message("xml.parsing.unexpected.tokens"));
  return null;
}
/**
 * Parses a doctype declaration: consumes everything from the doctype start token up to and
 * including the doctype end token and wraps it in an XML_DOCTYPE marker. Reaching the end of
 * input first is reported as an error.
 */
private void parseDoctype() {
  assert token() == XmlTokenType.XML_DOCTYPE_START : "Doctype start expected";

  final PsiBuilder.Marker marker = mark();
  advance();

  // Skip the declaration body.
  for (IElementType type = token(); type != XmlTokenType.XML_DOCTYPE_END && !eof(); type = token()) {
    advance();
  }

  if (!eof()) {
    advance();
  }
  else {
    error(XmlErrorMessages.message("xml.parsing.unexpected.end.of.file"));
  }
  marker.done(XmlElementType.XML_DOCTYPE);
}
/**
 * Parses the tag that starts at the current token together with all content that follows it.
 * <p>
 * The method is iterative: nested tags are tracked on myTagMarkersStack, myTagNamesStack and
 * myOriginalTagNamesStack instead of through recursion. The main loop contains no {@code break},
 * so it runs until {@code eof()}; tags that are still open at that point are completed by the
 * trailing loop. Runs of character content are wrapped in XML_TEXT markers through
 * startText/terminateText.
 */
private void parseTag() {
assert token() == XmlTokenType.XML_START_TAG_START : "Tag start expected";
// Tag name as read from the builder; its lower-cased copy is what the name stack is matched on.
String originalTagName;
// Marker of the text run currently being collected, or null when no run is open.
PsiBuilder.Marker xmlText = null;
while (!eof()) {
final IElementType tt = token();
if (tt == XmlTokenType.XML_START_TAG_START) {
// A start tag ends the current text run.
xmlText = terminateText(xmlText);
final PsiBuilder.Marker tag = mark();
// Start tag header
advance();
if (token() != XmlTokenType.XML_NAME) {
error(XmlErrorMessages.message("xml.parsing.tag.name.expected"));
originalTagName = "";
}
else {
originalTagName = myBuilder.getTokenText();
advance();
}
String tagName = StringUtil.toLowerCase(originalTagName);
// Pop every open tag that this new tag implicitly terminates; each popped tag is completed
// just before the new tag's marker.
while (childTerminatesParentInStack(tagName)) {
PsiBuilder.Marker top = closeTag();
top.doneBefore(XmlElementType.HTML_TAG, tag);
}
// Register the new tag on all three parallel stacks.
myTagMarkersStack.push(tag);
myTagNamesStack.push(tagName);
myOriginalTagNamesStack.push(originalTagName);
parseHeader(tagName);
// Empty-element end token: the tag has no content, complete it immediately.
if (token() == XmlTokenType.XML_EMPTY_ELEMENT_END) {
advance();
doneTag(tag);
continue;
}
// Otherwise the header must be closed by a tag-end token; if it is not, report the problem and
// complete the tag as it stands.
if (token() == XmlTokenType.XML_TAG_END) {
advance();
}
else {
error(XmlErrorMessages.message("tag.start.is.not.closed"));
doneTag(tag);
continue;
}
// Tags accepted by HtmlUtil.isSingleHtmlTagL are completed right here. An end tag with the same
// name that follows immediately is consumed as part of the tag; anything else is rolled back
// and left for the next loop iteration.
if (HtmlUtil.isSingleHtmlTagL(tagName)) {
final PsiBuilder.Marker footer = mark();
if (token() == XmlTokenType.XML_END_TAG_START) {
advance();
if (token() == XmlTokenType.XML_NAME) {
if (tagName.equalsIgnoreCase(myBuilder.getTokenText())) {
advance();
footer.drop();
if (token() == XmlTokenType.XML_TAG_END) {
advance();
}
doneTag(tag);
continue;
}
}
}
footer.rollbackTo();
doneTag(tag);
}
}
else if (tt == XmlTokenType.XML_PI_START) {
// Processing instructions are not part of a text run.
xmlText = terminateText(xmlText);
parseProcessingInstruction();
}
else if (tt == XmlTokenType.XML_ENTITY_REF_TOKEN) {
// Entity references get their own element and end the current text run.
xmlText = terminateText(xmlText);
parseReference();
}
else if (tt == XmlTokenType.XML_CHAR_ENTITY_REF) {
// Character entity references, CDATA sections, comments and bad characters (the four branches
// that follow) are kept inside the text run, opening one if necessary.
xmlText = startText(xmlText);
parseReference();
}
else if (tt == XmlTokenType.XML_CDATA_START) {
xmlText = startText(xmlText);
parseCData();
}
else if (tt == XmlTokenType.XML_COMMENT_START) {
xmlText = startText(xmlText);
parseComment();
}
else if (tt == XmlTokenType.XML_BAD_CHARACTER) {
xmlText = startText(xmlText);
final PsiBuilder.Marker error = mark();
advance();
error.error(XmlErrorMessages.message("unescaped.ampersand.or.nonterminated.character.entity.reference"));
}
else if (tt instanceof CustomParsingType || tt instanceof ILazyParseableElementType) {
// Custom-parsed and lazily parsed tokens are consumed as a single token outside any text run.
xmlText = terminateText(xmlText);
advance();
}
else if (token() == XmlTokenType.XML_END_TAG_START) {
xmlText = terminateText(xmlText);
// Marks the start of the end tag so that it can be rolled back or turned into an error.
final PsiBuilder.Marker footer = mark();
advance();
if (token() == XmlTokenType.XML_NAME) {
String endName = StringUtil.toLowerCase(myBuilder.getTokenText());
// Empty string when no tag is open.
final String parentTagName = !myTagNamesStack.isEmpty() ? myTagNamesStack.peek() : "";
// The end tag names neither the innermost open tag nor something ending with the completion
// placeholder.
if (!parentTagName.equals(endName) && !endName.endsWith(COMPLETION_NAME)) {
final boolean isOptionalTagEnd = HtmlUtil.isOptionalEndForHtmlTagL(parentTagName);
// For names with an optional end the decision is delegated to childTerminatesParentInStack;
// for all other names some open tag must carry that name.
final boolean hasChancesToMatch = HtmlUtil.isOptionalEndForHtmlTagL(endName) ? childTerminatesParentInStack(endName) : myTagNamesStack.contains(endName);
if (hasChancesToMatch) {
// Un-read the end tag, complete the innermost tag (with an error unless its end is optional)
// and let the next iteration retry the same end tag against the new innermost tag.
footer.rollbackTo();
if (isOptionalTagEnd) {
doneTag(myTagMarkersStack.peek());
}
else {
error(XmlErrorMessages.message("named.element.is.not.closed", myOriginalTagNamesStack.peek()));
doneTag(myTagMarkersStack.peek());
}
continue;
}
else {
// No open tag can match: consume the name and an optional tag-end token and flag the whole
// end tag as an error.
advance();
if (token() == XmlTokenType.XML_TAG_END) advance();
footer.error(XmlErrorMessages.message("xml.parsing.closing.tag.matches.nothing"));
continue;
}
}
// The end tag is accepted: consume its name, then report and skip every token up to the next
// tag-end, start-tag or end-tag token.
advance();
while (token() != XmlTokenType.XML_TAG_END && token() != XmlTokenType.XML_START_TAG_START && token() != XmlTokenType.XML_END_TAG_START && !eof()) {
error(XmlErrorMessages.message("xml.parsing.unexpected.token"));
advance();
}
}
else {
error(XmlErrorMessages.message("xml.parsing.closing.tag.name.missing"));
}
// The end tag stays in the tree; the helper marker is no longer needed.
footer.drop();
if (token() == XmlTokenType.XML_TAG_END) {
advance();
}
else {
error(XmlErrorMessages.message("xml.parsing.closing.tag.is.not.done"));
}
// Complete the innermost open tag, if there is one.
if (hasTags()) doneTag(myTagMarkersStack.peek());
} else if ((token() == XmlTokenType.XML_REAL_WHITE_SPACE || token() == XmlTokenType.XML_DATA_CHARACTERS) && !hasTags()) {
// White space and character data seen while no tag is open are consumed without a text run.
xmlText = terminateText(xmlText);
advance();
} else {
// Every other token becomes part of the current text run.
xmlText = startText(xmlText);
advance();
}
}
// End of input: close a text run that is still open.
terminateText(xmlText);
// Complete all tags that are still open, innermost first. A missing end tag is reported unless
// HtmlUtil says the end is optional for that tag or the tag is html/body.
while (hasTags()) {
final String tagName = myTagNamesStack.peek();
if (!HtmlUtil.isOptionalEndForHtmlTagL(tagName) && !"html".equals(tagName) && !"body".equals(tagName)) {
error(XmlErrorMessages.message("named.element.is.not.closed", myOriginalTagNamesStack.peek()));
}
doneTag(myTagMarkersStack.peek());
}
}
/**
 * Tells whether at least one tag is currently open.
 *
 * @return {@code true} if myTagNamesStack is not empty
 */
private boolean hasTags() {
return !myTagNamesStack.isEmpty();
}
/**
 * Pops the innermost open tag from all three parallel stacks. The marker itself is not completed
 * here; that is left to the caller.
 *
 * @return the marker of the tag that was popped
 */
private PsiBuilder.Marker closeTag() {
myTagNamesStack.pop();
myOriginalTagNamesStack.pop();
return myTagMarkersStack.pop();
}
/**
 * Completes the given marker as an HTML_TAG and pops the innermost entry from the tag stacks.
 * <p>
 * If the tag that becomes innermost afterwards is, according to HtmlUtil, an inline-tag container
 * with an optional end, and the tag just completed is a block tag that is not possibly inline,
 * that container is popped as well and completed just before {@code tag}.
 *
 * @param tag marker of the tag to complete
 */
private void doneTag(PsiBuilder.Marker tag) {
  tag.done(XmlElementType.HTML_TAG);

  final String closedName = myTagNamesStack.peek();
  closeTag();

  final String enclosingName = myTagNamesStack.isEmpty() ? "" : myTagNamesStack.peek();
  final boolean enclosingHoldsInlineTags = HtmlUtil.isInlineTagContainerL(enclosingName);
  final boolean enclosingEndIsOptional = HtmlUtil.isOptionalEndForHtmlTagL(enclosingName);

  if (!enclosingHoldsInlineTags
      || !HtmlUtil.isHtmlBlockTagL(closedName)
      || !enclosingEndIsOptional
      || HtmlUtil.isPossiblyInlineTag(closedName)) {
    return;
  }

  final PsiBuilder.Marker enclosing = closeTag();
  enclosing.doneBefore(XmlElementType.HTML_TAG, tag);
}
/**
 * Parses what follows the tag name inside a start tag.
 * <p>
 * For a tag whose name starts with {@code '#'} every token is skipped until an empty-element end,
 * tag end, end-tag start or start-tag start token shows up. For any other tag, attributes and
 * references are parsed until a different token is met. In both modes the end of input stops
 * the loop.
 *
 * @param tagName lower-cased name of the tag whose header is parsed
 */
private void parseHeader(String tagName) {
  final boolean hashPrefixed = tagName.startsWith("#");

  while (true) {
    final IElementType type = token();

    if (hashPrefixed) {
      final boolean headerBoundary = type == XmlTokenType.XML_EMPTY_ELEMENT_END
                                     || type == XmlTokenType.XML_TAG_END
                                     || type == XmlTokenType.XML_END_TAG_START
                                     || type == XmlTokenType.XML_START_TAG_START;
      if (headerBoundary) {
        return;
      }
      advance();
    }
    else if (type == XmlTokenType.XML_NAME) {
      parseAttribute();
    }
    else if (type == XmlTokenType.XML_CHAR_ENTITY_REF || type == XmlTokenType.XML_ENTITY_REF_TOKEN) {
      parseReference();
    }
    else {
      return;
    }

    if (eof()) {
      return;
    }
  }
}
/**
 * Decides whether a tag with the given name implicitly terminates one of the currently open tags.
 * <p>
 * The open tags are examined from the innermost outwards. The search stops with {@code false} as
 * soon as a legitimate container for the child is met (table structure for cells, rows and
 * thead/tbody/tfoot; ul/ol for li), and with {@code true} as soon as HtmlUtil.canTerminate
 * accepts the pair.
 *
 * @param childName lower-cased name of the tag being opened or closed
 * @return {@code true} if some open tag is terminated by {@code childName}
 */
private boolean childTerminatesParentInStack(final String childName) {
  final boolean cell = TD_TAG.equals(childName) || TH_TAG.equals(childName);
  final boolean row = TR_TAG.equals(childName);
  final boolean section = isStructure(childName);
  final boolean listItem = "li".equals(childName);

  int index = myTagNamesStack.size();
  while (--index >= 0) {
    final String ancestor = myTagNamesStack.get(index);
    final boolean ancestorIsTable = TABLE_TAG.equals(ancestor);
    final boolean ancestorIsSection = isStructure(ancestor);
    final boolean ancestorIsTableOrSection = ancestorIsSection || ancestorIsTable;

    if (cell && (TR_TAG.equals(ancestor) || ancestorIsTableOrSection)) {
      return false;
    }
    if (row && ancestorIsTableOrSection) {
      return false;
    }
    if (section && ancestorIsTable) {
      return false;
    }
    if (listItem && ("ul".equals(ancestor) || "ol".equals(ancestor))) {
      return false;
    }
    if (HtmlUtil.canTerminate(childName, ancestor)) {
      return true;
    }
  }
  return false;
}
/**
 * Tells whether the name is one of the table section tags.
 *
 * @param childName lower-case tag name; {@code null} yields {@code false}
 * @return {@code true} for exactly "thead", "tbody" and "tfoot"
 */
private static boolean isStructure(String childName) {
  if ("thead".equals(childName)) {
    return true;
  }
  if ("tbody".equals(childName)) {
    return true;
  }
  return "tfoot".equals(childName);
}
/**
 * Makes sure a text run is open.
 *
 * @param xmlText marker of the text run that is already open, or {@code null} if none is
 * @return {@code xmlText} itself when it is not {@code null}, otherwise a freshly placed marker
 */
@NotNull
private PsiBuilder.Marker startText(@Nullable PsiBuilder.Marker xmlText) {
  if (xmlText != null) {
    return xmlText;
  }
  final PsiBuilder.Marker opened = mark();
  assert opened != null;
  return opened;
}
/**
 * Shorthand for {@code myBuilder.mark()}.
 *
 * @return the marker created by the builder
 */
protected final PsiBuilder.Marker mark() {
return myBuilder.mark();
}
/**
 * Closes an open text run, if there is one, as an XML_TEXT element.
 *
 * @param xmlText marker of the open text run, or {@code null} when none is open
 * @return always {@code null}, so the caller can reset its variable with the result
 */
@Nullable
private static PsiBuilder.Marker terminateText(@Nullable PsiBuilder.Marker xmlText) {
  if (xmlText == null) {
    return null;
  }
  xmlText.done(XmlElementType.XML_TEXT);
  return null;
}
/**
 * Parses a CDATA section: consumes tokens from the CDATA start token up to and including the
 * CDATA end token, or up to the end of input, and wraps them in an XML_CDATA marker.
 */
private void parseCData() {
  assert token() == XmlTokenType.XML_CDATA_START;

  final PsiBuilder.Marker section = mark();
  for (IElementType type = token(); type != XmlTokenType.XML_CDATA_END && !eof(); type = token()) {
    advance();
  }
  // Consume the terminator unless the input ended first.
  if (!eof()) {
    advance();
  }
  section.done(XmlElementType.XML_CDATA);
}
/**
 * Parses a comment beginning at the current token and wraps it in an XML_COMMENT marker.
 * <p>
 * Comment characters and conditional-comment tokens are consumed, references are parsed as such,
 * and bad characters are flagged as errors. The first token of any other kind ends the comment;
 * it is consumed only when it is the comment end token.
 */
protected void parseComment() {
  final PsiBuilder.Marker marker = mark();
  advance();

  boolean insideComment = true;
  while (insideComment) {
    final IElementType type = token();
    if (type == XmlTokenType.XML_COMMENT_CHARACTERS
        || type == XmlTokenType.XML_CONDITIONAL_COMMENT_START
        || type == XmlTokenType.XML_CONDITIONAL_COMMENT_START_END
        || type == XmlTokenType.XML_CONDITIONAL_COMMENT_END_START
        || type == XmlTokenType.XML_CONDITIONAL_COMMENT_END) {
      advance();
    }
    else if (type == XmlTokenType.XML_ENTITY_REF_TOKEN || type == XmlTokenType.XML_CHAR_ENTITY_REF) {
      parseReference();
    }
    else if (type == XmlTokenType.XML_BAD_CHARACTER) {
      final PsiBuilder.Marker badCharacter = mark();
      advance();
      badCharacter.error(XmlErrorMessages.message("xml.parsing.bad.character"));
    }
    else {
      if (type == XmlTokenType.XML_COMMENT_END) {
        advance();
      }
      insideComment = false;
    }
  }

  marker.done(XmlElementType.XML_COMMENT);
}
/**
 * Consumes a reference token. An entity reference token is wrapped in an XML_ENTITY_REF marker,
 * a character entity reference is simply consumed. Any other token trips an assertion.
 */
private void parseReference() {
  final IElementType type = token();
  if (type == XmlTokenType.XML_ENTITY_REF_TOKEN) {
    final PsiBuilder.Marker reference = mark();
    advance();
    reference.done(XmlElementType.XML_ENTITY_REF);
    return;
  }
  if (type == XmlTokenType.XML_CHAR_ENTITY_REF) {
    advance();
    return;
  }
  assert false : "Unexpected token";
}
/**
 * Parses one attribute: its name and, when an equals sign follows, its value. The result is
 * wrapped in an XML_ATTRIBUTE marker in both cases.
 */
private void parseAttribute() {
  assert token() == XmlTokenType.XML_NAME;

  final PsiBuilder.Marker attribute = mark();
  advance();
  final boolean hasValue = token() == XmlTokenType.XML_EQ;
  if (hasValue) {
    advance();
    parseAttributeValue();
  }
  attribute.done(XmlElementType.XML_ATTRIBUTE);
}
/**
 * Parses an attribute value and wraps it in an XML_ATTRIBUTE_VALUE marker.
 * <p>
 * A value that begins with the start delimiter is read up to the end delimiter; a missing end
 * delimiter is reported as an error. Otherwise a single token is taken as the value, unless the
 * current token already closes the tag header.
 */
private void parseAttributeValue() {
  final PsiBuilder.Marker value = mark();

  if (token() != XmlTokenType.XML_ATTRIBUTE_VALUE_START_DELIMITER) {
    final boolean headerEnds = token() == XmlTokenType.XML_TAG_END || token() == XmlTokenType.XML_EMPTY_ELEMENT_END;
    if (!headerEnds) {
      advance(); // Single token att value
    }
  }
  else {
    for (IElementType type = token(); !endsQuotedAttributeValue(type); type = token()) {
      if (type == XmlTokenType.XML_BAD_CHARACTER) {
        final PsiBuilder.Marker badCharacter = mark();
        advance();
        badCharacter.error(XmlErrorMessages.message("unescaped.ampersand.or.nonterminated.character.entity.reference"));
      }
      else if (type == XmlTokenType.XML_ENTITY_REF_TOKEN) {
        parseReference();
      }
      else {
        advance();
      }
    }

    if (token() != XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER) {
      error(XmlErrorMessages.message("xml.parsing.unclosed.attribute.value"));
    }
    else {
      advance();
    }
  }

  value.done(XmlElementType.XML_ATTRIBUTE_VALUE);
}

/**
 * Tells whether the token stops the scan of a delimited attribute value: no token at all, the
 * end delimiter, or a token that starts or ends a tag.
 */
private static boolean endsQuotedAttributeValue(@Nullable IElementType type) {
  return type == null
         || type == XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER
         || type == XmlTokenType.XML_END_TAG_START
         || type == XmlTokenType.XML_EMPTY_ELEMENT_END
         || type == XmlTokenType.XML_START_TAG_START;
}
/**
 * Parses the document prolog.
 * <p>
 * Comments and white space that come first are consumed before the prolog marker is placed.
 * The XML_PROLOG element then takes any sequence of processing instructions, doctype
 * declarations, comments and white space.
 */
private void parseProlog() {
  // Leading comments and white space stay outside the prolog element.
  for (IElementType type = token(); ; type = token()) {
    if (type == XmlTokenType.XML_COMMENT_START) {
      parseComment();
    }
    else if (type == XmlTokenType.XML_REAL_WHITE_SPACE) {
      advance();
    }
    else {
      break;
    }
  }

  final PsiBuilder.Marker prologMarker = mark();
  for (IElementType type = token(); ; type = token()) {
    if (type == XmlTokenType.XML_PI_START) {
      parseProcessingInstruction();
    }
    else if (type == XmlTokenType.XML_DOCTYPE_START) {
      parseDoctype();
    }
    else if (type == XmlTokenType.XML_COMMENT_START) {
      parseComment();
    }
    else if (type == XmlTokenType.XML_REAL_WHITE_SPACE) {
      advance();
    }
    else {
      break;
    }
  }
  prologMarker.done(XmlElementType.XML_PROLOG);
}
/**
 * Parses a processing instruction and wraps it in an XML_PROCESSING_INSTRUCTION marker.
 * <p>
 * After the start token an optional name or target is consumed, followed by any number of
 * {@code name = value} pairs; a pair without the equals sign is reported but its value is still
 * parsed. A missing end token is reported as an error.
 */
private void parseProcessingInstruction() {
  assert token() == XmlTokenType.XML_PI_START;

  final PsiBuilder.Marker instruction = mark();
  advance();

  if (token() == XmlTokenType.XML_NAME || token() == XmlTokenType.XML_PI_TARGET) {
    advance();
  }

  while (token() == XmlTokenType.XML_NAME) {
    advance();
    if (token() != XmlTokenType.XML_EQ) {
      error(XmlErrorMessages.message("expected.attribute.eq.sign"));
    }
    else {
      advance();
    }
    parseAttributeValue();
  }

  if (token() != XmlTokenType.XML_PI_END) {
    error(XmlErrorMessages.message("xml.parsing.unterminated.processing.instruction"));
  }
  else {
    advance();
  }
  instruction.done(XmlElementType.XML_PROCESSING_INSTRUCTION);
}
/**
 * Shorthand for {@code myBuilder.getTokenType()}.
 *
 * @return the type of the current token as reported by the builder; callers in this class also
 *         handle a {@code null} result
 */
protected final IElementType token() {
return myBuilder.getTokenType();
}
/**
 * Shorthand for {@code myBuilder.eof()}.
 *
 * @return whatever the builder reports as its end-of-input state
 */
protected final boolean eof() {
return myBuilder.eof();
}
/**
 * Shorthand for {@code myBuilder.advanceLexer()}: moves on to the next token.
 */
protected final void advance() {
myBuilder.advanceLexer();
}
/**
 * Shorthand for {@code myBuilder.error(message)}: reports an error through the builder.
 *
 * @param message the error text to report
 */
private void error(final String message) {
myBuilder.error(message);
}
}