org.netbeans.lib.xml.lexer.XMLLexer Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.netbeans.lib.xml.lexer;
import org.netbeans.api.xml.lexer.XMLTokenId;
import org.netbeans.api.lexer.Token;
import org.netbeans.spi.lexer.Lexer;
import org.netbeans.spi.lexer.LexerInput;
import org.netbeans.spi.lexer.LexerRestartInfo;
import org.netbeans.spi.lexer.TokenFactory;
/**
* Lexical analyzer for XML. Based on original XML lexer from xml/editor module.
*
* @author Petr Nejedly
* @author Miloslav Metelka
* @author Jan Lahoda
* @author Marek Fukala
* @author Tomasz Slota
* @version 1.00
*/
public class XMLLexer implements Lexer {
// Lexer input handle. NOTE(review): where it is assigned is not visible in this section.
private LexerInput input;
// Token factory handle. NOTE(review): raw type; presumably TokenFactory<XMLTokenId> - confirm before generifying.
private TokenFactory tokenFactory;
/**
 * Packs the restartable part of the analyzer state into one boxed integer.
 * <p>
 * Layout of the returned value: bit 0 holds the {@code subInternalDTD} flag,
 * the higher bits hold {@code state} shifted left by one position.
 * The value of {@code subState} is not part of the encoding.
 *
 * @return the encoded state as an {@link Integer}; never {@code null}
 */
public Object state() {
    final int dtdBit;
    if (subInternalDTD) {
        dtdBit = 1;
    } else {
        dtdBit = 0;
    }
    final int shiftedState = this.state << 1;
    return Integer.valueOf(shiftedState + dtdBit);
}
/**
 * Restores the analyzer from a value in the format produced by {@code state()}.
 * <p>
 * A {@code null} argument resets {@code state} and {@code subState} to
 * {@code INIT} and clears {@code subInternalDTD}. Otherwise bit 0 of the
 * encoded integer becomes {@code subInternalDTD} and bits 1-8 become
 * {@code state}; {@code subState} is left untouched in that case.
 *
 * @param state an {@link Integer} in the {@code state()} encoding, or
 *              {@code null} to start from the initial state
 * @throws ClassCastException if {@code state} is non-null and not an {@link Integer}
 */
private void loadState(final Object state) {
    if (state != null) {
        final int packed = ((Integer) state).intValue();
        // Lowest bit carries the internal-DTD flag.
        subInternalDTD = (packed % 2) == 1;
        // Drop the flag bit and keep the following eight bits as the state number.
        this.state = (packed >> 1) & 0xff;
        return;
    }
    // No saved state: start from scratch.
    this.state = INIT;
    subState = INIT;
    subInternalDTD = false;
}
/**
* Current internal state of the lexical analyzer. It is initially set to
* INIT. Its value is encoded (shifted left by one bit, together with the
* subInternalDTD flag) by state() and restored by loadState().
*/
protected int state = INIT;
/**
* Internal state of the lexical analyzer before entering the subanalyzer of
* character references. It is initially set to INIT, but before its first
* use it is overwritten with the state which originated the
* transition to the charref subanalyzer.
* <p>
* Note: this value is not part of the object returned by state();
* loadState() resets it to INIT only when it is given a null state.
*/
protected int subState = INIT;
/**
* Identifies the internal DTD layer. Most of the functionality is the same
* as at the document layer; however, there are minor exceptions.
* See the isInternalDTD checks in the code.
* <p>
* The flag is stored in the lowest bit of the value returned by state().
*/
protected boolean subInternalDTD = false;
/**
* Initial internal state of the analyzer. Also the value that loadState()
* assigns to state and subState when no saved state is supplied.
*/
public static final int INIT = 0;
// Internal states. Meaning of the name prefixes:
// I = in state
// P = expected (char probed but not consumed)
// A = after (char probed and consumed)
// States which are observable between token lexing are numbered from 1.
// States which are used just within the nextToken() loop are numbered >= 100.
// The following states are observable from outside. We need to keep the number of those states < 64.
private static final int ISI_TEXT = 1; // Plain text between tags
private static final int ISI_ERROR = 2; // Syntax error in XML syntax
private static final int ISP_ENDTAG_X = 3; // X-switch after ENDTAG's name
private static final int ISP_TAG_X = 4; // X-switch after TAG's name
private static final int ISP_ARG_X = 5; // X-switch after ARGUMENT's name
private static final int ISP_EQ = 6; // X-switch after '=' in TAG's ARGUMENT
private static final int ISP_EQ_WS = 7; // In WS after '='
private static final int ISI_VAL_APOS = 8; // Single-quoted value - may contain " chars
private static final int ISI_VAL_QUOT = 9; // Double-quoted value - may contain ' chars
private static final int ISI_SGML_DECL = 10;
private static final int ISA_REF = 11; // In a character/entity reference (e.g. "&lt;"), just after the '&'
private static final int ISI_PI = 12; // In a processing instruction, after "<?..."
private static final int ISP_PI_TARGET_WS = 13; // After "<?...|" ('|' marks the position; presumably right after the PI target - confirm)
private static final int ISI_PI_CONTENT = 14; // In PI content
private static final int ISP_DECL_CHARS = 15;
private static final int ISP_DECL_STRING = 16;
private static final int ISP_PI_CONTENT_QMARK = 17; // Spotted '?' in PI content
private static final int ISI_CDATA = 18;
// Observable only because of the EOF condition
private static final int ISI_XML_COMMENT = 31; // Somewhere after "<!--" (inside an XML comment)