org.netbeans.modules.web.jspparser.FastOpenInfoParser Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.netbeans.modules.web.jspparser;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.text.BadLocationException;
import javax.swing.text.StyledDocument;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.netbeans.modules.web.api.webmodule.WebModule;
import org.netbeans.modules.web.jsps.parserapi.JspParserAPI;
import org.openide.filesystems.FileObject;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import org.netbeans.modules.xml.api.EncodingUtil;
import org.openide.cookies.EditorCookie;
import org.openide.loaders.DataObject;
import org.openide.nodes.Node;
/**
* JSP 'open info' parser allowing to fastly determine encoding for JSPs in standart syntax
* with DD NOT specifying JSPs encodinf or syntax (at least 95% af all JSPs)
*
* How the encoding is currently detected:
* 1) find deplyment descriptor from given webmodule
* 2) if found, parse it and find following elements
*
*
* ||
*
*
* 3) if any of the nested elements found, give it up and return null (and let jasper parser to determine the encoding)
* 4) if the DD is not found or it doesn't contain the elements from #2 test if the file is JSP document (according to the extension)
* 5) if the file is a XML document give it up (so far - we can easily implement a simple enc. parser for XMLs as well)
* 6) the page is standard syntax - parse first 8kB of text and...
* 7) if <%@page encoding="xxx"%> is found return the encoding value
* 8) if <%@page encoding="xxx"%> is NOT found find <%@page contentType="mimetype; char-set=xxx"%>
* 9) if CT found return encoding from it
*
* @author Marek Fukala
*/
public class FastOpenInfoParser {
private static final Logger LOGGER = Logger.getLogger(FastOpenInfoParser.class.getName());
static FastOpenInfoParser get(WebModule wm) {
return new FastOpenInfoParser(wm);
}
private final WebModule wm;
/** Creates a new instance of FastOpenInfoParser */
private FastOpenInfoParser(WebModule wm) {
this.wm = wm;
}
public JspParserAPI.JspOpenInfo getJspOpenInfo(FileObject fo, boolean useEditor) {
long start = 0;
if (LOGGER.isLoggable(Level.FINE)) {
start = System.currentTimeMillis();
}
try {
if (wm != null && wm.getDocumentBase() != null && useEditor) {
return null; //better let the parser do it
}
//if there isn't a webmodule detect the encoding from the file only
if (wm != null) {
//find deployment descriptor
FileObject documentBase = wm.getDocumentBase();
if (documentBase != null) {
FileObject dd = wm.getDeploymentDescriptor();
//test whether the DD exists, if not parse the JSP file
if (dd != null) {
//parse the DD and try to find element with and elements
DDParseInfo ddParseInfo = parse(new InputSource(dd.getInputStream())); //parse with default encoding
//if the DD defines encoding or marks jsps as xml documents return null
if (ddParseInfo.definesEncoding || ddParseInfo.marksXMLDocuments) {
return null;
}
}
}
}
String enc = null;
InputStream is = null;
// when the file is locked there cannot be used read input stream from fileObject
if (fo.isLocked()) {
try {
DataObject dataObject = DataObject.find(fo);
Node.Cookie obj = dataObject.getLookup().lookup(org.openide.cookies.EditorCookie.class);
StyledDocument document = ((EditorCookie) obj).getDocument();
if (document != null) {
// can be null if the document wasn't loaded yet
String text = document.getText(0, document.getLength() < 8192 ? document.getLength() : 8192);
is = new ByteArrayInputStream(text.getBytes());
}
} catch (BadLocationException ex) {
LOGGER.log(Level.SEVERE, null, ex);
}
}
//get encoding from the disk file if webmodule is null and useEditor is true (during file save)
//#64418 - create a ByteArrayInputStream - we need a an inputstream with marks supported
if (is == null) {
byte[] buffer = new byte[8192*4];
InputStream _is = fo.getInputStream();
int readed = _is.read(buffer);
is = new ByteArrayInputStream(buffer, 0, readed);
_is.close();
}
if (isXMLSyntax(fo)) {
//XML document - detect encoding acc. to fisrt 4 bytes or xml prolog
enc = EncodingUtil.detectEncoding(is);
} else {
//JSP in standart syntax
//find <%@page encoding or contentType attributes
enc = parseEncodingFromFile(is);
}
LOGGER.fine("[fast open parser] detected " + enc + " encoding.");
return enc == null ? null : new JspParserAPI.JspOpenInfo(isXMLSyntax(fo), enc);
} catch (IOException e) {
LOGGER.log(Level.INFO, null, e);
} catch (SAXException se) {
LOGGER.log(Level.INFO, null, se);
} catch (ParserConfigurationException pce) {
LOGGER.log(Level.INFO, null, pce);
} finally {
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.fine("[fast open parser] taken " + (System.currentTimeMillis() - start) + "ms.");
}
}
return null;
}
private static String parseEncodingFromFile(InputStream is) throws IOException {
InputStreamReader isr = new InputStreamReader(is); //read with default encoding
//read only first 8kB of text
char[] buffer = new char[8192];
int readed = isr.read(buffer);
isr.close();
return parseJspText(buffer, readed);
}
private static boolean isXMLSyntax(FileObject fo) {
String ext = fo.getExt();
if (ext != null && ("jspx".equalsIgnoreCase(ext) || "tagx".equalsIgnoreCase(ext))) { // NOI18N
return true;
}
return false;
}
//JSP encoding parser
private static final String PAGE = "page";
private static final String ENCODING = "pageEncoding";
private static final String CONTENTYPE = "contentType";
private static final String CHARSET = "charset=";
private static final int P_INIT = 0;
private static final int P_LT = 1; //after <
private static final int P_LT_PER = 2; //after <%
private static final int P_LT_PER_ATS = 3; //after <%@
private static final int P_PD = 4; //in page directive
private static final int P_APER = 5; //after closing %
private static final int P_ENC = 7; //after 'encoding' attribute
private static final int P_ENC_EQ = 8; //after encoding=
private static final int P_ENC_EQ_VAL = 9; //after encoding="
private static final int P_CT = 11; //after 'contentType' attribute
private static final int P_CT_EQ = 12; //after contentType=
private static final int P_CT_EQ_VAL = 13; //after contentType="
private static final int P_CT_VAL_CHS = 14; //after contentType="TYPE; char-set=
private static String parseJspText(char[] buffer, int len) {
String contentType = null;
int state = P_INIT;
int i = 0;
int pos = -1;
while (i < len) {
char c = buffer[i];
switch (state) {
case P_INIT:
if (c == '<') { // NOI18N
state = P_LT;
}
i++;
break;
case P_LT:
switch (c) {
case '%': // NOI18N
state = P_LT_PER;
break;
default:
state = P_INIT;
break;
}
i++;
break;
case P_LT_PER:
switch (c) {
case '@': // NOI18N
state = P_LT_PER_ATS;
break;
default:
state = P_INIT;
break;
}
i++;
break;
case P_LT_PER_ATS:
if (c == ' ' || c == '\t') { // NOI18N
i++;
break;
} else if (prescanFor(buffer, i, PAGE)) {
state = P_PD;
i = i + PAGE.length();
break;
}
state = P_INIT;
i++;
break;
case P_PD:
if (prescanFor(buffer, i, ENCODING)) {
state = P_ENC;
i = i + ENCODING.length();
break;
} else if (prescanFor(buffer, i, CONTENTYPE)) {
state = P_CT;
i = i + CONTENTYPE.length();
break;
} else if (c == '%') { // NOI18N
state = P_APER;
}
i++;
break;
case P_APER:
if (c == '>') { // NOI18N
state = P_INIT;
} else {
state = P_PD;
}
i++;
break;
case P_ENC:
switch (c) {
case ' ': // NOI18N
case '\t': // NOI18N
;
break;
case '=': // NOI18N
state = P_ENC_EQ;
break;
case '%': // NOI18N
state = P_APER;
break;
default:
state = P_PD;
break;
}
i++;
break;
case P_ENC_EQ:
switch (c) {
case ' ': // NOI18N
case '\t': // NOI18N
break;
case '"': // NOI18N
state = P_ENC_EQ_VAL;
pos = i + 1;
break;
case '%': // NOI18N
state = P_APER;
break;
default:
state = P_PD;
break;
}
i++;
break;
case P_ENC_EQ_VAL:
switch (c) {
case '"': // NOI18N
return new String(buffer, pos, i - pos); //return the encoding attr value
//break;
default:
}
i++;
break;
case P_CT:
switch(c) {
case ' ': // NOI18N
case '\t': // NOI18N
break;
case '=': // NOI18N
state = P_CT_EQ;
break;
case '%': // NOI18N
state = P_APER;
break;
default:
state = P_PD;
break;
}
i++;
break;
case P_CT_EQ:
switch(c) {
case ' ': // NOI18N
case '\t': // NOI18N
break;
case '"': // NOI18N
state = P_CT_EQ_VAL;
break;
case '%': // NOI18N
state = P_APER;
break;
default:
state = P_PD;
break;
}
i++;
break;
case P_CT_EQ_VAL:
if (prescanFor(buffer, i, CHARSET)) {
state = P_CT_VAL_CHS;
i = i + CHARSET.length();
pos = i;
break;
} else if (c == '"') { // NOI18N
state = P_PD;
break;
}
i++;
break;
case P_CT_VAL_CHS:
switch(c) {
case '"': // NOI18N
contentType = new String(buffer, pos, i - pos); //return the encoding attr value
state = P_PD;
break;
default:
}
i++;
break;
} //eof state switch
}
//returns either contentType value or null; encoding is returned directly from the parser (has priority over CT)
return contentType;
}
private static boolean prescanFor(char[] buffer, int position, String text) {
if ((buffer.length - position) < text.length()) {
return false; //too short buffer - the text cannot be there
}
for (int i = 0; i < text.length(); i++) {
if (buffer[position + i] != text.charAt(i)) {
return false;
}
}
return true;
}
static final String JSP_PROPERTY_GROUP = "jsp-property-group";
static final String PAGE_ENCODING = "page-encoding";
static final String IS_XML = "is-xml";
/** returns an array of booleans - the first states whether the dd contains a element
* with defined encoding resp. marks a set of JSPs to be xml documents. */
private static DDParseInfo parse(InputSource src) throws IOException, SAXException, ParserConfigurationException {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setValidating(false);
SAXParser parser = factory.newSAXParser();
final DDParseInfo ddParseInfo = new DDParseInfo();
class Handler extends DefaultHandler {
private boolean inJspPropertyGroup = false;
@Override
public void startElement(String uri, String localname, String qname, Attributes attr) throws SAXException {
String tagName = qname.toLowerCase();
if (JSP_PROPERTY_GROUP.equals(tagName)) {
inJspPropertyGroup = true;
}
if (inJspPropertyGroup) {
if (PAGE_ENCODING.equals(tagName)) {
ddParseInfo.definesEncoding = true;
}
if (IS_XML.equals(tagName)) {
ddParseInfo.marksXMLDocuments = true;
}
}
}
@Override
public void endElement(String uri, String localname, String qname) throws SAXException {
String tagName = qname.toLowerCase();
if (JSP_PROPERTY_GROUP.equals(tagName)) {
inJspPropertyGroup = false;
}
}
@Override
public InputSource resolveEntity (String publicId, String systemId) {
return new InputSource(new StringReader("")); //prevent the parser to use catalog entity resolver // NOI18N
}
}
parser.parse(src, new Handler());
return ddParseInfo;
}
private static final class DDParseInfo {
public boolean definesEncoding, marksXMLDocuments;
public DDParseInfo() {
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy