All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.intershop.beehive.isml.internal.TemplatePrecompileUtils Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2021 Intershop Communications AG.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.intershop.beehive.isml.internal;

import com.intershop.beehive.isml.capi.ISMLCompilerConfiguration;
import com.intershop.beehive.isml.capi.ISMLException;
import com.intershop.beehive.isml.capi.ISMLTemplateConstants;
import com.intershop.beehive.isml.internal.parser.ISMLtoJSPcompiler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.SequenceInputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * This helper class offers a method to convert an .isml file into an jsp
 * file.
 */

public class TemplatePrecompileUtils
{
    /**
     * pattern for the iscontent charset
     */

    private Pattern patternCharSet = null;

    /**
     *pattern for the iscontent type
     */

    private Pattern patternType = null;

    /**
     * pattern for the xml encoding
     */

    private Pattern patternXML = null;

    /**
     * Default encoding for generated jsp and java source files.
     */

    private static final String JAVA_ENCODING = "UTF8";

    /**
     * Encoding for UTF-16 with big-endian .
     */

    private static final String UNICODE_BIG = "UTF-16BE";

    /**
     * Encoding for UTF-16 with little-endian .
     */

    private static final String UNICODE_LITTLE = "UTF-16LE";

    /**
     * The default character set.
     */
    // private final String defaultCharSet;

    
    private final Logger logger = LoggerFactory.getLogger(getClass());

    /** 
     * Configuration of the isml compiler. 
     */
    private ISMLCompilerConfiguration compilerConfiguration;

    /**
     * The constructor. Creates a new TemplatePrecompileUtils class. 

*/ public TemplatePrecompileUtils(ISMLCompilerConfiguration compilerConfiguration) { this.compilerConfiguration = compilerConfiguration; // we're looking for 'charset' attribute from ISML ISCONTENT tag patternCharSet = Pattern.compile("(iscontent)[^>]+(charset)[:blank:]*=[:blank:]*(\"|')([^\"']+?)(\"|')", Pattern.CASE_INSENSITIVE); // we're looking also for 'type' attribute from ISML ISCONTENT tag patternType = Pattern.compile("(iscontent)[^>]+(type)[:blank:]*=[:blank:]*(\"|')([^\"']+?)(\"|')", Pattern.CASE_INSENSITIVE); // look for the encoding attribute in the xml header patternXML = Pattern.compile("(<[?]xml)[^>]+(encoding)[:blank:]*=[:blank:]*(\"|')([^\"']+?)(\"|')", Pattern.CASE_INSENSITIVE); } /** * This method executes the 1st compilation step. It compiles the isml file to a jsp file * and creates all required directories and removes outdated files on the fly. * * @param sourceFile - the source ISML file * @param destinationFile - the resulting JSP file * @throws IOException if an IO error occurs during the process. * @throws ISMLException if the compilation failed. */ public void compileISML(File sourceFile, File destinationFile) throws IOException, ISMLException { // template source input stream InputStream sourceIn = null; // JSP output stream OutputStreamWriter jspOut = null; File jspOutputFile = destinationFile; // log, which ISML file should be compiled logger.debug("Compiling ISML file: {} to {}", sourceFile.getAbsolutePath(), jspOutputFile.getAbsolutePath()); // get template encoding TemplateEncodingProps templateEncProps = findIsmlEncoding(sourceFile); // determine the encoding of the resulting jspFile String outCharset = findJspEncoding(templateEncProps); logger.debug("Using charset {} to write JSP file.", outCharset); try { InputStream inStream = new ByteArrayInputStream(unicodeEscape(sourceFile, templateEncProps.getIsmlCharset())); if (!templateEncProps.isIsContentPresent()) { // dummy iscontent entry String defaultIsContent = ""; // insert dummy iscontent at the beginning of the file inStream = new SequenceInputStream(new ByteArrayInputStream(defaultIsContent.getBytes()), inStream); } // JavaCC will work with the Unicode-escaped data to preserve integrity sourceIn = new BufferedInputStream(inStream, ISMLTemplateConstants.DEFAULT_TEMPLATE_BUFFERSIZE); sourceIn.mark(Integer.MAX_VALUE); // this is the writer that the JavaCC compiler classes will use jspOut = new OutputStreamWriter(new FileOutputStream(jspOutputFile), outCharset); // compile ISMLtoJSPcompiler pagePreProcessor = new ISMLtoJSPcompiler(sourceIn); if (!pagePreProcessor.compileTemplate(ISMLtoJSPcompiler.ALLOW_ALL, jspOut, sourceFile, sourceIn)) { throw new ISMLException("Failed to compile ISML to JSP."); } jspOut.flush(); } catch (ISMLException sevx) { // first close the output stream if (jspOut != null) { try { jspOut.close(); } catch (IOException e) { // do nothing logger.debug(e.getMessage(), e); } } // remove .jsp file, because it contains incomplete content if (jspOutputFile.exists() && jspOutputFile.isFile()) { jspOutputFile.delete(); } throw sevx; } finally { // do cleanup if (sourceIn != null) { try { sourceIn.close(); } catch (IOException e) { // do nothing logger.debug(e.getMessage(), e); } sourceIn = null; } if (jspOut != null) { try { jspOut.close(); } catch (IOException e) { // do nothing logger.debug(e.getMessage(), e); } jspOut = null; } } } protected ISMLCompilerConfiguration getCompilerConfiguration() { if (null != compilerConfiguration) { return compilerConfiguration; } return new ISMLCompilerConfiguration() { @Override public String getDefaultContentEncoding() { return ISMLTemplateConstants.DEFAULT_CHARSET; } @Override public String getJspEncoding(String mimeType) { return getDefaultContentEncoding(); } }; } /** * Helper method, try to find charset encoding in first 1024 bytes * in the provided template file (assuming ASCII compatible charset). * * @param aFile the file object for the template * @return the encoding for the template, or the system encoding, if an * error occurs or none is found */ protected TemplateEncodingProps findIsmlEncoding(File aFile) { String ismlCharset = null; String jspCharset = null; String xmlCharset = null; String type = null; byte[] scan = new byte[1024]; String scanString = null; FileInputStream scanFile = null; TemplateEncodingProps encProps = null; boolean isIsContentPresent = false; try { scanFile = new FileInputStream(aFile); scanFile.read(scan, 0, scanFile.available()<1024?scanFile.available():1024); // check for real unicode if (scan[0]==(byte)0xFF && scan[1]==(byte)0xFE) { // little-endian unicode file ismlCharset = UNICODE_LITTLE; } else if (scan[0]==(byte)0xFE && scan[1]==(byte)0xFF) { // big-endian unicode file ismlCharset = UNICODE_BIG; } else if (scan[0]==(byte)0xEF && scan[1]==(byte)0xBB && scan[2]==(byte)0xBF) { // utf-8 encoded file ismlCharset = JAVA_ENCODING; } scanString = new String(scan, JAVA_ENCODING); // check for ISML ISCONTENT tag with attribute charset using regexp engine if (patternCharSet != null) //may only be null if screwed during the static init. { Matcher matcher = patternCharSet.matcher(scanString); if (matcher.find()) { String isContentHTMLCharset = matcher.group(4); // convert the ISCONTENT charset attribute to a Java charset jspCharset = CharacterSetMappings.mapHttpToCharset(isContentHTMLCharset); // test, if the ISCONTENT charset is valid try { byte[] testChar = {(byte)80}; new String(testChar, jspCharset); } catch (UnsupportedEncodingException usex) { logger.debug("The ISCONTENT charset attribute {} doesn't describe a valid charset.", isContentHTMLCharset); jspCharset = null; } // if no file prefix was found, use the the ISCONTENT charset // to read the isml file if (ismlCharset == null) { ismlCharset = jspCharset; } } // else do nothing } // else proceed; charset can not be determined, will use system default // check if at least one ISCONTENT tag is present if (scanString.toUpperCase().indexOf("ISCONTENT") != -1) { isIsContentPresent = true; } } catch (Exception e) { logger.error("A problem occurred while trying to find the charset for the template: {}", e.getMessage()); ismlCharset = null; } finally { if (scanFile != null) { try { scanFile.close(); } catch (Exception e) { // do nothing logger.debug(e.getMessage(), e); } } } if (ismlCharset == null) { ismlCharset = System.getProperty("file.encoding"); logger.debug("Could not determine ISML charset, assuming systems: {}", ismlCharset); } else { logger.debug("Using charset {} to read ISML file.", ismlCharset); } // check for ISML ISCONTENT tag with attribute type using regexp engine if (patternType != null) //may only be null if screwed during the static init. { try { // convert the file string to the given isml charset String typeString = new String(scan, ismlCharset); Matcher matcher = patternType.matcher(typeString); if (matcher.find()) { type = matcher.group(4); } // else do nothing } catch (UnsupportedEncodingException ex) { // charset is not valid, so set type to null type = ISMLTemplateConstants.TYPE_HTML; } } // check for an xml head tag if (patternXML != null) //may only be null if screwed during the static init. { try { // convert the file string to the given isml charset String xmlString = new String(scan, ismlCharset); Matcher matcher = patternXML.matcher(xmlString); if (matcher.find()) { String xmlEncodingCharset = matcher.group(4); // convert the xml charset attribute to a Java charset xmlCharset = CharacterSetMappings.mapHttpToCharset(xmlEncodingCharset); // test, if the xml charset is valid try { byte[] testChar = {(byte)80}; new String(testChar, xmlCharset); type = ISMLTemplateConstants.TYPE_XML; } catch (UnsupportedEncodingException usex) { logger.debug("The XML encoding attribute {} doesn't describe a valid charset.", xmlEncodingCharset); xmlCharset = null; } } // else do nothing } catch (UnsupportedEncodingException ex) { // charset is not valid, so set type to null xmlCharset = null; } } encProps = new TemplateEncodingProps(ismlCharset, jspCharset, xmlCharset, type, isIsContentPresent); return encProps; } /** * A helper method to determine the file encoding for the resulting JSP * output file. * * @param templateEncRes - an container for the template charset, mime type * and the contentCharset flag * * @return the file encoding to use for the resulting JSP output file */ protected String findJspEncoding(TemplateEncodingProps templateEncRes) { String jspCharset = templateEncRes.getJspCharset(); String type = templateEncRes.getMimeType(); String defaultContentCharset = CharacterSetMappings.mapHttpToCharset(determineDefaultHTMLCharset()); String cutomEncoding = null; // if no type is specified, assume "text/html" if (type == null) { type = ISMLTemplateConstants.TYPE_HTML; } if (type.equalsIgnoreCase(ISMLTemplateConstants.TYPE_HTML)) { // 1. determine, if a custom charset mapping exists for text/html cutomEncoding = getCustomCharsetMapping(ISMLTemplateConstants.TYPE_HTML); if (cutomEncoding != null) { return cutomEncoding; } // 2. no explicit charset given, take a look at the default content encoding // settings return defaultContentCharset; } else if (type.startsWith(ISMLTemplateConstants.TYPE_XML)) { // 1. detemerine the xml header charset if (templateEncRes.getXmlCharset() != null) { return templateEncRes.getXmlCharset(); } // 2. use the ISCONTENT charset, if one was given if (jspCharset != null) { return jspCharset; } // 3. determine, if a custom charset mapping exists for text/xml cutomEncoding = getCustomCharsetMapping(ISMLTemplateConstants.TYPE_XML); if (cutomEncoding != null) { return cutomEncoding; } // 4. use the default charset return defaultContentCharset; } else if (type.startsWith("text/")) { // 1. use the ISCONTENT charset, if one was given if (jspCharset != null) { return jspCharset; } // 2. determine, if a custom charset mapping exists for text/* cutomEncoding = getCustomCharsetMapping("text/*"); if (cutomEncoding != null) { return cutomEncoding; } // 3. use the default charset return defaultContentCharset; } // 1. determine, if a custom charset mapping exists for the given mime type cutomEncoding = getCustomCharsetMapping(type); if (cutomEncoding != null) { return cutomEncoding; } // 2. in all other cases, use the default return defaultContentCharset; } /** * A helper method, which checks, if a custom encoding for a special MIME * type is defined. The cutom MIME type in the properties has the syntax. * * intershop.template.encoding.<mimeType> = <custom encoding name> * * @param key * the MIME type, for which a mapping should be searched * @return the Java charset name or null, if no mapping is defined or the * charset name is not supported */ protected String getCustomCharsetMapping(String key) { String charsetName = getCompilerConfiguration().getJspEncoding(key); if (charsetName != null) { charsetName = CharacterSetMappings.mapHttpToCharset(charsetName); try { Charset.isSupported(charsetName); } catch (IllegalCharsetNameException ilex) { logger.error("The custom MIME type contentEncoding for the MIME type {} charset {} is not valid!", key, charsetName); return null; } return charsetName; } return null; } /** * Gets the default character set. * * @return the default character set */ protected String getDefaultCharSet() { String encoding = getCompilerConfiguration().getDefaultContentEncoding(); return null != encoding? encoding: ISMLTemplateConstants.DEFAULT_CHARSET; } /** * A helper method, which determins the default html charset, either given * in the appserver.properties file as "intershop.template.DefaultContentEncoding" * or "UTF-8" as default * * @return the default content charset, which should be used */ protected String determineDefaultHTMLCharset() { // initialize default value String encodingCharSet = getDefaultCharSet(); // test the decoding char set try { byte[] testChar = {(byte)80}; String javaCharset = CharacterSetMappings.mapHttpToCharset(encodingCharSet); new String(testChar, javaCharset); // no exception raised } catch (UnsupportedEncodingException e) { /* invalid char set, disable decoding */ logger.error("The ContentEncoding charset {} is not valid! Using Charset : {}", encodingCharSet, ISMLTemplateConstants.DEFAULT_CHARSET); encodingCharSet = ISMLTemplateConstants.DEFAULT_CHARSET; } // set the charset for encoding return encodingCharSet; } // process input file converting to unicode-escaped data // this step is necessary for JavaCC not screwing with original data /** * Process input file converting to unicode-escaped data. * This step is necessary for JavaCC not screwing with original data. * * @param aFile the file object for the template * @param charset the presumed charset for the template * * @return byte array containing the unicode-escaped data for * the template. * * @throws IOException if an IO error occurs during the process. */ protected byte[] unicodeEscape(File aFile, String charset) throws IOException { byte[] bytes = null; byte[] prefix = null; BufferedReader rd = null; PrintWriter wr = null; try { BufferedInputStream bufInStream = new BufferedInputStream(new FileInputStream(aFile)); rd = new BufferedReader(new InputStreamReader(bufInStream, charset)); ByteArrayOutputStream baos = new ByteArrayOutputStream(); wr = new PrintWriter(new OutputStreamWriter(baos)); if (UNICODE_BIG.equalsIgnoreCase(charset) || UNICODE_LITTLE.equalsIgnoreCase(charset)) { // cut off the utf-16 file prefix prefix = new byte[2]; bufInStream.read(prefix, 0, prefix.length); } else if (JAVA_ENCODING.equalsIgnoreCase(charset)) { // cut off the utf-8 file prefix, if it exists bufInStream.mark(3); prefix = new byte[3]; bufInStream.read(prefix, 0, prefix.length); if (!(prefix[0]==(byte)0xEF && prefix[1]==(byte)0xBB && prefix[2]==(byte)0xBF)) { // the first three characters are not significant for utf-8, // so reset the reader bufInStream.reset(); } } int c; while((c = rd.read()) != -1) { if (isASCII((char) c)) { wr.write(c); } else { wr.write(unicodeToString((char) c)); } } wr.flush(); bytes = baos.toByteArray(); } catch (FileNotFoundException e) { logger.error("An error occurred while trying to apply the Unicode-escaping conversion to the template: {}", e.getMessage()); throw new ISMLException(e); } catch (UnsupportedEncodingException e) { logger.error("An error occurred while trying to apply the Unicode-escaping conversion to the template: {}", e.getMessage()); throw new ISMLException(e); } finally { if (rd!=null) { try { rd.close(); } catch (Exception e) { // do nothing logger.debug(e.getMessage(), e); } } if (wr!=null) { try { wr.close(); } catch (Exception e) { // do nothing logger.debug(e.getMessage(), e); } } } return bytes; } /** * Helper method, converts non-ASCII characters * in a given String to unicode-escaped equivalents. * * @param s the String * @return the converted string */ protected String convertString(String s) { int len = s.length(); StringBuilder sb = new StringBuilder(); for (int i=0; i=32 && c<=126) || (c=='\r') || (c=='\n') || (c=='\t'); } /** * Helper method, converts a character to its unicode-escaped counterpart. * * @param c the Character * @return char array containing the unicode-escaped string for the given * character. */ private char[] unicodeToString(char c) { char[] ca = { '\\', 'u', '\0', '\0', '\0', '\0' }; ca[2] = hexToChar((c >> 12) & 0x0f); ca[3] = hexToChar((c >> 8) & 0x0f); ca[4] = hexToChar((c >> 4) & 0x0f); ca[5] = hexToChar((c ) & 0x0f); return ca; } /** * Helper method, returns the representative character for a single * hex value (0-F). * * @param hex the Hex value * @return the equivalent char */ private char hexToChar (int hex) { if (hex < 10) return (char)(hex + '0'); else return (char)(hex - 10 + 'a'); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy