All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.segoia.util.strings.StringUtil Maven / Gradle / Ivy

The newest version!
/**
 * commons - Various Java Utils
 * Copyright (C) 2009  Adrian Cristian Ionescu - https://github.com/acionescu
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.segoia.util.strings;

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.List;

import net.segoia.util.parser.ParseEventHandlerConfig;
import net.segoia.util.parser.ParseResponse;
import net.segoia.util.parser.Parser;
import net.segoia.util.parser.ParserException;
import net.segoia.util.parser.ParserHandlerFactory;
import net.segoia.util.parser.event.ConfigurableParseEventHandler;
import net.segoia.util.parser.event.DefaultParseEventHandler;

public class StringUtil {
    private static Parser unescapeHtmlParser;
    private static Parser htmlSanitizer;

    static {
	unescapeHtmlParser = new Parser();
	try {
	    unescapeHtmlParser
		    .addSymbols("[,DOC_START:IGNORE_EMPTY,doc_start,true],[&,GROUP_START:SEPARATE,html_escape_start,true,html_escape_end,html_escaper],[;,GROUP_END:UNGROUP,html_escape_end],['{}',WORKER:STRING_CONCAT,string_concat],['{lt=<,gt=>,amp=&,quot=\\\\\\\",apos=\\'}',WORKER:MAPPER,html_escaper]");
	} catch (ParserException e) {
	    // TODO Auto-generated catch block
	    e.printStackTrace();
	}

	unescapeHtmlParser.setHandlerFactory(new ParserHandlerFactory(new DefaultParseEventHandler()));
	unescapeHtmlParser.getParseContextConfig().getNestedSymbols().setCaseInsensitive(true);

	htmlSanitizer = new Parser();
	try {
	    htmlSanitizer
		    .addSymbols("[,DOC_START:IGNORE_EMPTY,doc_start,true],[,GROUP_END,es1],[,GROUP_END,end_tag]");
	} catch (ParserException e) {
	    // TODO Auto-generated catch block
	    e.printStackTrace();
	}

	htmlSanitizer.setHandlerFactory(new ParserHandlerFactory(new DefaultParseEventHandler()));
	htmlSanitizer.getParseContextConfig().getNestedSymbols().setCaseInsensitive(true);
    }

    /**
     * 

* Escapes the characters in a String using JavaScript String rules to a Writer. *

* *

* A null string input has no effect. *

* * @see #escapeJavaScript(java.lang.String) * @param out * Writer to write escaped string into * @param str * String to escape values in, may be null * @throws IllegalArgumentException * if the Writer is null * @throws IOException * if error occurs on underlying Writer **/ public static void escapeJavaScript(Writer out, String str) throws IOException { escapeJavaStyleString(out, str, true); } /** *

* Worker method for the {@link #escapeJavaScript(String)} method. *

* * @param str * String to escape values in, may be null * @param escapeSingleQuotes * escapes single quotes if true * @return the escaped string */ private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes) { if (str == null) { return null; } try { StringWriter writer = new StringWriter(str.length() * 2); escapeJavaStyleString(writer, str, escapeSingleQuotes); return writer.toString(); } catch (IOException ioe) { // this should never ever happen while writing to a StringWriter ioe.printStackTrace(); return null; } } /** *

* Worker method for the {@link #escapeJavaScript(String)} method. *

* * @param out * write to receieve the escaped string * @param str * String to escape values in, may be null * @param escapeSingleQuote * escapes single quotes if true * @throws IOException * if an IOException occurs */ private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote) throws IOException { if (out == null) { throw new IllegalArgumentException("The Writer must not be null"); } if (str == null) { return; } int sz; sz = str.length(); for (int i = 0; i < sz; i++) { char ch = str.charAt(i); // handle unicode if (ch > 0xfff) { out.write("\\u" + hex(ch)); } else if (ch > 0xff) { out.write("\\u0" + hex(ch)); } else if (ch > 0x7f) { out.write("\\u00" + hex(ch)); } else if (ch < 32) { switch (ch) { case '\b': out.write('\\'); out.write('b'); break; case '\n': out.write('\\'); out.write('n'); break; case '\t': out.write('\\'); out.write('t'); break; case '\f': out.write('\\'); out.write('f'); break; case '\r': out.write('\\'); out.write('r'); break; default: if (ch > 0xf) { out.write("\\u00" + hex(ch)); } else { out.write("\\u000" + hex(ch)); } break; } } else { switch (ch) { case '\'': if (escapeSingleQuote) { out.write('\\'); } out.write('\''); break; case '"': out.write('\\'); out.write('"'); break; case '\\': out.write('\\'); out.write('\\'); break; case '/': out.write('\\'); out.write('/'); break; default: out.write(ch); break; } } } } /** *

* Unescapes any Java literals found in the String. For example, it will turn a sequence of * '\' and 'n' into a newline character, unless the '\' is preceded by * another '\'. *

* * @param str * the String to unescape, may be null * @return a new unescaped String, null if null string input */ public static String unescapeJava(String str) { if (str == null) { return null; } try { StringWriter writer = new StringWriter(str.length()); unescapeJava(writer, str); return writer.toString(); } catch (IOException ioe) { // this should never ever happen while writing to a StringWriter ioe.printStackTrace(); return null; } } /** *

* Unescapes any Java literals found in the String to a Writer. *

* *

* For example, it will turn a sequence of '\' and 'n' into a newline character, unless * the '\' is preceded by another '\'. *

* *

* A null string input has no effect. *

* * @param out * the Writer used to output unescaped characters * @param str * the String to unescape, may be null * @throws IllegalArgumentException * if the Writer is null * @throws IOException * if error occurs on underlying Writer */ public static void unescapeJava(Writer out, String str) throws IOException { if (out == null) { throw new IllegalArgumentException("The Writer must not be null"); } if (str == null) { return; } int sz = str.length(); StringBuffer unicode = new StringBuffer(4); boolean hadSlash = false; boolean inUnicode = false; for (int i = 0; i < sz; i++) { char ch = str.charAt(i); if (inUnicode) { // if in unicode, then we're reading unicode // values in somehow unicode.append(ch); if (unicode.length() == 4) { // unicode now contains the four hex digits // which represents our unicode character try { int value = Integer.parseInt(unicode.toString(), 16); out.write((char) value); unicode.setLength(0); inUnicode = false; hadSlash = false; } catch (NumberFormatException nfe) { throw new IOException("Unable to parse unicode value: " + unicode, nfe); } } continue; } if (hadSlash) { // handle an escaped value hadSlash = false; switch (ch) { case '\\': out.write('\\'); break; case '\'': out.write('\''); break; case '\"': out.write('"'); break; case 'r': out.write('\r'); break; case 'f': out.write('\f'); break; case 't': out.write('\t'); break; case 'n': out.write('\n'); break; case 'b': out.write('\b'); break; case 'u': { // uh-oh, we're in unicode country.... inUnicode = true; break; } default: out.write(ch); break; } continue; } else if (ch == '\\') { hadSlash = true; continue; } out.write(ch); } if (hadSlash) { // then we're in the weird case of a \ at the end of the // string, let's output it anyway. out.write('\\'); } } /** *

* Returns an upper case hexadecimal String for the given character. *

* * @param ch * The character to convert. * @return An upper case hexadecimal String */ private static String hex(char ch) { return Integer.toHexString(ch).toUpperCase(); } public static String escapeString(String input, String outputCharsToBeEscaped, String outputEscapeChar) { StringBuffer in = new StringBuffer(input); for (int i = 0; i < outputCharsToBeEscaped.length(); i++) { char c = outputCharsToBeEscaped.charAt(i); StringBuffer out = new StringBuffer(); replace(in, c, outputEscapeChar + c, out); in = out; } return in.toString(); } public static void replace(StringBuffer input, char what, String with, StringBuffer sb) { for (int i = 0; i < input.length(); i++) { char c = input.charAt(i); if (c == what) { sb.append(with); } else { sb.append(c); } } } public static String escapeHtml(String input) { StringBuffer sb = new StringBuffer(); for (int i = 0; i < input.length(); i++) { char c = input.charAt(i); switch (c) { case '<': sb.append("<"); break; case '>': sb.append(">"); break; case '&': sb.append("&"); break; default: sb.append(c); break; } } return sb.toString(); } public static String unescapeHtml(String input) { try { ParseResponse result = unescapeHtmlParser.parse(input); StringBuffer resp = new StringBuffer(); List objectsList = result.getObjectsList(); for (Object o : objectsList) { resp.append(o.toString()); } return resp.toString(); } catch (ParserException e) { // TODO Auto-generated catch block e.printStackTrace(); return null; } } public static String sanitizeHtml(String input) { if (input == null) { return null; } try { ParseResponse result = htmlSanitizer.parse(input); StringBuffer resp = new StringBuffer(); List objectsList = result.getObjectsList(); for (Object o : objectsList) { resp.append(o.toString()); } return resp.toString(); } catch (ParserException e) { // TODO Auto-generated catch block e.printStackTrace(); return null; } } public static void main(String[] args) { // System.out.println("end="+unescapeHtml("<script>alert(\"hopa\")</script>\"")); System.out.println(sanitizeHtml("\n" + "deci : hhfd\n" + "")); } }