All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ma.vi.base.string.Escape Maven / Gradle / Ivy

/*
 * Copyright (c) 2016 Vikash Madhow
 */

package ma.vi.base.string;

import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.Map;

import static com.google.common.base.Preconditions.checkArgument;

/**
 * <p>
 * Simple character escaping facility which can escape a set of characters in texts by
 * preceding them with a start of escape sequence character (default \). The set, specified
 * on construction, is applied using the {@link #escape(String)} method.
 * </p>
 *
 * <p>
 * The {@link #map(String)} method can then temporarily change the escaped characters
 * in a string to characters in the unicode private use area (e000-f8ff) so that its
 * processing is simplified.
 * </p>
 *
 * <p>
 * Finally, the {@link #demap(String)} method can be used to reconstruct the original
 * string (without backslash for escaped characters) from a remapped string.
 * </p>
 *
 * <p>
 * These three methods should thus be used together for processing string containing
 * characters which might conflict with surrounding characters when embedded in a
 * larger body of text (such as character data in XML).
 * </p>
 *
 * @author Vikash Madhow
 */
public class Escape {
  /**
   * Creates an escaper using the backslash as the start of escape character.
   *
   * @param escapeChars The characters to escape; must be non-empty and free of duplicates.
   * @throws IllegalArgumentException If {@code escapeChars} is empty or contains
   *                                  the same character more than once.
   */
  public Escape(String escapeChars) {
    this('\\', escapeChars);
  }

  /**
   * Creates an escaper using the supplied start of escape character.
   *
   * @param startOfEscape The character written before every escaped character.
   * @param escapeChars   The characters to escape; must be non-empty and free of duplicates.
   *                      The position of a character in this string is its ordinal, which
   *                      determines the private use area character it is remapped to.
   * @throws IllegalArgumentException If {@code escapeChars} is empty or contains
   *                                  the same character more than once.
   */
  public Escape(char startOfEscape, String escapeChars) {
    if (escapeChars.isEmpty()) {
      throw new IllegalArgumentException("No characters were provided for escaping");
    }
    this.startOfEscape = startOfEscape;
    this.escapeChars = escapeChars.toCharArray();

    ordinals = new HashMap<>();
    for (int i = 0; i < this.escapeChars.length; i++) {
      char c = this.escapeChars[i];
      // The message is only built when the check actually fails.
      if (ordinals.containsKey(c)) {
        throw new IllegalArgumentException("Character '" + c + "' has been specified more than "
                                         + "once for escaping");
      }
      ordinals.put(c, i);
    }
  }

  /**
   * Returns the text with any of the characters to escape
   * preceded with the start of escape character (default \).
   *
   * @param text The text to escape; may be null.
   * @return The escaped text, or null if {@code text} is null.
   */
  public String escape(String text) {
    if (text == null) {
      return null;
    }
    StringBuilder escaped = new StringBuilder();
    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      if (ordinals.containsKey(c)) {
        escaped.append(startOfEscape);
      }
      escaped.append(c);
    }
    return escaped.toString();
  }

  /**
   * Remaps the escaped characters of the text to the private use area; equivalent
   * to {@link #map(String, String[])} with no replacements.
   *
   * @param text The text to remap; may be null.
   * @return The remapped text, or null if {@code text} is null.
   */
  public String map(String text) {
    return map(text, null);
  }

  /**
   * This method can then be used to temporarily change the escaped characters in the
   * string to characters in the unicode private use area (e000-f8ff) so that processing
   * of the string would not encounter them and there is very little risk that those
   * remapped characters would conflict with existing ones in the string.
   *
   * <p>
   * An escape character followed by a character which is not in the escape set is
   * copied unchanged together with that character, and a lone escape character at
   * the end of the text is kept as is.
   * </p>
   *
   * @param text         The text to remap; may be null.
   * @param replacements Optional replacement strings indexed by the ordinal of the escaped
   *                     character; when null or too short for an ordinal, the private use
   *                     area character for that ordinal is used instead. Note that
   *                     {@link #demap(String)} only reverses private use area characters,
   *                     not custom replacements.
   * @return The remapped text, or null if {@code text} is null.
   */
  public String map(String text, String[] replacements) {
    if (text == null) {
      return null;
    }
    StringBuilder remapped = new StringBuilder();
    boolean inEscape = false;
    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      if (inEscape) {
        Integer pos = ordinals.get(c);
        if (pos == null) {
          // Not an escapable character: keep the escape sequence untouched.
          remapped.append(startOfEscape).append(c);
        } else if (replacements != null && replacements.length > pos) {
          remapped.append(replacements[pos]);
        } else {
          remapped.append((char) (PUA + pos));
        }
        inEscape = false;
      } else if (c == startOfEscape) {
        inEscape = true;
      } else {
        remapped.append(c);
      }
    }
    if (inEscape) {
      // Dangling escape character at the end of the text.
      remapped.append(startOfEscape);
    }
    return remapped.toString();
  }

  /**
   * This method reconstructs the original string (without backslash for escaped
   * characters) from a remapped string.
   *
   * @param text The remapped text; may be null.
   * @return The text with every private use area character that corresponds to an
   *         ordinal of this escaper replaced by the original character, or null
   *         if {@code text} is null.
   */
  public String demap(String text) {
    if (text == null) {
      return null;
    }
    StringBuilder unescaped = new StringBuilder();
    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      if (c >= PUA && c - PUA < escapeChars.length) {
        c = escapeChars[c - PUA];
      }
      unescaped.append(c);
    }
    return unescaped.toString();
  }

  /**
   * Escape a string so that it can be embedded in a JSON object.
   *
   * <p>
   * Backslash, double quote and the control characters below U+0020 are escaped;
   * control characters without a short form are written as a backslash, the letter
   * u and four hexadecimal digits. Single quotes are intentionally left untouched.
   * </p>
   *
   * @param value The string to escape; must not be null.
   * @return The escaped string (without surrounding quotes).
   * @throws NullPointerException If {@code value} is null.
   */
  public static String escapeJsonString(String value) {
    StringBuilder out = new StringBuilder(value.length() + 16);
    for (int i = 0; i < value.length(); i++) {
      char c = value.charAt(i);
      switch (c) {
        case '\\': out.append("\\\\"); break;
        case '"':  out.append("\\\""); break;
        case '\n': out.append("\\n");  break;
        case '\r': out.append("\\r");  break;
        case '\t': out.append("\\t");  break;
        case '\f': out.append("\\f");  break;
        case '\b': out.append("\\b");  break;
        default:
          if (c < 0x20) {
            // Remaining control characters must be escaped in JSON strings;
            // c < 0x20 so the two leading hex digits are always 00.
            out.append("\\u00")
               .append(Character.forDigit(c >> 4, 16))
               .append(Character.forDigit(c & 0xF, 16));
          } else {
            out.append(c);
          }
      }
    }
    return out.toString();
  }

  //  public static final Escape Json = new Escape('\\', "\\\"bfnrt");

  /**
   * Escapes a string so that it can be embedded in an SQL query.
   *
   * <p>
   * Backslashes are doubled, single quotes are doubled and backspace, form feed,
   * newline, carriage return and tab are replaced with their backslash forms.
   * </p>
   *
   * @param value The string to escape; must not be null.
   * @return The escaped string (without surrounding quotes).
   * @throws NullPointerException If {@code value} is null.
   */
  public static String escapeSqlString(String value) {
    return value.replace("\\", "\\\\")
                .replace("'", "''")
                .replace("\b", "\\b")
                .replace("\f", "\\f")
                .replace("\n", "\\n")
                .replace("\r", "\\r")
                .replace("\t", "\\t");
  }

  /**
   * Escapes single quotes in an ESQL query.
   *
   * @param value The string to escape; must not be null.
   * @return The string with every single quote replaced with {@code %q}.
   * @throws NullPointerException If {@code value} is null.
   */
  public static String escapeEsqlQuote(String value) {
    return value.replace("'", "%q");
  }

  //  /**
  //   * Unescape a string obtained from an SQL context to produce its actual value.
  //   */
  //  public static String unescapeSqlString(CharSequence value) {
  //    String s = value.toString().replace("\\\\", "\\")
  //                               .replace("''", "'")
  //                               .replace("\\b", "\b")
  //                               .replace("\\f", "\f")
  //                               .replace("\\n", "\n")
  //                               .replace("\\r", "\r")
  //                               .replace("\\t", "\t");
  //    return insertUnicodeCodePoints(s);
  //  }
  //
  //  /**
  //   * Insert characters from unicode code points
  //   */
  //  public static String insertUnicodeCodePoints(String s) {
  //    int lastPos = 0;
  //    StringBuilder st = new StringBuilder();
  //    Matcher matcher = UnicodeEscape.matcher(s);
  //    while (matcher.find()) {
  //      st.append(s, lastPos, matcher.start());
  //      st.append(toChars(parseInt(matcher.group(1), 16)));
  //      lastPos = matcher.end();
  //    }
  //    return st.append(s.substring(lastPos)).toString();
  //  }
  //
  //  public static final Escape DOUBLE_QUOTE_ESC = new Escape('"');
  //  public static final Escape SINGLE_QUOTE_ESC = new Escape('\'');
  //
  //  /**
  //   * Pattern to match unicode escapes of the form \\\\uABCD.
  //   */
  //  public static final Pattern UnicodeEscape = Pattern.compile("\\\\u([0-9a-fA-F]{4})");

  /**
   * The character used to signal the start of an escape sequence.
   * Default is \
   */
  private final char startOfEscape;

  /**
   * The characters to escape, in the order that they were supplied
   * to this class on construction.
   */
  private final char[] escapeChars;

  /**
   * The characters to escape are mapped to their position in the original
   * array supplied on construction and those positions are used as the
   * ordinal value of the character. These ordinals are added to the unicode
   * {@link #PUA} to obtain the remapped code point for a character in a piece
   * of text when remapping.
   */
  private final Map<Character, Integer> ordinals;

  /**
   * The Unicode Private Use Area (PUA) is from E000 to F8FF. During processing
   * (before unescaping), the escaped characters can temporarily be remapped to
   * this area so as not to interfere with the general processing of the string.
   */
  private static final char PUA = '\uE000';

  /*
   * NOTE: SimpleDateFormat instances are not synchronized; these shared public
   * constants must not be used concurrently from several threads without
   * external synchronization.
   */

  /** Date format {@code yyyy-MM-dd}. Not thread-safe, see note above. */
  public static final SimpleDateFormat SERVER_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");

  /** Time format {@code HH:mm:ss.SSS}. Not thread-safe, see note above. */
  public static final SimpleDateFormat SERVER_TIME_FORMAT = new SimpleDateFormat("HH:mm:ss.SSS");

  /** Date-time format {@code yyyy-MM-dd HH:mm:ss.SSS}. Not thread-safe, see note above. */
  public static final SimpleDateFormat SERVER_DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy