ucar.nc2.util.EscapeStrings Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cdm Show documentation
Show all versions of cdm Show documentation
The NetCDF-Java Library is a Java interface to NetCDF files,
as well as to many other types of scientific data formats.
The newest version!
///////////////////////////////////////////////////////////////////////////// // This file is part of the "Java-DAP" project, a Java implementation // of the OPeNDAP Data Access Protocol. // // Copyright (c) 2010, OPeNDAP, Inc. // Copyright (c) 2002,2003 OPeNDAP, Inc. // // Author: James Gallagher
character may not appear on the allowable list, as if it did it would break the 1:1 * and onto mapping between the unescaped character space and the escaped characater space. * * @param in The string in which to replace characters. * @param allowable The set of allowable characters. * @param esc The escape String (typically "%" for a URI or "\" for a regular expression). * @param spaceplus True if spaces should be replaced by '+'. * @return The modified identifier. */ // Useful constants static final byte blank = ((byte)' '); static final byte plus = ((byte)'+'); private static String xescapeString(String in, String allowable, char esc, boolean spaceplus) { try { StringBuffer out = new StringBuffer(); if (in == null) return null; byte[] utf8 = in.getBytes(CDM.utf8Charset); byte[] allow8 = allowable.getBytes(CDM.utf8Charset); for(byte b: utf8) { if(b == blank && spaceplus) { out.append('+'); } else { // search allow8 boolean found = false; for(byte a: allow8) { if(a == b) {found = true; break;} } if(found) {out.append((char)b);} else { String c = Integer.toHexString(b); out.append(esc); if (c.length() < 2) out.append('0'); out.append(c); } } } return out.toString(); } catch (Exception e) { return in; } } private static String escapeString(String in, String allowable) {return xescapeString(in,allowable,_URIEscape,false);} /** * Given a string that contains WWW escape sequences, translate those escape * sequences back into ASCII characters. Return the modified string. * * @param in The string to modify. * @param escape The character used to signal the begining of an escape sequence. * param except If there is some escape code that should not be removed by * this call (e.g., you might not want to remove spaces, %20) use this * parameter to specify that code. The function will then transform all * escapes except that one. * @param spaceplus True if spaces should be replaced by '+'. * @return The modified string. */ private static String xunescapeString(String in, char escape, boolean spaceplus) { try { if (in == null) return null; byte[] utf8 = in.getBytes(CDM.utf8Charset); byte escape8 = (byte)escape; byte[] out = new byte[utf8.length]; // Should be max we need int index8 = 0; for(int i=0;i// // All rights reserved. // // Redistribution and use in source and binary forms, // with or without modification, are permitted provided // that the following conditions are met: // // - Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // - Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // - Neither the name of the OPeNDAP nor the names of its contributors may // be used to endorse or promote products derived from this software // without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, O // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ///////////////////////////////////////////////////////////////////////////// package ucar.nc2.util; import ucar.nc2.constants.CDM; import ucar.httpservices.HTTPSession; import java.net.*; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * User: ndp * Date: Jul 7, 2006 * Time: 10:23:19 AM */ public class EscapeStrings { ////////////////////////////////////////////////////////////////////////// static public org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(HTTPSession.class); ////////////////////////////////////////////////////////////////////////// /* // Sets of ascii characters static final String alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; static final String numeric = "0123456789"; static final String alphaNumeric = alpha + numeric; // Experimentally determined url and query // legal and illegal chars as defined by apache httpclient3 // Sets are larger than strictly necessary static final String httpclient_urllegal = "!#$&'()*+,-./:;=?@_~"; static final String httpclient_querylegal = "!#$&'()*+,-./:;=?@_~%"; // % is difference static final String httpclient_urlillegal = " \"<>[\\]^`{|}%"; static final String httpclient_queryillegal = " \"<>[\\]^`{|}"; // % is difference // Set of all ascii printable non-alphanumeric (aka nan) characters static private final String nonAlphaNumeric = " !\"#$%&'()*+,-./:;<=>?@[]\\^_`|{}~" ; static private final String queryReserved = httpclient_queryillegal; // " ?&=,+;#"; // special parsing meaning in queries static private final String urlReserved = httpclient_urlillegal; // ":/#?"; // special parsing meaning in url // We assume that whoever constructs a url (minus the query) // has properly percent encoded whatever characters need to be encoded. // Sets of characters absolutely DIS-allowed in url. static private final String urlDisallowed = stringUnion(urlReserved); // what else? // Complement of urlDisallowed static private final String urlAllowed = stringDiff(nonAlphaNumeric,urlDisallowed); // This is set of legal characters that can appear unescaped in a url static private final String _allowableInUrl= alphaNumeric + urlAllowed; // Sets of characters absolutely DIS-allowed in query string identifiers. // Basically, this set is determined by what kind of query parsing needs to occur. static private final String queryIdentDisallowed = stringUnion(queryReserved+"\"\\^`|<>[]{}"); // Complement of queryIdentDisallowed static private final String queryIdentAllowed = stringDiff(nonAlphaNumeric,queryIdentDisallowed); // This is set of legal characters that can appear unescaped in a url query. static private final String _allowableInUrlQuery = alphaNumeric + queryIdentAllowed; // Define the set of characters allowable in DAP Identifiers static private final String dapSpecAllowed = "_!~*'-\"" ; //as specified in dap2 spec static private final String _namAllowedInDAP = dapSpecAllowed + "./"; // for groups and structure names static private final String _allowableInDAP = alphaNumeric + _namAllowedInDAP; // This is set of legal characters that can appear unescaped in an OGC query. // See OGC Web Services Common 2.0.0 section 11.3 static private final String _namAllowedInOGC = "-_.!~*'()"; static private final String _disallowedInOGC = stringUnion(queryReserved+stringDiff(nonAlphaNumeric,_namAllowedInOGC)); static private final String _allowableInOGC = alphaNumeric + _namAllowedInOGC; static private final char _URIEscape = '%'; // These are the DEFINITIVE set of non-alphanumeric characters that are legal // in opendap identifiers (according to DAP2 protocol spec). // Add '/' to support group names static private String opendap_identifier_special_characters = "_!~*'-\"" // see dap spec. + "./" ; // less strict // The complete set of legal opendap identifier characters public static String opendap_identifier_characters = alphaNumeric + opendap_identifier_special_characters; */ static protected final String alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; static protected final String numeric = "0123456789"; static protected final String alphaNumeric = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; static protected final String httpclient_urllegal = "!#$&'()*+,-./:;=?@_~"; static protected final String httpclient_querylegal = "!#$&'()*+,-./:;=?@_~%"; static protected final String httpclient_urlillegal = " \"<>[\\]^`{|}%"; static protected final String httpclient_queryillegal = " \"<>[\\]^`{|}"; static protected final String nonAlphaNumeric = " !\"#$%&'()*+,-./:;<=>?@[]\\^_`|{}~"; static protected final String queryReserved = " \"<>[\\]^`{|}"; static protected final String urlReserved = " \"<>[\\]^`{|}%"; static protected final String urlDisallowed = " \"<>[\\]^`{|}%"; static protected final String urlAllowed = "!#$&'()*+,-./:;=?@_~"; static protected final String _allowableInUrl = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!#$&'()*+,-./:;=?@_~"; static protected final String queryIdentDisallowed = " \"\\^`|<>[]{}"; static protected final String queryIdentAllowed = "!#$%&'()*+,-./:;=?@_~"; static protected final String _allowableInUrlQuery = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!#$%&'()*+,-./:;=?@_~"; static protected final String dapSpecAllowed = "_!~*'-\""; static protected final String _namAllowedInDAP = "_!~*'-\"./"; static protected final String _allowableInDAP = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_!~*'-\"./"; static protected final String _namAllowedInOGC = "-_.!~*'()"; static protected final String _disallowedInOGC = " \"#$%&+,/:;<=>?@[]\\^`|{}"; static protected final String _allowableInOGC = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.!~*'()"; static protected final String opendap_identifier_special_characters = "_!~*'-\"./"; static protected final String opendap_identifier_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_!~*'-\"./"; static protected final char _URIEscape = '%'; /* * s1 union s2 */ static private String stringUnion(String s) { StringBuilder union = new StringBuilder(); for(int i=0;i = 0) continue; // later occurrence union.append(c); } return union.toString(); } /** * Replace all characters in the String in
not present in the Stringallowable
with * their hexidecimal values (encoded as UTF8) and preceeded by the Stringesc
* * Theesc = hex0 && b <= hex9) return (byte)(b - hex0); if(b >= hexa && b <= hexf) return (byte)(ten + (b - hexa)); if(b >= hexA && b <= hexF) return (byte)(ten + (b - hexA)); throw new NumberFormatException("Illegal hex character: "+b); } /** * Define the DEFINITIVE opendap identifier unescape function. * @param id The identifier to unescape. * @return The unescaped identifier. */ public static String unescapeDAPIdentifier(String id) { String s; try { s = unescapeString(id); } catch (Exception e) { s = null; } return s; } /** * Define the DEFINITIVE URL escape function. * Beware, this is a rather complex operation * * @param url The url string * @return The escaped expression. */ static private final Pattern p = Pattern.compile("([\\w]+)://([.\\w]+(:[\\d]+)?)([/][^?#])?([?][^#]*)?([#].*)?"); public static String escapeURL(String url) { String protocol; String authority; String path; String query; String fragment; if(false) { // We split the url ourselves to minimize character dependencies Matcher m = p.matcher(url); boolean match = m.matches(); if(!match) return null; protocol = m.group(1); authority = m.group(2); path = m.group(3); query = m.group(4); fragment = m.group(5); } else {// faster, but may not work quite right URL u; try {u = new URL(url);} catch (MalformedURLException e) { return null; } protocol = u.getProtocol(); authority = u.getAuthority(); path = u.getPath(); query = u.getQuery(); fragment = u.getRef(); } // Reassemble StringBuilder ret = new StringBuilder(protocol); ret.append("://"); ret.append(authority); if(path != null && path.length() > 0) { // Encode pieces between '/' String pieces[] = path.split("[/]",-1); for(int i=0;i 0) ret.append("/"); ret.append(urlEncode(p)); } } if(query != null && query.length() > 0) { ret.append("?"); ret.append(escapeURLQuery(query)); } if(fragment != null && fragment.length() > 0) { ret.append("#"); ret.append(urlEncode(fragment)); } return ret.toString(); } static int nextpiece(String s, int index, String sep) { index = s.indexOf(sep,index); if(index < 0) index = s.length(); return index; } /** * Define the DEFINITIVE URL constraint expression escape function. * * @param ce The expression to modify. * @return The escaped expression. */ public static String escapeURLQuery(String ce) { try { ce = escapeString(ce, _allowableInUrlQuery); } catch(Exception e) {ce = null;} return ce; } /** * Define the DEFINITIVE URL constraint expression unescape function. * * @param ce The expression to unescape. * @return The unescaped expression. */ public static String unescapeURLQuery(String ce) { try { ce = unescapeString(ce); } catch(Exception e) {ce = null;} return ce; } /** * Define the DEFINITIVE URL escape function. * Note that the whole string is escaped, so * be careful what you pass into this procedure. * * @param s The string to modify. * @return The escaped expression. */ private static String urlEncode(String s) { //try {s = URLEncoder.encode(s,"UTF-8");} catch(Exception e) {s = null;} s = escapeString(s, _allowableInUrl); return s; } /** * Define the DEFINITIVE URL unescape function. * * @param s The string to unescape. * @return The unescaped expression. */ public static String urlDecode(String s) { try { //s = unescapeString(s, _URIEscape, "", false); s = URLDecoder.decode(s,"UTF-8"); } catch(Exception e) {s = null;} return s; } /** * Decode all of the parts of the url including query and fragment * @param url the url to encode */ public static String unescapeURL(String url) { String newurl; newurl = urlDecode(url); return newurl; } /** * Define the DAP escape identifier function. * * @param s The string to encode. * @return The escaped string. */ public static String escapeDAPIdentifier(String s) { return escapeString(s, _allowableInDAP); } /** * Define the OGC Web Services escape function. * * @param s The string to encode. * @return The escaped string. */ public static String escapeOGC(String s) { return escapeString(s, _allowableInOGC); } static public void testOGC() { for (char c : (alphaNumeric + " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~").toCharArray()) { String encoded = EscapeStrings.escapeOGC("" + c); System.err.printf("|%c|=|%s|%n", c, encoded); } } /** * Define the OGC unescape function. * * @param s The string to unescape. b * @return The unescaped string. */ public static String unescapeOGC(String s) { return urlDecode(s); } /////////////////////////////////////////////////////////////// /** * backslash escape a string * @param x escape this; may be null * @param reservedChars these chars get a backslash in front of them * @return escaped string */ static public String backslashEscape(String x, String reservedChars) { if (x == null) { return null; } else if (reservedChars == null) { return x; } boolean ok = true; for (int pos = 0; pos < x.length(); pos++) { char c = x.charAt(pos); if (reservedChars.indexOf(c) >= 0) { ok = false; break; } } if (ok) return x; // gotta do it StringBuilder sb = new StringBuilder(x); for (int pos = 0; pos < sb.length(); pos++) { char c = sb.charAt(pos); if (reservedChars.indexOf(c) < 0) { continue; } sb.setCharAt(pos, '\\'); pos++; sb.insert(pos, c); pos++; } return sb.toString(); } /** * backslash unescape a string * @param x unescape this * @return string with \c -> c */ static public String backslashUnescape(String x) { if (!x.contains("\\")) return x; // gotta do it StringBuilder sb = new StringBuilder(x.length()); for (int pos = 0; pos < x.length(); pos++) { char c = x.charAt(pos); if (c == '\\') { c = x.charAt(++pos); // skip backslash, get next cha } sb.append(c); } return sb.toString(); } /** * Tokenize an escaped name using "." as delimiter, skipping "\." * * @param escapedName an escaped name * @return list of tokens */ public static List tokenizeEscapedName(String escapedName) { List result = new ArrayList<>(); int pos = 0; int start = 0; while (true) { pos = escapedName.indexOf(sep, pos+1); if (pos <= 0) break; if ((pos > 0) && escapedName.charAt(pos-1) != '\\') { result.add(escapedName.substring(start, pos)); start = pos+1; } } result.add(escapedName.substring(start, escapedName.length())); // remaining return result; } static private final int sep = '.'; /** * Find first occurence of char c in escapedName, excluding escaped c. * @param escapedName search in this string * @param c for this char but not \\cha * @return pos in string, or -1 */ public static int indexOf(String escapedName, char c) { int pos = 0; while (true) { pos = escapedName.indexOf(c, pos+1); if (pos <= 0) return pos; if ((pos > 0) && escapedName.charAt(pos-1) != '\\') return pos; } } public static void main2(String[] args) { String s = "http://thredds.ucar.edu/thredds/dodsC/fmrc/NCEP/GFS/Global_0p5deg/runs/NCEP-GFS-Global_0p5deg_RUN_2011-07-15T00:00:00Z.html.asc?Total_cloud_cover_low_cloud%5B1:1:1%5D%5B0:1:360%5D%5B0:1:719%5D"; log.debug("%s%n", s); log.debug("%s%n", unescapeURL(s)); } public static void main(String[] args) { String s = "https://localhost:8443/thredds/admin/log/access/"; System.out.printf("%s%n", s); System.out.printf("%s%n", escapeURL(s)); } public static void mainOld(String[] args) throws Exception { /* Ignore if (args.length > 0) { for (String s : args) { LogStream.out.println("id2www - Input: \"" + s + "\" Output: \"" + id2www(s) + "\" recaptured: " + www2id(id2www(s))); } for (String s : args) { String out = id2www(s); LogStream.out.println("www2id - Input: \"" + out + "\" Output: \"" + www2id(out) + "\" recaptured: " + id2www(www2id(out))); } } else { char[] allBytes = new char[256]; for (int b = 0; b < 256; b++) allBytes[b] = (char) b; String allChars = new String(allBytes); LogStream.out.println("id2www All Characters"); LogStream.out.println("Input String: \"" + allChars + "\""); LogStream.out.println("Output String: \"" + id2www(allChars) + "\""); LogStream.out.println("Recaptured String: \"" + www2id(id2www(allChars)) + "\" "); } */ } /** * This method is used to normalize strings prio * to their inclusion in XML documents. XML has certain parsing requirements * around reserved characters. These reserved characters must be replaced with * symbols recognized by the XML parser as place holder for the actual symbol. * * The rule for this normalization is as follows: * * *
* * @param s The String to be normalized. * @return The normalized String. */ // Some handy definitons. static final String xmlGT = ">"; static final String xmlLT = "<"; static final String xmlAmp = "&"; static final String xmlApos = "'"; static final String xmlQuote = """; public static String normalizeToXML(String s) { StringBuffer sb = new StringBuffer(s); for (int offset = 0; offset < sb.length(); offset++) { char c = sb.charAt(offset); switch (c) { case '>': // GreaterThan sb.replace(offset, offset + 1, xmlGT); break; case '<': // Less Than sb.replace(offset, offset + 1, xmlLT); break; case '&': // Ampersand sb.replace(offset, offset + 1, xmlAmp); break; case '\'': // Single Quote sb.replace(offset, offset + 1, xmlApos); break; case '\"': // Double Quote sb.replace(offset, offset + 1, xmlQuote); break; default: break; } } return (sb.toString()); } /** * Given a backslash escaped name, * convert to a DAP escaped name * * @param bs the string to DAP encode; may have backslash escapes * @return escaped string */ public static String backslashToDAP(String bs) { StringBuilder buf = new StringBuilder(); int len = bs.length(); for(int i=0;i- The < (less than) character is replaced with < *
- The > (greater than) character is replaced with > *
- The & (ampersand) character is replaced with & *
- The ' (apostrophe) character is replaced with ' *
- The " (double quote) character is replaced with " *
© 2015 - 2024 Weber Informatics LLC | Privacy Policy