All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.jackrabbit.util.ISO9075 Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Implements the encode and decode routines as specified for XML name to SQL
 * identifier conversion in ISO 9075-14:2003.
* If a character c is not valid at a certain position in an XML 1.0 * NCName it is encoded in the form: '_x' + hexValueOf(c) + '_'. */ public class ISO9075 { /** Hidden constructor. */ private ISO9075() { } /** Pattern on an encoded character */ private static final Pattern ENCODE_PATTERN = Pattern.compile("_x\\p{XDigit}{4}_"); /** Padding characters */ private static final char[] PADDING = new char[] {'0', '0', '0'}; /** All the possible hex digits */ private static final String HEX_DIGITS = "0123456789abcdefABCDEF"; /** * Encodes name as specified in ISO 9075. * @param name the String to encode. * @return the encoded String or name if it does * not need encoding. */ public static String encode(String name) { // quick check for root node name if (name.length() == 0) { return name; } if (XMLChar.isValidName(name) && name.indexOf("_x") < 0) { // already valid return name; } else { // encode StringBuffer encoded = new StringBuffer(); for (int i = 0; i < name.length(); i++) { if (i == 0) { // first character of name if (XMLChar.isNameStart(name.charAt(i))) { if (needsEscaping(name, i)) { // '_x' must be encoded encode('_', encoded); } else { encoded.append(name.charAt(i)); } } else { // not valid as first character -> encode encode(name.charAt(i), encoded); } } else if (!XMLChar.isName(name.charAt(i))) { encode(name.charAt(i), encoded); } else { if (needsEscaping(name, i)) { // '_x' must be encoded encode('_', encoded); } else { encoded.append(name.charAt(i)); } } } return encoded.toString(); } } /** * Encodes path as specified in ISO 9075. Please note that * the character '[' is not encoded but rather interpreted as * the start of an index in a path segment. * * @param path the String to encode. * @return the encoded String. 
*/ public static String encodePath(String path) { String[] names = Text.explode(path, '/', true); StringBuffer encoded = new StringBuffer(path.length()); for (int i = 0; i < names.length; i++) { // detect index String index = null; int idx = names[i].indexOf('['); if (idx != -1) { index = names[i].substring(idx); names[i] = names[i].substring(0, idx); } encoded.append(encode(names[i])); if (index != null) { encoded.append(index); } if (i < names.length - 1) { encoded.append('/'); } } return encoded.toString(); } /** * Decodes the name. * @param name the String to decode. * @return the decoded String. */ public static String decode(String name) { // quick check if (name.indexOf("_x") < 0) { // not encoded return name; } StringBuffer decoded = new StringBuffer(); Matcher m = ENCODE_PATTERN.matcher(name); while (m.find()) { char ch = (char) Integer.parseInt(m.group().substring(2, 6), 16); if (ch == '$' || ch == '\\') { m.appendReplacement(decoded, "\\" + ch); } else { m.appendReplacement(decoded, Character.toString(ch)); } } m.appendTail(decoded); return decoded.toString(); } //-------------------------< internal >------------------------------------- /** * Encodes the character c as a String in the following form: * "_x" + hex value of c + "_". Where the hex value has * four digits if the character with possibly leading zeros. *

* Example: ' ' (the space character) is encoded to: _x0020_ * @param c the character to encode * @param b the encoded character is appended to StringBuffer * b. */ private static void encode(char c, StringBuffer b) { b.append("_x"); String hex = Integer.toHexString(c); b.append(PADDING, 0, 4 - hex.length()); b.append(hex); b.append("_"); } /** * Returns true if name.charAt(location) is the underscore * character and the following character sequence is 'xHHHH_' where H * is a hex digit. * @param name the name to check. * @param location the location to look at. * @throws ArrayIndexOutOfBoundsException if location > name.length() */ private static boolean needsEscaping(String name, int location) throws ArrayIndexOutOfBoundsException { if (name.charAt(location) == '_' && name.length() >= location + 6) { return name.charAt(location + 1) == 'x' && HEX_DIGITS.indexOf(name.charAt(location + 2)) != -1 && HEX_DIGITS.indexOf(name.charAt(location + 3)) != -1 && HEX_DIGITS.indexOf(name.charAt(location + 4)) != -1 && HEX_DIGITS.indexOf(name.charAt(location + 5)) != -1; } else { return false; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy