All downloads are free. The search and download functionality uses the official Maven repository.

org.glassfish.jersey.uri.internal.UriTemplateParser Maven / Gradle / Ivy

Go to download

A bundle project producing JAX-RS RI bundles. The primary artifact is an "all-in-one" OSGi-fied JAX-RS RI bundle (jaxrs-ri.jar). Attached to that are two compressed JAX-RS RI archives. The first archive (jaxrs-ri.zip) consists of the binary RI bits and contains the API jar (under the "api" directory), the RI libraries (under the "lib" directory) as well as all external RI dependencies (under the "ext" directory). The second archive (jaxrs-ri-src.zip) contains a buildable JAX-RS RI source bundle and contains the API jar (under the "api" directory), the RI sources (under the "src" directory) as well as all external RI dependencies (under the "ext" directory). The second archive also contains a "build.xml" Ant script that builds the RI sources. To build the JAX-RS RI, simply unzip the archive, cd to the created jaxrs-ri directory and invoke "ant" from the command line.

There is a newer version: 3.1.9
Show newest version
/*
 * Copyright (c) 2010, 2023 Oracle and/or its affiliates. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v. 2.0, which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * This Source Code may also be made available under the following Secondary
 * Licenses when the conditions for such availability set forth in the
 * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
 * version 2 with the GNU Classpath Exception, which is available at
 * https://www.gnu.org/software/classpath/license.html.
 *
 * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
 */

package org.glassfish.jersey.uri.internal;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.glassfish.jersey.internal.LocalizationMessages;
import org.glassfish.jersey.uri.UriComponent;

/**
 * A URI template parser that parses JAX-RS specific URI templates.
 *
 * @author Paul Sandoz
 * @author Gerard Davison (gerard.davison at oracle.com)
 */
public class UriTemplateParser {

    /* package */ static final int[] EMPTY_INT_ARRAY = new int[0];
    private static final Set RESERVED_REGEX_CHARACTERS = initReserved();
    private static final String[] HEX_TO_UPPERCASE_REGEX = initHexToUpperCaseRegex();

    private static Set initReserved() {
        char[] reserved = {
                '.', '^', '&', '!',
                '?', '-', ':', '<',
                '(', '[', '$', '=',
                ')', ']', ',', '>',
                '*', '+', '|'};

        Set s = new HashSet(reserved.length);
        for (char c : reserved) {
            s.add(c);
        }
        return s;
    }

    /**
     * Default URI template value regexp pattern.
     */
    public static final Pattern TEMPLATE_VALUE_PATTERN = Pattern.compile("[^/]+");
    public static final Pattern TEMPLATE_VALUE_PATTERN_MULTI = Pattern.compile("[^,/]+");
    public static final Pattern MATCH_NUMBER_OF_MAX_LENGTH_4 = Pattern.compile("[1-9][0-9]{0,3}");

    private final String template;
    private final StringBuffer regex = new StringBuffer();
    private final StringBuffer normalizedTemplate = new StringBuffer();
    private final StringBuffer literalCharactersBuffer = new StringBuffer();
    private final Pattern pattern;
    private final List names = new ArrayList<>();
    private final List parts = new ArrayList<>();
    private final List groupCounts = new ArrayList();
    private final Map nameToPattern = new HashMap();
    private int numOfExplicitRegexes;
    private int skipGroup;

    private int literalCharacters;

    /**
     * Parse a template.
     *
     * @param template the template.
     * @throws IllegalArgumentException if the template is null, an empty string
     *                                  or does not conform to a JAX-RS URI template.
     */
    public UriTemplateParser(final String template) throws IllegalArgumentException {
        if (template == null || template.isEmpty()) {
            throw new IllegalArgumentException("Template is null or has zero length");
        }

        this.template = template;
        parse(new CharacterIterator(template));
        try {
            pattern = Pattern.compile(regex.toString());
        } catch (PatternSyntaxException ex) {
            throw new IllegalArgumentException("Invalid syntax for the template expression '"
                    + regex + "'",
                    ex
            );
        }
    }

    /**
     * Get the template.
     *
     * @return the template.
     */
    public final String getTemplate() {
        return template;
    }

    /**
     * Get the pattern.
     *
     * @return the pattern.
     */
    public final Pattern getPattern() {
        return pattern;
    }

    /**
     * Get the normalized template.
     * 

* A normalized template is a template without any explicit regular * expressions. * * @return the normalized template. */ public final String getNormalizedTemplate() { return normalizedTemplate.toString(); } /** * Get the map of template names to patterns. * * @return the map of template names to patterns. */ public final Map getNameToPattern() { return nameToPattern; } /** * Get the list of template names. * * @return the list of template names. */ public final List getNames() { return names; } /** * Get a collection of uri parts (static strings and dynamic arguments) as parsed by the parser. * Can be used to compose the uri. This collection is usually a superset of {@link #getNames() names} * and other parts that do not have a template. * * @return List of parts of the uri. */ public List getUriParts() { return parts; } /** * Get the capturing group counts for each template variable. * * @return the capturing group counts. */ public final List getGroupCounts() { return groupCounts; } /** * Get the group indexes to capturing groups. *

* Any nested capturing groups will be ignored and the * the group index will refer to the top-level capturing * groups associated with the templates variables. * * @return the group indexes to capturing groups. */ public final int[] getGroupIndexes() { if (names.isEmpty()) { return EMPTY_INT_ARRAY; } int[] indexes = new int[names.size()]; indexes[0] = 0 + groupCounts.get(0); for (int i = 1; i < indexes.length; i++) { indexes[i] = indexes[i - 1] + groupCounts.get(i); } return indexes; } /** * Get the number of explicit regular expressions. * * @return the number of explicit regular expressions. */ public final int getNumberOfExplicitRegexes() { return numOfExplicitRegexes; } /** * Get the number of regular expression groups * * @return the number of regular expressions groups * * @since 2.9 */ public final int getNumberOfRegexGroups() { if (groupCounts.isEmpty()) { return 0; } else { int[] groupIndex = getGroupIndexes(); return groupIndex[groupIndex.length - 1] + skipGroup; } } /** * Get the number of literal characters. * * @return the number of literal characters. */ public final int getNumberOfLiteralCharacters() { return literalCharacters; } /** * Encode literal characters of a template. * * @param characters the literal characters * @return the encoded literal characters. 
*/ protected String encodeLiteralCharacters(final String characters) { return characters; } private void parse(final CharacterIterator ci) { try { while (ci.hasNext()) { char c = ci.next(); if (c == '{') { processLiteralCharacters(); skipGroup = parseName(ci, skipGroup); } else { literalCharactersBuffer.append(c); } } processLiteralCharacters(); } catch (NoSuchElementException ex) { throw new IllegalArgumentException(LocalizationMessages.ERROR_TEMPLATE_PARSER_INVALID_SYNTAX_TERMINATED( template), ex); } } private void processLiteralCharacters() { if (literalCharactersBuffer.length() > 0) { literalCharacters += literalCharactersBuffer.length(); String s = encodeLiteralCharacters(literalCharactersBuffer.toString()); normalizedTemplate.append(s); parts.add(new UriPart(s)); // Escape if reserved regex character for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (RESERVED_REGEX_CHARACTERS.contains(c)) { regex.append("\\"); regex.append(c); } else if (c == '%') { final char c1 = s.charAt(i + 1); final char c2 = s.charAt(i + 2); if (UriComponent.isHexCharacter(c1) && UriComponent.isHexCharacter(c2)) { regex.append("%").append(HEX_TO_UPPERCASE_REGEX[c1]).append(HEX_TO_UPPERCASE_REGEX[c2]); i += 2; } } else { regex.append(c); } } literalCharactersBuffer.setLength(0); } } private static String[] initHexToUpperCaseRegex() { String[] table = new String[0x80]; for (int i = 0; i < table.length; i++) { table[i] = String.valueOf((char) i); } for (char c = 'a'; c <= 'f'; c++) { // initialize table values: table[a] = ([aA]) ... table[c] = "[" + c + (char) (c - 'a' + 'A') + "]"; } for (char c = 'A'; c <= 'F'; c++) { // initialize table values: table[A] = ([aA]) ... 
table[c] = "[" + (char) (c - 'A' + 'a') + c + "]"; } return table; } private int parseName(final CharacterIterator ci, int skipGroup) { Variables variables = new Variables(); variables.parse(ci, template); Pattern namePattern; // Make sure we display something useful String name = variables.getName(); int argIndex = 0; try { switch (variables.paramType) { case '?': case ';': case '&': // Build up the regex for each of these properties StringBuilder regexBuilder = new StringBuilder(); String separator = null; switch (variables.paramType) { case '?': separator = "\\&"; regexBuilder.append("\\?"); // first symbol break; case '&': separator = "\\&"; regexBuilder.append("\\&"); // first symbol break; case ';': separator = ";/\\?"; regexBuilder.append(";"); // first symbol break; } // Start a group because each parameter could repeat // names.add("__" + (paramType == '?' ? "query" : "matrix")); regexBuilder.append('('); for (String subName : variables.names) { TemplateVariable.Position position = determinePosition(variables.separatorCount, argIndex); TemplateVariable templateVariable = TemplateVariable.createTemplateVariable(variables.paramType, subName, position); templateVariable.setStar(variables.explodes(argIndex)); regexBuilder.append("(&?"); regexBuilder.append(subName); regexBuilder.append("(=([^"); regexBuilder.append(separator); regexBuilder.append(']'); if (variables.hasLength(argIndex)) { regexBuilder.append('{').append(variables.getLength(argIndex)).append('}'); templateVariable.setLength(variables.getLength(argIndex)); } else { regexBuilder.append('*'); } regexBuilder.append("))?"); regexBuilder.append(')'); if (argIndex != 0) { regexBuilder.append('|'); } names.add(templateVariable); parts.add(templateVariable); groupCounts.add( argIndex == 0 ? 
5 : 3); argIndex++; } // groupCounts.add(1); skipGroup = 1; // Knock of last bar regexBuilder.append(")*"); namePattern = Pattern.compile(regexBuilder.toString()); // Make sure we display something useful break; default: if (variables.separatorCount == 0) { if (variables.hasRegexp(0)) { numOfExplicitRegexes++; } TemplateVariable templateVariable = TemplateVariable .createTemplateVariable(variables.paramType, variables.getName(0), TemplateVariable.Position.SINGLE); templateVariable.setStar(variables.explodes(0)); names.add(templateVariable); parts.add(templateVariable); // groupCounts.add(1 + skipGroup); if (variables.hasLength(0)) { if (variables.getLength(0) != 0) { int len = TEMPLATE_VALUE_PATTERN.pattern().length() - 1; String pattern = TEMPLATE_VALUE_PATTERN.pattern().substring(0, len) + '{' + variables.getLength(0) + '}'; namePattern = Pattern.compile(pattern); } else { namePattern = TEMPLATE_VALUE_PATTERN; } templateVariable.setLength(variables.getLength(0)); } else { namePattern = (!variables.hasRegexp(0)) ? 
TEMPLATE_VALUE_PATTERN : Pattern.compile(variables.regexp(0)); } if (nameToPattern.containsKey(name)) { if (!nameToPattern.get(name).equals(namePattern)) { throw new IllegalArgumentException( LocalizationMessages.ERROR_TEMPLATE_PARSER_NAME_MORE_THAN_ONCE(name, template)); } } else { nameToPattern.put(name, namePattern); } // Determine group count of pattern Matcher m = namePattern.matcher(""); int g = m.groupCount(); groupCounts.add(1 + skipGroup); skipGroup = g; } else { argIndex = 0; regexBuilder = new StringBuilder(); for (String subName : variables.names) { if (argIndex != 0) { regexBuilder .append('(') .append(','); } TemplateVariable.Position position = determinePosition(variables.separatorCount, argIndex); TemplateVariable templateVariable = TemplateVariable.createTemplateVariable(variables.paramType, subName, position); templateVariable.setStar(variables.explodes(argIndex)); names.add(templateVariable); parts.add(templateVariable); if (variables.hasLength(argIndex)) { int len = TEMPLATE_VALUE_PATTERN_MULTI.pattern().length() - 1; String pattern = TEMPLATE_VALUE_PATTERN_MULTI.pattern() .substring(0, len) + '{' + variables.getLength(argIndex) + '}'; namePattern = Pattern.compile(pattern); templateVariable.setLength(variables.getLength(argIndex)); } else { namePattern = (!variables.hasRegexp(argIndex)) ? 
TEMPLATE_VALUE_PATTERN_MULTI : Pattern.compile(variables.regexp(argIndex)); } // TODO breaks RFC 6570 --backward compatibility with default pattern if (nameToPattern.containsKey(subName) && variables.paramType == 'p') { if (!nameToPattern.get(subName).equals(namePattern)) { throw new IllegalArgumentException( LocalizationMessages.ERROR_TEMPLATE_PARSER_NAME_MORE_THAN_ONCE(name, template)); } } else { nameToPattern.put(subName, namePattern); } regexBuilder .append('(') .append(namePattern) .append(')'); if (argIndex != 0) { regexBuilder.append(")"); } if (!variables.hasRegexp(argIndex)) { regexBuilder.append("{0,1}"); } argIndex++; groupCounts.add(2); } namePattern = Pattern.compile(regexBuilder.toString()); } break; } regex.append('(') .append(namePattern) .append(')'); normalizedTemplate.append('{') .append(name) .append('}'); } catch (PatternSyntaxException ex) { throw new IllegalArgumentException(LocalizationMessages .ERROR_TEMPLATE_PARSER_INVALID_SYNTAX(variables.regexp(argIndex), variables.name, template), ex); } // Tell the next time through the loop how many to skip return skipGroup; } private static TemplateVariable.Position determinePosition(int separatorCount, int argIndex) { TemplateVariable.Position position = separatorCount == 0 ? TemplateVariable.Position.SINGLE : argIndex == 0 ? TemplateVariable.Position.FIRST : argIndex == separatorCount ? 
TemplateVariable.Position.LAST : TemplateVariable.Position.MIDDLE; return position; } private static class Variables { private char paramType = 'p'; private List names = new ArrayList<>(); // names private List explodes = new ArrayList<>(); // * private List regexps = new ArrayList<>(); // : regexp private List lengths = new ArrayList<>(); // :1-9999 private int separatorCount = 0; private StringBuilder name = new StringBuilder(); private int getCount() { return names.size(); } private boolean explodes(int index) { return !explodes.isEmpty() && explodes.get(index); } private boolean hasRegexp(int index) { return !regexps.isEmpty() && regexps.get(index) != null; } private String regexp(int index) { return regexps.get(index); } private boolean hasLength(int index) { return !lengths.isEmpty() && lengths.get(index) != null; } private Integer getLength(int index) { return lengths.get(index); } private char getParamType() { return paramType; } private int getSeparatorCount() { return separatorCount; } private String getName() { return name.toString(); } private String getName(int index) { return names.get(index); } private void parse(CharacterIterator ci, String template) { name.append('{'); char c = consumeWhiteSpace(ci); StringBuilder nameBuilder = new StringBuilder(); // Look for query or matrix types if (c == '?' || c == ';' || c == '.' 
|| c == '+' || c == '#' || c == '/' || c == '&') { paramType = c; c = ci.next(); name.append(paramType); } if (Character.isLetterOrDigit(c) || c == '_') { // Template name character nameBuilder.append(c); name.append(c); } else { throw new IllegalArgumentException(LocalizationMessages.ERROR_TEMPLATE_PARSER_ILLEGAL_CHAR_START_NAME(c, ci.pos(), template)); } StringBuilder regexBuilder = new StringBuilder(); State state = State.TEMPLATE; State previousState; boolean star = false; boolean whiteSpace = false; boolean ignoredLastComma = false; int bracketDepth = 1; // { int regExpBracket = 0; // [ int regExpRound = 0; // ( boolean reqExpSlash = false; // \ while ((state.value & (State.ERROR.value | State.EXIT.value)) == 0) { previousState = state; c = ci.next(); // "\\{(\\w[-\\w\\.]*) if (Character.isLetterOrDigit(c)) { // Template name character append(c, state, nameBuilder, regexBuilder); state = state.transition(State.TEMPLATE.value | State.REGEXP.value); } else switch (c) { case '_': case '-': case '.': // Template name character append(c, state, nameBuilder, regexBuilder); state = state.transition(State.TEMPLATE.value | State.REGEXP.value); break; case ',': switch (state) { case REGEXP: if (bracketDepth == 1 && !reqExpSlash && regExpBracket == 0 && regExpRound == 0) { state = State.COMMA; } else { regexBuilder.append(c); } break; case TEMPLATE: case STAR: state = State.COMMA; break; } separatorCount++; break; case ':': if (state == State.REGEXP) { regexBuilder.append(c); } state = state.transition(State.TEMPLATE.value | State.REGEXP.value | State.STAR.value, State.REGEXP); break; case '*': state = state.transition(State.TEMPLATE.value | State.REGEXP.value); if (state == State.TEMPLATE) { star = true; state = State.STAR; } else if (state == State.REGEXP){ regexBuilder.append(c); } break; case '}': bracketDepth--; if (bracketDepth == 0) { state = State.BRACKET; } else { regexBuilder.append(c); } break; case '{': if (state == State.REGEXP) { bracketDepth++; 
regexBuilder.append(c); } else { state = State.ERROR; // Error multiple parenthesis } break; default: if (!Character.isWhitespace(c)) { if (state != State.REGEXP) { state = State.ERROR; // Error - unknown symbol } else { switch (c) { case '(' : regExpRound++; break; case ')': regExpRound--; break; case '[': regExpBracket++; break; case ']': regExpBracket--; break; } if (c == '\\') { reqExpSlash = true; } else { reqExpSlash = false; } regexBuilder.append(c); } } whiteSpace = true; break; } // Store parsed name, and associated star, regexp, and length switch (state) { case COMMA: case BRACKET: if (nameBuilder.length() == 0 && regexBuilder.length() == 0 && !star && name.charAt(name.length() - 1) == ',' /* ignore last comma */) { if (ignoredLastComma) { // Do not ignore twice state = State.ERROR; } else { name.setLength(name.length() - 1); ignoredLastComma = true; } break; } if (regexBuilder.length() != 0) { String regex = regexBuilder.toString(); Matcher matcher = MATCH_NUMBER_OF_MAX_LENGTH_4.matcher(regex); if (matcher.matches()) { lengths.add(Integer.parseInt(regex)); regexps.add(null); } else { if (paramType != 'p') { state = State.ERROR; // regular expressions allowed just on path by the REST spec c = regex.charAt(0); // display proper error values ci.setPosition(ci.pos() - regex.length()); break; } lengths.add(null); regexps.add(regex); } } else { regexps.add(previousState == State.REGEXP ? "" : null); lengths.add(previousState == State.REGEXP ? 
0 : null); } names.add(nameBuilder.toString()); explodes.add(star); nameBuilder.setLength(0); regexBuilder.setLength(0); star = false; ignoredLastComma = false; break; } if (!whiteSpace) { name.append(c); } whiteSpace = false; // switch state back or exit switch (state) { case COMMA: state = State.TEMPLATE; break; case BRACKET: state = State.EXIT; break; } } if (state == State.ERROR) { throw new IllegalArgumentException( LocalizationMessages.ERROR_TEMPLATE_PARSER_ILLEGAL_CHAR_AFTER_NAME(c, ci.pos(), template)); } } private static void append(char c, State state, StringBuilder templateSb, StringBuilder regexpSb) { if (state == State.TEMPLATE) { templateSb.append(c); } else { // REGEXP regexpSb.append(c); } } private static char consumeWhiteSpace(final CharacterIterator ci) { char c; do { c = ci.next(); } while (Character.isWhitespace(c)); return c; } private enum State { TEMPLATE/**/(0b000000001), // Template name, before '*', ':', ',' or '}' REGEXP/* */(0b000000010), // Regular expression inside template, after : STAR/* */(0b000000100), // * COMMA/* */(0b000001000), // , BRACKET/* */(0b000010000), // } EXIT/* */(0b001000000), // quit parsing ERROR/* */(0b100000000); // error when parsing private final int value; State(int value) { this.value = value; } /** * Return error state when in not any of allowed states represented by their combined values * @param allowed The combined values of states (state1.value | state2.value) not to return error level * @return this state if in allowed state or {@link State#ERROR} if not */ State transition(int allowed) { return ((value & allowed) != 0) ? 
this : State.ERROR; } /** * Return error state when in not any of allowed states represented by their combined values * @param allowed The combined values of states (state1.value | state2.value) not to return error level * @param next the next state to transition * @return next state if in allowed state or {@link State#ERROR} if not */ State transition(int allowed, State next) { return ((value & allowed) != 0) ? next : State.ERROR; } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy