// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// org.glassfish.jersey.uri.internal.UriTemplateParser Maven / Gradle / Ivy

/*
 * Copyright (c) 2010, 2023 Oracle and/or its affiliates. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v. 2.0, which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * This Source Code may also be made available under the following Secondary
 * Licenses when the conditions for such availability set forth in the
 * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
 * version 2 with the GNU Classpath Exception, which is available at
 * https://www.gnu.org/software/classpath/license.html.
 *
 * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
 */

package org.glassfish.jersey.uri.internal;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.glassfish.jersey.internal.LocalizationMessages;
import org.glassfish.jersey.uri.UriComponent;

/**
 * A URI template parser that parses JAX-RS specific URI templates.
 *
 * @author Paul Sandoz
 * @author Gerard Davison (gerard.davison at oracle.com)
 */
public class UriTemplateParser {

    /* package */ static final int[] EMPTY_INT_ARRAY = new int[0];
    private static final Set RESERVED_REGEX_CHARACTERS = initReserved();
    private static final String[] HEX_TO_UPPERCASE_REGEX = initHexToUpperCaseRegex();

    private static Set initReserved() {
        char[] reserved = {
                '.', '^', '&', '!',
                '?', '-', ':', '<',
                '(', '[', '$', '=',
                ')', ']', ',', '>',
                '*', '+', '|'};

        Set s = new HashSet(reserved.length);
        for (char c : reserved) {
            s.add(c);
        }
        return s;
    }

    /**
     * Default URI template value regexp pattern.
     */
    public static final Pattern TEMPLATE_VALUE_PATTERN = Pattern.compile("[^/]+");
    public static final Pattern TEMPLATE_VALUE_PATTERN_MULTI = Pattern.compile("[^,/]+");
    public static final Pattern MATCH_NUMBER_OF_MAX_LENGTH_4 = Pattern.compile("[1-9][0-9]{0,3}");

    private final String template;
    private final StringBuffer regex = new StringBuffer();
    private final StringBuffer normalizedTemplate = new StringBuffer();
    private final StringBuffer literalCharactersBuffer = new StringBuffer();
    private final Pattern pattern;
    private final List names = new ArrayList<>();
    private final List parts = new ArrayList<>();
    private final List groupCounts = new ArrayList();
    private final Map nameToPattern = new HashMap();
    private int numOfExplicitRegexes;
    private int skipGroup;

    private int literalCharacters;

    /**
     * Parse a template.
     *
     * @param template the template.
     * @throws IllegalArgumentException if the template is null, an empty string
     *                                  or does not conform to a JAX-RS URI template.
     */
    public UriTemplateParser(final String template) throws IllegalArgumentException {
        if (template == null || template.isEmpty()) {
            throw new IllegalArgumentException("Template is null or has zero length");
        }

        this.template = template;
        parse(new CharacterIterator(template));
        try {
            pattern = Pattern.compile(regex.toString());
        } catch (PatternSyntaxException ex) {
            throw new IllegalArgumentException("Invalid syntax for the template expression '"
                    + regex + "'",
                    ex
            );
        }
    }

    /**
     * Get the template.
     *
     * @return the template.
     */
    public final String getTemplate() {
        return template;
    }

    /**
     * Get the pattern.
     *
     * @return the pattern.
     */
    public final Pattern getPattern() {
        return pattern;
    }

    /**
     * Get the normalized template.
     * 

* A normalized template is a template without any explicit regular * expressions. * * @return the normalized template. */ public final String getNormalizedTemplate() { return normalizedTemplate.toString(); } /** * Get the map of template names to patterns. * * @return the map of template names to patterns. */ public final Map getNameToPattern() { return nameToPattern; } /** * Get the list of template names. * * @return the list of template names. */ public final List getNames() { return names; } /** * Get a collection of uri parts (static strings and dynamic arguments) as parsed by the parser. * Can be used to compose the uri. This collection is usually a superset of {@link #getNames() names} * and other parts that do not have a template. * * @return List of parts of the uri. */ public List getUriParts() { return parts; } /** * Get the capturing group counts for each template variable. * * @return the capturing group counts. */ public final List getGroupCounts() { return groupCounts; } /** * Get the group indexes to capturing groups. *

* Any nested capturing groups will be ignored and the * the group index will refer to the top-level capturing * groups associated with the templates variables. * * @return the group indexes to capturing groups. */ public final int[] getGroupIndexes() { if (names.isEmpty()) { return EMPTY_INT_ARRAY; } int[] indexes = new int[names.size()]; indexes[0] = 0 + groupCounts.get(0); for (int i = 1; i < indexes.length; i++) { indexes[i] = indexes[i - 1] + groupCounts.get(i); } return indexes; } /** * Get the number of explicit regular expressions. * * @return the number of explicit regular expressions. */ public final int getNumberOfExplicitRegexes() { return numOfExplicitRegexes; } /** * Get the number of regular expression groups * * @return the number of regular expressions groups * * @since 2.9 */ public final int getNumberOfRegexGroups() { if (groupCounts.isEmpty()) { return 0; } else { int[] groupIndex = getGroupIndexes(); return groupIndex[groupIndex.length - 1] + skipGroup; } } /** * Get the number of literal characters. * * @return the number of literal characters. */ public final int getNumberOfLiteralCharacters() { return literalCharacters; } /** * Encode literal characters of a template. * * @param characters the literal characters * @return the encoded literal characters. 
*/ protected String encodeLiteralCharacters(final String characters) { return characters; } private void parse(final CharacterIterator ci) { try { while (ci.hasNext()) { char c = ci.next(); if (c == '{') { processLiteralCharacters(); skipGroup = parseName(ci, skipGroup); } else { literalCharactersBuffer.append(c); } } processLiteralCharacters(); } catch (NoSuchElementException ex) { throw new IllegalArgumentException(LocalizationMessages.ERROR_TEMPLATE_PARSER_INVALID_SYNTAX_TERMINATED( template), ex); } } private void processLiteralCharacters() { if (literalCharactersBuffer.length() > 0) { literalCharacters += literalCharactersBuffer.length(); String s = encodeLiteralCharacters(literalCharactersBuffer.toString()); normalizedTemplate.append(s); parts.add(new UriPart(s)); // Escape if reserved regex character for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (RESERVED_REGEX_CHARACTERS.contains(c)) { regex.append("\\"); regex.append(c); } else if (c == '%') { final char c1 = s.charAt(i + 1); final char c2 = s.charAt(i + 2); if (UriComponent.isHexCharacter(c1) && UriComponent.isHexCharacter(c2)) { regex.append("%").append(HEX_TO_UPPERCASE_REGEX[c1]).append(HEX_TO_UPPERCASE_REGEX[c2]); i += 2; } } else { regex.append(c); } } literalCharactersBuffer.setLength(0); } } private static String[] initHexToUpperCaseRegex() { String[] table = new String[0x80]; for (int i = 0; i < table.length; i++) { table[i] = String.valueOf((char) i); } for (char c = 'a'; c <= 'f'; c++) { // initialize table values: table[a] = ([aA]) ... table[c] = "[" + c + (char) (c - 'a' + 'A') + "]"; } for (char c = 'A'; c <= 'F'; c++) { // initialize table values: table[A] = ([aA]) ... 
table[c] = "[" + (char) (c - 'A' + 'a') + c + "]"; } return table; } private int parseName(final CharacterIterator ci, int skipGroup) { Variables variables = new Variables(); variables.parse(ci, template); Pattern namePattern; // Make sure we display something useful String name = variables.getName(); int argIndex = 0; try { switch (variables.paramType) { case '?': case ';': case '&': // Build up the regex for each of these properties StringBuilder regexBuilder = new StringBuilder(); String separator = null; switch (variables.paramType) { case '?': separator = "\\&"; regexBuilder.append("\\?"); // first symbol break; case '&': separator = "\\&"; regexBuilder.append("\\&"); // first symbol break; case ';': separator = ";/\\?"; regexBuilder.append(";"); // first symbol break; } // Start a group because each parameter could repeat // names.add("__" + (paramType == '?' ? "query" : "matrix")); regexBuilder.append('('); for (String subName : variables.names) { TemplateVariable.Position position = determinePosition(variables.separatorCount, argIndex); TemplateVariable templateVariable = TemplateVariable.createTemplateVariable(variables.paramType, subName, position); templateVariable.setStar(variables.explodes(argIndex)); regexBuilder.append("(&?"); regexBuilder.append(subName); regexBuilder.append("(=([^"); regexBuilder.append(separator); regexBuilder.append(']'); if (variables.hasLength(argIndex)) { regexBuilder.append('{').append(variables.getLength(argIndex)).append('}'); templateVariable.setLength(variables.getLength(argIndex)); } else { regexBuilder.append('*'); } regexBuilder.append("))?"); regexBuilder.append(')'); if (argIndex != 0) { regexBuilder.append('|'); } names.add(templateVariable); parts.add(templateVariable); groupCounts.add( argIndex == 0 ? 
5 : 3); argIndex++; } // groupCounts.add(1); skipGroup = 1; // Knock of last bar regexBuilder.append(")*"); namePattern = Pattern.compile(regexBuilder.toString()); // Make sure we display something useful break; default: if (variables.separatorCount == 0) { if (variables.hasRegexp(0)) { numOfExplicitRegexes++; } TemplateVariable templateVariable = TemplateVariable .createTemplateVariable(variables.paramType, variables.getName(0), TemplateVariable.Position.SINGLE); templateVariable.setStar(variables.explodes(0)); names.add(templateVariable); parts.add(templateVariable); // groupCounts.add(1 + skipGroup); if (variables.hasLength(0)) { if (variables.getLength(0) != 0) { int len = TEMPLATE_VALUE_PATTERN.pattern().length() - 1; String pattern = TEMPLATE_VALUE_PATTERN.pattern().substring(0, len) + '{' + variables.getLength(0) + '}'; namePattern = Pattern.compile(pattern); } else { namePattern = TEMPLATE_VALUE_PATTERN; } templateVariable.setLength(variables.getLength(0)); } else { namePattern = (!variables.hasRegexp(0)) ? 
TEMPLATE_VALUE_PATTERN : Pattern.compile(variables.regexp(0)); } if (nameToPattern.containsKey(name)) { if (!nameToPattern.get(name).equals(namePattern)) { throw new IllegalArgumentException( LocalizationMessages.ERROR_TEMPLATE_PARSER_NAME_MORE_THAN_ONCE(name, template)); } } else { nameToPattern.put(name, namePattern); } // Determine group count of pattern Matcher m = namePattern.matcher(""); int g = m.groupCount(); groupCounts.add(1 + skipGroup); skipGroup = g; } else { argIndex = 0; regexBuilder = new StringBuilder(); for (String subName : variables.names) { if (argIndex != 0) { regexBuilder .append('(') .append(','); } TemplateVariable.Position position = determinePosition(variables.separatorCount, argIndex); TemplateVariable templateVariable = TemplateVariable.createTemplateVariable(variables.paramType, subName, position); templateVariable.setStar(variables.explodes(argIndex)); names.add(templateVariable); parts.add(templateVariable); if (variables.hasLength(argIndex)) { int len = TEMPLATE_VALUE_PATTERN_MULTI.pattern().length() - 1; String pattern = TEMPLATE_VALUE_PATTERN_MULTI.pattern() .substring(0, len) + '{' + variables.getLength(argIndex) + '}'; namePattern = Pattern.compile(pattern); templateVariable.setLength(variables.getLength(argIndex)); } else { namePattern = (!variables.hasRegexp(argIndex)) ? 
TEMPLATE_VALUE_PATTERN_MULTI : Pattern.compile(variables.regexp(argIndex)); } // TODO breaks RFC 6570 --backward compatibility with default pattern if (nameToPattern.containsKey(subName) && variables.paramType == 'p') { if (!nameToPattern.get(subName).equals(namePattern)) { throw new IllegalArgumentException( LocalizationMessages.ERROR_TEMPLATE_PARSER_NAME_MORE_THAN_ONCE(name, template)); } } else { nameToPattern.put(subName, namePattern); } regexBuilder .append('(') .append(namePattern) .append(')'); if (argIndex != 0) { regexBuilder.append(")"); } if (!variables.hasRegexp(argIndex)) { regexBuilder.append("{0,1}"); } argIndex++; groupCounts.add(2); } namePattern = Pattern.compile(regexBuilder.toString()); } break; } regex.append('(') .append(namePattern) .append(')'); normalizedTemplate.append('{') .append(name) .append('}'); } catch (PatternSyntaxException ex) { throw new IllegalArgumentException(LocalizationMessages .ERROR_TEMPLATE_PARSER_INVALID_SYNTAX(variables.regexp(argIndex), variables.name, template), ex); } // Tell the next time through the loop how many to skip return skipGroup; } private static TemplateVariable.Position determinePosition(int separatorCount, int argIndex) { TemplateVariable.Position position = separatorCount == 0 ? TemplateVariable.Position.SINGLE : argIndex == 0 ? TemplateVariable.Position.FIRST : argIndex == separatorCount ? 
TemplateVariable.Position.LAST : TemplateVariable.Position.MIDDLE; return position; } private static class Variables { private char paramType = 'p'; private List names = new ArrayList<>(); // names private List explodes = new ArrayList<>(); // * private List regexps = new ArrayList<>(); // : regexp private List lengths = new ArrayList<>(); // :1-9999 private int separatorCount = 0; private StringBuilder name = new StringBuilder(); private int getCount() { return names.size(); } private boolean explodes(int index) { return !explodes.isEmpty() && explodes.get(index); } private boolean hasRegexp(int index) { return !regexps.isEmpty() && regexps.get(index) != null; } private String regexp(int index) { return regexps.get(index); } private boolean hasLength(int index) { return !lengths.isEmpty() && lengths.get(index) != null; } private Integer getLength(int index) { return lengths.get(index); } private char getParamType() { return paramType; } private int getSeparatorCount() { return separatorCount; } private String getName() { return name.toString(); } private String getName(int index) { return names.get(index); } private void parse(CharacterIterator ci, String template) { name.append('{'); char c = consumeWhiteSpace(ci); StringBuilder nameBuilder = new StringBuilder(); // Look for query or matrix types if (c == '?' || c == ';' || c == '.' 
|| c == '+' || c == '#' || c == '/' || c == '&') { paramType = c; c = ci.next(); name.append(paramType); } if (Character.isLetterOrDigit(c) || c == '_') { // Template name character nameBuilder.append(c); name.append(c); } else { throw new IllegalArgumentException(LocalizationMessages.ERROR_TEMPLATE_PARSER_ILLEGAL_CHAR_START_NAME(c, ci.pos(), template)); } StringBuilder regexBuilder = new StringBuilder(); State state = State.TEMPLATE; State previousState; boolean star = false; boolean whiteSpace = false; boolean ignoredLastComma = false; int bracketDepth = 1; // { int regExpBracket = 0; // [ int regExpRound = 0; // ( boolean reqExpSlash = false; // \ while ((state.value & (State.ERROR.value | State.EXIT.value)) == 0) { previousState = state; c = ci.next(); // "\\{(\\w[-\\w\\.]*) if (Character.isLetterOrDigit(c)) { // Template name character append(c, state, nameBuilder, regexBuilder); state = state.transition(State.TEMPLATE.value | State.REGEXP.value); } else switch (c) { case '_': case '-': case '.': // Template name character append(c, state, nameBuilder, regexBuilder); state = state.transition(State.TEMPLATE.value | State.REGEXP.value); break; case ',': switch (state) { case REGEXP: if (bracketDepth == 1 && !reqExpSlash && regExpBracket == 0 && regExpRound == 0) { state = State.COMMA; } else { regexBuilder.append(c); } break; case TEMPLATE: case STAR: state = State.COMMA; break; } separatorCount++; break; case ':': if (state == State.REGEXP) { regexBuilder.append(c); } state = state.transition(State.TEMPLATE.value | State.REGEXP.value | State.STAR.value, State.REGEXP); break; case '*': state = state.transition(State.TEMPLATE.value | State.REGEXP.value); if (state == State.TEMPLATE) { star = true; state = State.STAR; } else if (state == State.REGEXP){ regexBuilder.append(c); } break; case '}': bracketDepth--; if (bracketDepth == 0) { state = State.BRACKET; } else { regexBuilder.append(c); } break; case '{': if (state == State.REGEXP) { bracketDepth++; 
regexBuilder.append(c); } else { state = State.ERROR; // Error multiple parenthesis } break; default: if (!Character.isWhitespace(c)) { if (state != State.REGEXP) { state = State.ERROR; // Error - unknown symbol } else { switch (c) { case '(' : regExpRound++; break; case ')': regExpRound--; break; case '[': regExpBracket++; break; case ']': regExpBracket--; break; } if (c == '\\') { reqExpSlash = true; } else { reqExpSlash = false; } regexBuilder.append(c); } } whiteSpace = true; break; } // Store parsed name, and associated star, regexp, and length switch (state) { case COMMA: case BRACKET: if (nameBuilder.length() == 0 && regexBuilder.length() == 0 && !star && name.charAt(name.length() - 1) == ',' /* ignore last comma */) { if (ignoredLastComma) { // Do not ignore twice state = State.ERROR; } else { name.setLength(name.length() - 1); ignoredLastComma = true; } break; } if (regexBuilder.length() != 0) { String regex = regexBuilder.toString(); Matcher matcher = MATCH_NUMBER_OF_MAX_LENGTH_4.matcher(regex); if (matcher.matches()) { lengths.add(Integer.parseInt(regex)); regexps.add(null); } else { if (paramType != 'p') { state = State.ERROR; // regular expressions allowed just on path by the REST spec c = regex.charAt(0); // display proper error values ci.setPosition(ci.pos() - regex.length()); break; } lengths.add(null); regexps.add(regex); } } else { regexps.add(previousState == State.REGEXP ? "" : null); lengths.add(previousState == State.REGEXP ? 
0 : null); } names.add(nameBuilder.toString()); explodes.add(star); nameBuilder.setLength(0); regexBuilder.setLength(0); star = false; ignoredLastComma = false; break; } if (!whiteSpace) { name.append(c); } whiteSpace = false; // switch state back or exit switch (state) { case COMMA: state = State.TEMPLATE; break; case BRACKET: state = State.EXIT; break; } } if (state == State.ERROR) { throw new IllegalArgumentException( LocalizationMessages.ERROR_TEMPLATE_PARSER_ILLEGAL_CHAR_AFTER_NAME(c, ci.pos(), template)); } } private static void append(char c, State state, StringBuilder templateSb, StringBuilder regexpSb) { if (state == State.TEMPLATE) { templateSb.append(c); } else { // REGEXP regexpSb.append(c); } } private static char consumeWhiteSpace(final CharacterIterator ci) { char c; do { c = ci.next(); } while (Character.isWhitespace(c)); return c; } private enum State { TEMPLATE/**/(0b000000001), // Template name, before '*', ':', ',' or '}' REGEXP/* */(0b000000010), // Regular expression inside template, after : STAR/* */(0b000000100), // * COMMA/* */(0b000001000), // , BRACKET/* */(0b000010000), // } EXIT/* */(0b001000000), // quit parsing ERROR/* */(0b100000000); // error when parsing private final int value; State(int value) { this.value = value; } /** * Return error state when in not any of allowed states represented by their combined values * @param allowed The combined values of states (state1.value | state2.value) not to return error level * @return this state if in allowed state or {@link State#ERROR} if not */ State transition(int allowed) { return ((value & allowed) != 0) ? 
this : State.ERROR; } /** * Return error state when in not any of allowed states represented by their combined values * @param allowed The combined values of states (state1.value | state2.value) not to return error level * @param next the next state to transition * @return next state if in allowed state or {@link State#ERROR} if not */ State transition(int allowed, State next) { return ((value & allowed) != 0) ? next : State.ERROR; } } } }





// © 2015 - 2025 Weber Informatics LLC | Privacy Policy