org.glassfish.jersey.uri.internal.UriTemplateParser Maven / Gradle / Ivy
Show all versions of jaxrs-ri Show documentation
/*
* Copyright (c) 2010, 2023 Oracle and/or its affiliates. All rights reserved.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License v. 2.0, which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* This Source Code may also be made available under the following Secondary
* Licenses when the conditions for such availability set forth in the
* Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
* version 2 with the GNU Classpath Exception, which is available at
* https://www.gnu.org/software/classpath/license.html.
*
* SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
*/
package org.glassfish.jersey.uri.internal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.glassfish.jersey.internal.LocalizationMessages;
import org.glassfish.jersey.uri.UriComponent;
/**
* A URI template parser that parses JAX-RS specific URI templates.
*
* @author Paul Sandoz
* @author Gerard Davison (gerard.davison at oracle.com)
*/
public class UriTemplateParser {
/* package */ static final int[] EMPTY_INT_ARRAY = new int[0];
private static final Set RESERVED_REGEX_CHARACTERS = initReserved();
private static final String[] HEX_TO_UPPERCASE_REGEX = initHexToUpperCaseRegex();
private static Set initReserved() {
char[] reserved = {
'.', '^', '&', '!',
'?', '-', ':', '<',
'(', '[', '$', '=',
')', ']', ',', '>',
'*', '+', '|'};
Set s = new HashSet(reserved.length);
for (char c : reserved) {
s.add(c);
}
return s;
}
/**
* Default URI template value regexp pattern.
*/
public static final Pattern TEMPLATE_VALUE_PATTERN = Pattern.compile("[^/]+");
public static final Pattern TEMPLATE_VALUE_PATTERN_MULTI = Pattern.compile("[^,/]+");
public static final Pattern MATCH_NUMBER_OF_MAX_LENGTH_4 = Pattern.compile("[1-9][0-9]{0,3}");
private final String template;
private final StringBuffer regex = new StringBuffer();
private final StringBuffer normalizedTemplate = new StringBuffer();
private final StringBuffer literalCharactersBuffer = new StringBuffer();
private final Pattern pattern;
private final List names = new ArrayList<>();
private final List parts = new ArrayList<>();
private final List groupCounts = new ArrayList();
private final Map nameToPattern = new HashMap();
private int numOfExplicitRegexes;
private int skipGroup;
private int literalCharacters;
/**
* Parse a template.
*
* @param template the template.
* @throws IllegalArgumentException if the template is null, an empty string
* or does not conform to a JAX-RS URI template.
*/
public UriTemplateParser(final String template) throws IllegalArgumentException {
if (template == null || template.isEmpty()) {
throw new IllegalArgumentException("Template is null or has zero length");
}
this.template = template;
parse(new CharacterIterator(template));
try {
pattern = Pattern.compile(regex.toString());
} catch (PatternSyntaxException ex) {
throw new IllegalArgumentException("Invalid syntax for the template expression '"
+ regex + "'",
ex
);
}
}
/**
* Get the template.
*
* @return the template.
*/
public final String getTemplate() {
return template;
}
/**
* Get the pattern.
*
* @return the pattern.
*/
public final Pattern getPattern() {
return pattern;
}
/**
* Get the normalized template.
*
* A normalized template is a template without any explicit regular
* expressions.
*
* @return the normalized template.
*/
public final String getNormalizedTemplate() {
return normalizedTemplate.toString();
}
/**
* Get the map of template names to patterns.
*
* @return the map of template names to patterns.
*/
public final Map getNameToPattern() {
return nameToPattern;
}
/**
* Get the list of template names.
*
* @return the list of template names.
*/
public final List getNames() {
return names;
}
/**
* Get a collection of uri parts (static strings and dynamic arguments) as parsed by the parser.
* Can be used to compose the uri. This collection is usually a superset of {@link #getNames() names}
* and other parts that do not have a template.
*
* @return List of parts of the uri.
*/
public List getUriParts() {
return parts;
}
/**
* Get the capturing group counts for each template variable.
*
* @return the capturing group counts.
*/
public final List getGroupCounts() {
return groupCounts;
}
/**
* Get the group indexes to capturing groups.
*
* Any nested capturing groups will be ignored and the
* the group index will refer to the top-level capturing
* groups associated with the templates variables.
*
* @return the group indexes to capturing groups.
*/
public final int[] getGroupIndexes() {
if (names.isEmpty()) {
return EMPTY_INT_ARRAY;
}
int[] indexes = new int[names.size()];
indexes[0] = 0 + groupCounts.get(0);
for (int i = 1; i < indexes.length; i++) {
indexes[i] = indexes[i - 1] + groupCounts.get(i);
}
return indexes;
}
/**
* Get the number of explicit regular expressions.
*
* @return the number of explicit regular expressions.
*/
public final int getNumberOfExplicitRegexes() {
return numOfExplicitRegexes;
}
/**
* Get the number of regular expression groups
*
* @return the number of regular expressions groups
*
* @since 2.9
*/
public final int getNumberOfRegexGroups() {
if (groupCounts.isEmpty()) {
return 0;
} else {
int[] groupIndex = getGroupIndexes();
return groupIndex[groupIndex.length - 1] + skipGroup;
}
}
/**
* Get the number of literal characters.
*
* @return the number of literal characters.
*/
public final int getNumberOfLiteralCharacters() {
return literalCharacters;
}
/**
* Encode literal characters of a template.
*
* @param characters the literal characters
* @return the encoded literal characters.
*/
protected String encodeLiteralCharacters(final String characters) {
return characters;
}
private void parse(final CharacterIterator ci) {
try {
while (ci.hasNext()) {
char c = ci.next();
if (c == '{') {
processLiteralCharacters();
skipGroup = parseName(ci, skipGroup);
} else {
literalCharactersBuffer.append(c);
}
}
processLiteralCharacters();
} catch (NoSuchElementException ex) {
throw new IllegalArgumentException(LocalizationMessages.ERROR_TEMPLATE_PARSER_INVALID_SYNTAX_TERMINATED(
template), ex);
}
}
private void processLiteralCharacters() {
if (literalCharactersBuffer.length() > 0) {
literalCharacters += literalCharactersBuffer.length();
String s = encodeLiteralCharacters(literalCharactersBuffer.toString());
normalizedTemplate.append(s);
parts.add(new UriPart(s));
// Escape if reserved regex character
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if (RESERVED_REGEX_CHARACTERS.contains(c)) {
regex.append("\\");
regex.append(c);
} else if (c == '%') {
final char c1 = s.charAt(i + 1);
final char c2 = s.charAt(i + 2);
if (UriComponent.isHexCharacter(c1) && UriComponent.isHexCharacter(c2)) {
regex.append("%").append(HEX_TO_UPPERCASE_REGEX[c1]).append(HEX_TO_UPPERCASE_REGEX[c2]);
i += 2;
}
} else {
regex.append(c);
}
}
literalCharactersBuffer.setLength(0);
}
}
private static String[] initHexToUpperCaseRegex() {
String[] table = new String[0x80];
for (int i = 0; i < table.length; i++) {
table[i] = String.valueOf((char) i);
}
for (char c = 'a'; c <= 'f'; c++) {
// initialize table values: table[a] = ([aA]) ...
table[c] = "[" + c + (char) (c - 'a' + 'A') + "]";
}
for (char c = 'A'; c <= 'F'; c++) {
// initialize table values: table[A] = ([aA]) ...
table[c] = "[" + (char) (c - 'A' + 'a') + c + "]";
}
return table;
}
private int parseName(final CharacterIterator ci, int skipGroup) {
Variables variables = new Variables();
variables.parse(ci, template);
Pattern namePattern;
// Make sure we display something useful
String name = variables.getName();
int argIndex = 0;
try {
switch (variables.paramType) {
case '?':
case ';':
case '&':
// Build up the regex for each of these properties
StringBuilder regexBuilder = new StringBuilder();
String separator = null;
switch (variables.paramType) {
case '?':
separator = "\\&";
regexBuilder.append("\\?"); // first symbol
break;
case '&':
separator = "\\&";
regexBuilder.append("\\&"); // first symbol
break;
case ';':
separator = ";/\\?";
regexBuilder.append(";"); // first symbol
break;
}
// Start a group because each parameter could repeat
// names.add("__" + (paramType == '?' ? "query" : "matrix"));
regexBuilder.append('(');
for (String subName : variables.names) {
TemplateVariable.Position position = determinePosition(variables.separatorCount, argIndex);
TemplateVariable templateVariable =
TemplateVariable.createTemplateVariable(variables.paramType, subName, position);
templateVariable.setStar(variables.explodes(argIndex));
regexBuilder.append("(&?");
regexBuilder.append(subName);
regexBuilder.append("(=([^");
regexBuilder.append(separator);
regexBuilder.append(']');
if (variables.hasLength(argIndex)) {
regexBuilder.append('{').append(variables.getLength(argIndex)).append('}');
templateVariable.setLength(variables.getLength(argIndex));
} else {
regexBuilder.append('*');
}
regexBuilder.append("))?");
regexBuilder.append(')');
if (argIndex != 0) {
regexBuilder.append('|');
}
names.add(templateVariable);
parts.add(templateVariable);
groupCounts.add(
argIndex == 0 ? 5 : 3);
argIndex++;
}
// groupCounts.add(1);
skipGroup = 1;
// Knock of last bar
regexBuilder.append(")*");
namePattern = Pattern.compile(regexBuilder.toString());
// Make sure we display something useful
break;
default:
if (variables.separatorCount == 0) {
if (variables.hasRegexp(0)) {
numOfExplicitRegexes++;
}
TemplateVariable templateVariable = TemplateVariable
.createTemplateVariable(variables.paramType, variables.getName(0), TemplateVariable.Position.SINGLE);
templateVariable.setStar(variables.explodes(0));
names.add(templateVariable);
parts.add(templateVariable);
// groupCounts.add(1 + skipGroup);
if (variables.hasLength(0)) {
if (variables.getLength(0) != 0) {
int len = TEMPLATE_VALUE_PATTERN.pattern().length() - 1;
String pattern = TEMPLATE_VALUE_PATTERN.pattern().substring(0, len)
+ '{' + variables.getLength(0) + '}';
namePattern = Pattern.compile(pattern);
} else {
namePattern = TEMPLATE_VALUE_PATTERN;
}
templateVariable.setLength(variables.getLength(0));
} else {
namePattern = (!variables.hasRegexp(0))
? TEMPLATE_VALUE_PATTERN : Pattern.compile(variables.regexp(0));
}
if (nameToPattern.containsKey(name)) {
if (!nameToPattern.get(name).equals(namePattern)) {
throw new IllegalArgumentException(
LocalizationMessages.ERROR_TEMPLATE_PARSER_NAME_MORE_THAN_ONCE(name, template));
}
} else {
nameToPattern.put(name, namePattern);
}
// Determine group count of pattern
Matcher m = namePattern.matcher("");
int g = m.groupCount();
groupCounts.add(1 + skipGroup);
skipGroup = g;
} else {
argIndex = 0;
regexBuilder = new StringBuilder();
for (String subName : variables.names) {
if (argIndex != 0) {
regexBuilder
.append('(')
.append(',');
}
TemplateVariable.Position position = determinePosition(variables.separatorCount, argIndex);
TemplateVariable templateVariable
= TemplateVariable.createTemplateVariable(variables.paramType, subName, position);
templateVariable.setStar(variables.explodes(argIndex));
names.add(templateVariable);
parts.add(templateVariable);
if (variables.hasLength(argIndex)) {
int len = TEMPLATE_VALUE_PATTERN_MULTI.pattern().length() - 1;
String pattern = TEMPLATE_VALUE_PATTERN_MULTI.pattern()
.substring(0, len) + '{' + variables.getLength(argIndex) + '}';
namePattern = Pattern.compile(pattern);
templateVariable.setLength(variables.getLength(argIndex));
} else {
namePattern = (!variables.hasRegexp(argIndex))
? TEMPLATE_VALUE_PATTERN_MULTI : Pattern.compile(variables.regexp(argIndex));
}
// TODO breaks RFC 6570 --backward compatibility with default pattern
if (nameToPattern.containsKey(subName) && variables.paramType == 'p') {
if (!nameToPattern.get(subName).equals(namePattern)) {
throw new IllegalArgumentException(
LocalizationMessages.ERROR_TEMPLATE_PARSER_NAME_MORE_THAN_ONCE(name, template));
}
} else {
nameToPattern.put(subName, namePattern);
}
regexBuilder
.append('(')
.append(namePattern)
.append(')');
if (argIndex != 0) {
regexBuilder.append(")");
}
if (!variables.hasRegexp(argIndex)) {
regexBuilder.append("{0,1}");
}
argIndex++;
groupCounts.add(2);
}
namePattern = Pattern.compile(regexBuilder.toString());
}
break;
}
regex.append('(')
.append(namePattern)
.append(')');
normalizedTemplate.append('{')
.append(name)
.append('}');
} catch (PatternSyntaxException ex) {
throw new IllegalArgumentException(LocalizationMessages
.ERROR_TEMPLATE_PARSER_INVALID_SYNTAX(variables.regexp(argIndex), variables.name, template), ex);
}
// Tell the next time through the loop how many to skip
return skipGroup;
}
private static TemplateVariable.Position determinePosition(int separatorCount, int argIndex) {
TemplateVariable.Position position = separatorCount == 0
? TemplateVariable.Position.SINGLE
: argIndex == 0
? TemplateVariable.Position.FIRST
: argIndex == separatorCount ? TemplateVariable.Position.LAST : TemplateVariable.Position.MIDDLE;
return position;
}
private static class Variables {
private char paramType = 'p';
private List names = new ArrayList<>(); // names
private List explodes = new ArrayList<>(); // *
private List regexps = new ArrayList<>(); // : regexp
private List lengths = new ArrayList<>(); // :1-9999
private int separatorCount = 0;
private StringBuilder name = new StringBuilder();
private int getCount() {
return names.size();
}
private boolean explodes(int index) {
return !explodes.isEmpty() && explodes.get(index);
}
private boolean hasRegexp(int index) {
return !regexps.isEmpty() && regexps.get(index) != null;
}
private String regexp(int index) {
return regexps.get(index);
}
private boolean hasLength(int index) {
return !lengths.isEmpty() && lengths.get(index) != null;
}
private Integer getLength(int index) {
return lengths.get(index);
}
private char getParamType() {
return paramType;
}
private int getSeparatorCount() {
return separatorCount;
}
private String getName() {
return name.toString();
}
private String getName(int index) {
return names.get(index);
}
private void parse(CharacterIterator ci, String template) {
name.append('{');
char c = consumeWhiteSpace(ci);
StringBuilder nameBuilder = new StringBuilder();
// Look for query or matrix types
if (c == '?' || c == ';' || c == '.' || c == '+' || c == '#' || c == '/' || c == '&') {
paramType = c;
c = ci.next();
name.append(paramType);
}
if (Character.isLetterOrDigit(c) || c == '_') {
// Template name character
nameBuilder.append(c);
name.append(c);
} else {
throw new IllegalArgumentException(LocalizationMessages.ERROR_TEMPLATE_PARSER_ILLEGAL_CHAR_START_NAME(c, ci.pos(),
template));
}
StringBuilder regexBuilder = new StringBuilder();
State state = State.TEMPLATE;
State previousState;
boolean star = false;
boolean whiteSpace = false;
boolean ignoredLastComma = false;
int bracketDepth = 1; // {
int regExpBracket = 0; // [
int regExpRound = 0; // (
boolean reqExpSlash = false; // \
while ((state.value & (State.ERROR.value | State.EXIT.value)) == 0) {
previousState = state;
c = ci.next();
// "\\{(\\w[-\\w\\.]*)
if (Character.isLetterOrDigit(c)) {
// Template name character
append(c, state, nameBuilder, regexBuilder);
state = state.transition(State.TEMPLATE.value | State.REGEXP.value);
} else switch (c) {
case '_':
case '-':
case '.':
// Template name character
append(c, state, nameBuilder, regexBuilder);
state = state.transition(State.TEMPLATE.value | State.REGEXP.value);
break;
case ',':
switch (state) {
case REGEXP:
if (bracketDepth == 1 && !reqExpSlash && regExpBracket == 0 && regExpRound == 0) {
state = State.COMMA;
} else {
regexBuilder.append(c);
}
break;
case TEMPLATE:
case STAR:
state = State.COMMA;
break;
}
separatorCount++;
break;
case ':':
if (state == State.REGEXP) {
regexBuilder.append(c);
}
state = state.transition(State.TEMPLATE.value | State.REGEXP.value | State.STAR.value, State.REGEXP);
break;
case '*':
state = state.transition(State.TEMPLATE.value | State.REGEXP.value);
if (state == State.TEMPLATE) {
star = true;
state = State.STAR;
} else if (state == State.REGEXP){
regexBuilder.append(c);
}
break;
case '}':
bracketDepth--;
if (bracketDepth == 0) {
state = State.BRACKET;
} else {
regexBuilder.append(c);
}
break;
case '{':
if (state == State.REGEXP) {
bracketDepth++;
regexBuilder.append(c);
} else {
state = State.ERROR; // Error multiple parenthesis
}
break;
default:
if (!Character.isWhitespace(c)) {
if (state != State.REGEXP) {
state = State.ERROR; // Error - unknown symbol
} else {
switch (c) {
case '(' :
regExpRound++;
break;
case ')':
regExpRound--;
break;
case '[':
regExpBracket++;
break;
case ']':
regExpBracket--;
break;
}
if (c == '\\') {
reqExpSlash = true;
} else {
reqExpSlash = false;
}
regexBuilder.append(c);
}
}
whiteSpace = true;
break;
}
// Store parsed name, and associated star, regexp, and length
switch (state) {
case COMMA:
case BRACKET:
if (nameBuilder.length() == 0 && regexBuilder.length() == 0 && !star
&& name.charAt(name.length() - 1) == ',' /* ignore last comma */) {
if (ignoredLastComma) { // Do not ignore twice
state = State.ERROR;
} else {
name.setLength(name.length() - 1);
ignoredLastComma = true;
}
break;
}
if (regexBuilder.length() != 0) {
String regex = regexBuilder.toString();
Matcher matcher = MATCH_NUMBER_OF_MAX_LENGTH_4.matcher(regex);
if (matcher.matches()) {
lengths.add(Integer.parseInt(regex));
regexps.add(null);
} else {
if (paramType != 'p') {
state = State.ERROR; // regular expressions allowed just on path by the REST spec
c = regex.charAt(0); // display proper error values
ci.setPosition(ci.pos() - regex.length());
break;
}
lengths.add(null);
regexps.add(regex);
}
} else {
regexps.add(previousState == State.REGEXP ? "" : null);
lengths.add(previousState == State.REGEXP ? 0 : null);
}
names.add(nameBuilder.toString());
explodes.add(star);
nameBuilder.setLength(0);
regexBuilder.setLength(0);
star = false;
ignoredLastComma = false;
break;
}
if (!whiteSpace) {
name.append(c);
}
whiteSpace = false;
// switch state back or exit
switch (state) {
case COMMA:
state = State.TEMPLATE;
break;
case BRACKET:
state = State.EXIT;
break;
}
}
if (state == State.ERROR) {
throw new IllegalArgumentException(
LocalizationMessages.ERROR_TEMPLATE_PARSER_ILLEGAL_CHAR_AFTER_NAME(c, ci.pos(), template));
}
}
private static void append(char c, State state, StringBuilder templateSb, StringBuilder regexpSb) {
if (state == State.TEMPLATE) {
templateSb.append(c);
} else { // REGEXP
regexpSb.append(c);
}
}
private static char consumeWhiteSpace(final CharacterIterator ci) {
char c;
do {
c = ci.next();
} while (Character.isWhitespace(c));
return c;
}
private enum State {
TEMPLATE/**/(0b000000001), // Template name, before '*', ':', ',' or '}'
REGEXP/* */(0b000000010), // Regular expression inside template, after :
STAR/* */(0b000000100), // *
COMMA/* */(0b000001000), // ,
BRACKET/* */(0b000010000), // }
EXIT/* */(0b001000000), // quit parsing
ERROR/* */(0b100000000); // error when parsing
private final int value;
State(int value) {
this.value = value;
}
/**
* Return error state when in not any of allowed states represented by their combined values
* @param allowed The combined values of states (state1.value | state2.value) not to return error level
* @return this state if in allowed state or {@link State#ERROR} if not
*/
State transition(int allowed) {
return ((value & allowed) != 0) ? this : State.ERROR;
}
/**
* Return error state when in not any of allowed states represented by their combined values
* @param allowed The combined values of states (state1.value | state2.value) not to return error level
* @param next the next state to transition
* @return next state if in allowed state or {@link State#ERROR} if not
*/
State transition(int allowed, State next) {
return ((value & allowed) != 0) ? next : State.ERROR;
}
}
}
}