org.apache.jena.iri.impl.ComponentPatternParser Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.iri.impl;
import java.util.List;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.jena.iri.ViolationCodes ;
public class ComponentPatternParser implements ViolationCodes {
// Alternation of the separator tokens of a component pattern. It is compiled
// into `keyword` below, which mySplit() uses to cut a pattern into tokens.
// The expression has seven capturing groups, in this order:
//   1: "(" not followed by "?"     2: "(?"     3: ")"     4: "["     5: "]"
//   6: "@{"                        7: "}"  or a lower-case range such as "a-z"
static String separators = "([(](?![?]))|([(][?])|([)])|(\\[)|(\\])|([@][{])|([}]|[a-z]-[a-z])";
// Token classes switched on by next() and addLowerCase(). Values 1..7 line up
// with the capturing groups of `separators` listed above.
static final int OPEN_PAREN = 1;
static final int OPEN_NON_CAPTURING_PAREN = 2;
static final int CLOSE_PAREN = 3;
static final int OPEN_SQ = 4;
static final int CLOSE_SQ = 5;
static final int OPEN_VAR = 6;
static final int CLOSE_BRACE = 7;
// NOTE(review): `separators` has no eighth capturing group; the range
// alternative "[a-z]-[a-z]" lives inside group 7 together with "}". If tokens
// are classified by capturing-group index (the classifier is not visible
// here), this value is never produced and ranges are reported as CLOSE_BRACE.
// Confirm whether group 7 was meant to be "([}])|([a-z]-[a-z])".
static final int LOWER_CASE_RANGE = 8;
// Presumably the class of any token that is not one of the separators above;
// next() copies such tokens to the output unchanged. TODO confirm against the
// classifier.
static final int OTHER = -1;
// Compiled form of `separators`; mySplit() runs find() with it.
static final Pattern keyword = Pattern.compile(separators);
/* .NET port does not like this. Reworked.
*
static final Pattern splitter = Pattern.compile("(?=" + separators
+ ")|(?<=" + separators + ")");
public ComponentPatternParser(String p) {
split = splitter.split(p);
field = 0;
classify = new int[split.length];
for (int i = 0; i < split.length; i++)
classify[i] = classify(split[i]);
while (field < split.length)
next();
// System.err.println(p + " ==> "+ rslt.toString());
pattern = Pattern.compile(rslt.toString());
}
*/
// working data
// Tokens of the pattern being parsed; read one at a time through split[field].
final String split[];
// Token class (OPEN_PAREN, OPEN_SQ, OTHER, ...) of each token; indexed with
// the same `field` index as split[].
final int classify[];
// Index of the current token in split[] / classify[]; add() advances it.
int field;
// Incremented by next() for every OPEN_PAREN and OPEN_VAR token; its value is
// used as the bit position in the three masks below.
int groupCount;
// result data
// Text of the regular expression being assembled; compiled into `pattern`.
final StringBuffer rslt = new StringBuffer();
// Bit masks: next() sets bit (1 << groupCount) when it meets the
// "shouldLowerCase" / "mustLowerCase" macro, addHost() does the same for
// "host".
// NOTE(review): these are ints and Java masks an int shift distance to its
// low five bits, so group numbers of 32 and above would alias lower bits.
int shouldLowerCase;
int mustLowerCase;
int hostNames;
// The compiled result, returned by get() and printed by toString().
final Pattern pattern;
// Zero-length array passed to toArray() in mySplit() to select String[] as
// the result type.
static final String emptyStringArray[] = new String[0];
/**
 * Cuts a component pattern into an alternating sequence of plain text and
 * separator tokens, where a separator is any match of {@code keyword}.
 * This hand-written loop stands in for the look-around based
 * {@code splitter.split(p)} kept in the comment block above the fields.
 *
 * @param p the component pattern to tokenise
 * @return the tokens in source order; separators are kept as their own
 *         elements, and an input that starts with a separator yields a
 *         leading empty string
 */
private static String[] mySplit(String p) {
    Matcher m = keyword.matcher(p);
    // The element type must be spelled out: with a raw List the call
    // toArray(String[]) is erased to Object[] and the return below does
    // not compile.
    List<String> rslt = new ArrayList<>();
    int pos = 0;
    while (m.find()) {
        // Text between the previous separator and this one. While pos is
        // still 0 the text is emitted even when empty, which produces the
        // leading "" for input that begins with a separator.
        if (m.start() > pos || pos == 0) {
            rslt.add(p.substring(pos, m.start()));
        }
        // The separator itself.
        rslt.add(p.substring(m.start(), m.end()));
        pos = m.end();
    }
    // Trailing text after the last separator, if any.
    if (pos < p.length()) {
        rslt.add(p.substring(pos));
    }
    return rslt.toArray(emptyStringArray);
}
// static private String[] mySplitx(String p) {
// String r[] = mySplit(p);
// String s[] = splitter.split(p);
// if (r.length!=s.length) {
// System.err.println("Bad lengths: "+p+","+r.length+","+s.length);
// }
// for (int i=0;i "+ rslt.toString());
pattern = Pattern.compile(rslt.toString());
}
/**
 * Returns the regular expression compiled from the component pattern.
 *
 * @return the compiled pattern held by this parser
 */
public Pattern get() {
    return this.pattern;
}
GroupAction[] actions() {
int gCount = pattern.matcher("").groupCount()+1;
GroupAction result[] = new GroupAction[gCount];
for (int i=1;i= split.length)
throw new IllegalArgumentException(
"Internal IRI code error. Did not find CLOSE_SQ in until().");
add();
}
}
/**
 * Consumes the token at {@code field}, plus whatever follows it that
 * belongs to the same construct, and appends the corresponding text to
 * {@code rslt}.
 *
 * @throws IllegalArgumentException on a stray "]" or an unknown macro name
 * @throws IllegalStateException if the token class is not a known one
 */
@SuppressWarnings("fallthrough")
private void next() {
    final int kind = classify[field];

    // A "]" is only legal while a character class is being copied.
    if (kind == CLOSE_SQ) {
        throw new IllegalArgumentException(
                "Found unexpected ], either pattern syntax error, or limitation of IRI code.");
    }

    // Character class: copy "[" and then everything up to the closing "]".
    if (kind == OPEN_SQ) {
        add();
        untilCloseSq();
        return;
    }

    // Macro "@{name...}": opens a new group and dispatches on the name.
    if (kind == OPEN_VAR) {
        field++;
        rslt.append("(");
        groupCount++;
        final String macro = split[field];
        if (macro.equals("host")) {
            addHost();
            return;
        }
        if (macro.equals("shouldLowerCase")) {
            shouldLowerCase |= (1 << groupCount);
        } else if (macro.equals("mustLowerCase")) {
            mustLowerCase |= (1 << groupCount);
        } else {
            throw new IllegalArgumentException("No macro: "
                    + macro);
        }
        addLowerCase();
        return;
    }

    // Everything else is copied through unchanged; a capturing "(" is
    // counted first.
    switch (kind) {
    case OPEN_PAREN:
        groupCount++;
        // fall through
    case OPEN_NON_CAPTURING_PAREN:
    case CLOSE_PAREN:
    case CLOSE_BRACE:
    case LOWER_CASE_RANGE:
    case OTHER:
        add();
        return;
    default:
        throw new IllegalStateException("IRI code internal error.");
    }
}
/**
 * Translates the body of a lower-case macro. It is entered from next() with
 * {@code field} on the macro name and with "(" already appended to
 * {@code rslt}. The tokens of the parenthesised body are copied to
 * {@code rslt} unchanged, while a second rendering of the same tokens is
 * collected in a local buffer; that second rendering is then appended as a
 * capturing alternative. The text produced (including the "(" from next())
 * has the shape {@code (?:(?:body)|(variant))}.
 *
 * @throws IllegalArgumentException if the macro name is not followed by "(",
 *         if the tokens run out before the closing ")", or if the body is
 *         not followed by "}"
 * @throws IllegalStateException if a capturing "(" occurs in the body outside
 *         square brackets, or a token has an unknown class
 */
@SuppressWarnings("fallthrough")
private void addLowerCase() {
// Nesting depth of "[" ... "]" within the body.
int sqCount=0;
// Step from the macro name to the token after it, which must be "(".
field++;
if (classify[field]!=OPEN_PAREN)
throw new IllegalArgumentException(split[field-1]+" macro syntax error");
// Skip the "(" itself; it is replaced by the "(?:" appended below.
field++;
rslt.append("?:(?:"); // make group non-capturing.
// Second rendering of the body, appended after the "|" at the end.
StringBuffer caseInsensitiveEx = new StringBuffer();
// Runs until the token just consumed was a ")" outside square brackets,
// i.e. the loop stops at the first such ")".
while (classify[field-1]!=CLOSE_PAREN || sqCount>0 ) {
if (field >= split.length)
throw new IllegalArgumentException(
"Internal IRI code error. Did not find CLOSE_PAREN in addLowerCase().");
switch (classify[field]) {
case OPEN_SQ:
sqCount++;
caseInsensitiveEx.append('[');
break;
case CLOSE_SQ:
sqCount--;
caseInsensitiveEx.append(']');
break;
case LOWER_CASE_RANGE:
if (sqCount==0)
makeCaseInsensitive(caseInsensitiveEx);
else {
// Inside brackets: keep the range and add its upper-case twin,
// e.g. "a-z" becomes "a-zA-Z".
caseInsensitiveEx.append(split[field]);
caseInsensitiveEx.append((char)(split[field].charAt(0)-'a'+'A'));
caseInsensitiveEx.append('-');
caseInsensitiveEx.append((char)(split[field].charAt(2)-'a'+'A'));
}
break;
case OPEN_PAREN:
// A capturing "(" is accepted only inside square brackets.
if (sqCount==0)
throw new IllegalStateException("IRI code internal error: capturing group not supported inside lowercase.");
// fall through
case OPEN_NON_CAPTURING_PAREN:
case CLOSE_PAREN: // here
case CLOSE_BRACE:
// Copied as-is; this includes the ")" that ends the body, which
// therefore also closes the "(" opened before the variant below.
caseInsensitiveEx.append(split[field]);
break;
case OTHER:
makeCaseInsensitive(caseInsensitiveEx);
break;
default:
throw new IllegalStateException("IRI code internal error.");
}
// Copy the token to rslt unchanged and advance field.
add();
}
// The body must be followed by the "}" that ends the macro.
if (classify[field]!=CLOSE_BRACE)
throw new IllegalArgumentException("case macro syntax error");
field++;
rslt.append("|("); // start capturing group
rslt.append(caseInsensitiveEx);
// Closes the "(" that next() appended before calling this method.
rslt.append(")");
}
private void makeCaseInsensitive(StringBuffer caseInsensitiveEx) {
for (int i=0;i='a' && c<='z') {
caseInsensitiveEx.append('[');
caseInsensitiveEx.append(c);
caseInsensitiveEx.append((char)(c-'a'+'A'));
caseInsensitiveEx.append(']');
}
}
}
/**
 * Translates the "host" macro. Entered from next() with {@code field} on
 * the macro name and with the opening "(" of the group already appended to
 * {@code rslt}; marks the current group in {@code hostNames}, then emits the
 * host expression together with the closing ")".
 *
 * @throws IllegalArgumentException if the macro name is not directly
 *         followed by "}"
 */
private void addHost() {
    // Remember which group number carries the host name.
    hostNames = hostNames | (1 << groupCount);

    // The token after the macro name has to be the closing brace.
    field += 1;
    final boolean closed = classify[field] == CLOSE_BRACE;
    if (!closed) {
        throw new IllegalArgumentException("host macro syntax error");
    }

    // Either a run of characters that are not reserved, or a bracketed
    // literal ([ ... ]) in which ':' is additionally allowed, as used by
    // IP v6 and future address forms.
    rslt.append("[^\\[\\]:/?#@!$&'()*+,;=]*|\\[[^\\[\\]/?#@!$&'()*+,;=]*\\])");

    // Step past the "}".
    field += 1;
}
/**
 * Copies the current token to the output unchanged and moves on to the
 * next token.
 */
private void add() {
    final String token = split[field];
    rslt.append(token);
    field += 1;
}
/**
 * Returns the source text of the compiled regular expression.
 */
@Override
public String toString() {
    final Pattern compiled = this.pattern;
    return compiled.pattern();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy