
org.openrdf.util.uri.URI Maven / Gradle / Ivy
The newest version!
/* Sesame - Storage and Querying architecture for RDF and RDF Schema
* Copyright (C) 2001-2006 Aduna
*
* Contact:
* Aduna
* Prinses Julianaplein 14 b
* 3817 CS Amersfoort
* The Netherlands
* tel. +33 (0)33 465 99 87
* fax. +33 (0)33 465 99 87
*
* http://aduna-software.com/
* http://www.openrdf.org/
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.openrdf.util.uri;
import java.util.LinkedList;
import java.util.StringTokenizer;
import org.openrdf.util.StringUtil;
/**
 * A replacement for Java's own URI: java.net.URI. Java's implementation is
 * quite buggy in that it doesn't resolve relative URIs correctly.
 * <p>
 * A URI is split into <tt>[scheme:]scheme-specific-part[#fragment]</tt>. If
 * the URI is relative, or if its scheme-specific part starts with a '/', it
 * is treated as <em>hierarchical</em> and is split further into
 * <tt>[//authority]path[?query]</tt>. All other URIs are <em>opaque</em> and
 * have no path.
 * <p>
 * Instances are mutable: {@link #normalize()} changes the path in place.
 * <p>
 * Note: this implementation is not guaranteed to handle ipv6 addresses
 * correctly (yet).
 **/
public class URI implements java.lang.Cloneable {

    /*--------------------------------------------------------+
    | Variables                                               |
    +--------------------------------------------------------*/

    // For all URIs:
    private String _scheme;
    private String _schemeSpecificPart;
    private String _fragment;

    // For hierarchical URIs:
    private String _authority;
    private String _path;
    private String _query;

    /*--------------------------------------------------------+
    | Constructors                                            |
    +--------------------------------------------------------*/

    /**
     * Creates a URI by parsing the supplied string.
     *
     * @param uriSpec The string to parse, must not be <tt>null</tt>.
     **/
    public URI(String uriSpec) {
        _parse(uriSpec);
    }

    /**
     * Creates an opaque URI from its components. The components are stored
     * as-is; they are not parsed or validated.
     *
     * @param scheme The scheme, or <tt>null</tt> for a relative URI.
     * @param schemeSpecificPart The scheme-specific part, without fragment.
     * @param fragment The fragment, or <tt>null</tt> if there is none.
     **/
    public URI(String scheme, String schemeSpecificPart, String fragment) {
        _scheme = scheme;
        _schemeSpecificPart = schemeSpecificPart;
        _fragment = fragment;
    }

    /**
     * Creates a hierarchical URI from its components. The components are
     * stored as-is; they are not parsed or validated. The scheme-specific
     * part of the new URI is left unset (<tt>null</tt>).
     *
     * @param scheme The scheme, or <tt>null</tt> for a relative URI.
     * @param authority The authority, or <tt>null</tt> if there is none.
     * @param path The path; a <tt>null</tt> path makes the URI opaque.
     * @param query The query, or <tt>null</tt> if there is none.
     * @param fragment The fragment, or <tt>null</tt> if there is none.
     **/
    public URI(String scheme, String authority, String path, String query, String fragment) {
        _scheme = scheme;
        _authority = authority;
        _path = path;
        _query = query;
        _fragment = fragment;
    }

    /*--------------------------------------------------------+
    | Public access methods                                   |
    +--------------------------------------------------------*/

    /** Checks whether this URI has a path. **/
    public boolean isHierarchical() {
        return _path != null;
    }

    /** Checks whether this URI has no path. **/
    public boolean isOpaque() {
        return _path == null;
    }

    /** Checks whether this URI has a scheme. **/
    public boolean isAbsolute() {
        return _scheme != null;
    }

    /** Checks whether this URI has no scheme. **/
    public boolean isRelative() {
        return _scheme == null;
    }

    /**
     * Checks whether this URI is a relative URI that references itself
     * (i.e. it only contains an anchor).
     *
     * @return <tt>true</tt> if this URI has no scheme, no authority, no query
     * and an empty path. A URI without a path, e.g. one created with
     * <tt>new URI(null, null, "anchor")</tt>, qualifies if its
     * scheme-specific part is <tt>null</tt> or empty.
     **/
    public boolean isSelfReference() {
        if (_scheme != null || _authority != null || _query != null) {
            return false;
        }

        if (_path != null) {
            return _path.length() == 0;
        }

        // No path at all; don't fail, look at the scheme-specific part instead.
        return _schemeSpecificPart == null || _schemeSpecificPart.length() == 0;
    }

    /** Gets the scheme, or <tt>null</tt> if this URI is relative. **/
    public String getScheme() {
        return _scheme;
    }

    /**
     * Gets the scheme-specific part, i.e. everything between the scheme and
     * the fragment. This is <tt>null</tt> for parsed relative URIs and for
     * URIs created with the five-argument constructor.
     **/
    public String getSchemeSpecificPart() {
        return _schemeSpecificPart;
    }

    /** Gets the authority, or <tt>null</tt> if there is none. **/
    public String getAuthority() {
        return _authority;
    }

    /** Gets the path, or <tt>null</tt> if this URI is opaque. **/
    public String getPath() {
        return _path;
    }

    /** Gets the query, or <tt>null</tt> if there is none. **/
    public String getQuery() {
        return _query;
    }

    /** Gets the fragment, or <tt>null</tt> if there is none. **/
    public String getFragment() {
        return _fragment;
    }

    /*--------------------------------------------------------+
    | Methods for normalizing URIs                            |
    +--------------------------------------------------------*/

    /**
     * Normalizes the path of this URI if it has one. Normalizing a path means
     * that any unnecessary '.' and '..' segments are removed. For example, the
     * URI http://server.com/a/b/../c/./d would be normalized to
     * http://server.com/a/c/d. A URI doesn't have a path if it is
     * opaque, in which case this method does nothing.
     * <p>
     * '..' segments that have no preceding segment to cancel out are kept.
     * Note that empty segments ("//") are collapsed, but only when the path
     * contains '..' segments that need processing.
     **/
    public void normalize() {
        if (_path == null) {
            // Opaque URI, there is nothing to normalize.
            return;
        }

        String path = _removeDotSegments(_path);

        if (path.indexOf("/../") != -1 || path.endsWith("/..")) {
            // Only do the more expensive segment processing when there are
            // '..' segments that could possibly be removed.
            path = _removeParentSegments(path);
        }

        _path = path;
    }

    /**
     * Removes all '.' segments from a path: every "/./" becomes "/", a
     * leading "./" is dropped and a trailing "/." becomes "/".
     **/
    private static String _removeDotSegments(String path) {
        int idx = path.indexOf("/./");

        if (idx != -1) {
            StringBuffer buf = new StringBuffer(path);

            while (idx != -1) {
                // Delete the "./" following the slash at idx, then search
                // again starting at that same slash. This way consecutive
                // segments like "/././" are removed completely.
                buf.delete(idx + 1, idx + 3);
                idx = buf.indexOf("/./", idx);
            }

            path = buf.toString();
        }

        if (path.startsWith("./")) {
            // Remove both characters
            path = path.substring(2);
        }

        if (path.endsWith("/.")) {
            // Remove only the last dot, not the slash!
            path = path.substring(0, path.length() - 1);
        }

        return path;
    }

    /**
     * Removes every '..' segment together with the segment preceding it,
     * unless there is no preceding segment or it is a '..' segment itself.
     **/
    private static String _removeParentSegments(String path) {
        StringTokenizer st = new StringTokenizer(path, "/");

        // The segments that are kept, used as a stack.
        String[] segments = new String[st.countTokens()];
        int segmentCount = 0;

        // Set when the path's final '..' segment cancelled out a segment; the
        // result must then end with a slash ("/a/b/.." --> "/a/").
        boolean lastSegmentRemoved = false;

        while (st.hasMoreTokens()) {
            String segment = st.nextToken();

            if (segment.equals("..") &&
                segmentCount > 0 &&
                !segments[segmentCount - 1].equals(".."))
            {
                // Bingo! Drop both this segment and the previous one.
                segmentCount--;
                lastSegmentRemoved = !st.hasMoreTokens();
            }
            else {
                segments[segmentCount++] = segment;
            }
        }

        // Construct the normalized path
        StringBuffer newPath = new StringBuffer(path.length());

        if (path.startsWith("/")) {
            newPath.append('/');
        }

        for (int i = 0; i < segmentCount; i++) {
            if (i > 0) {
                newPath.append('/');
            }
            newPath.append(segments[i]);
        }

        if (segmentCount > 0 && (path.endsWith("/") || lastSegmentRemoved)) {
            newPath.append('/');
        }

        return newPath.toString();
    }

    /**
     * Resolves a relative URI using this URI as the base URI.
     *
     * @param relUriSpec The (relative) URI to resolve, as a string.
     * @return The resolved URI.
     * @see #resolve(URI)
     **/
    public URI resolve(String relUriSpec) {
        // RFC 2396, chapter 5.2, step 1: parse the URI reference
        return this.resolve(new URI(relUriSpec));
    }

    /**
     * Resolves a relative URI using this URI as the base URI.
     * The algorithm is based on the one specified in chapter 5 of RFC 2396:
     * URI Generic Syntax. See http://www.ietf.org/rfc/rfc2396.txt
     *
     * @param relUri The (relative) URI to resolve.
     * @return <tt>relUri</tt> itself (the same object) if it is absolute, a
     * copy of this URI carrying the fragment of <tt>relUri</tt> if
     * <tt>relUri</tt> is a self-reference, or a new URI that combines both
     * URIs otherwise.
     * @throws IllegalArgumentException If <tt>relUri</tt> is relative, is
     * not a self-reference, and has neither an authority nor a path.
     **/
    public URI resolve(URI relUri) {
        // RFC, step 3:
        if (relUri.isAbsolute()) {
            return relUri;
        }

        // relUri._scheme == null

        // RFC, step 2:
        if (relUri.isSelfReference()) {
            // Reference to this URI
            URI result = (URI)this.clone();

            // Inherit any fragment identifier from relUri
            result._fragment = relUri._fragment;

            return result;
        }

        // We can start combining the URIs
        String authority;
        String path;
        boolean normalizeURI = false;

        if (relUri._authority != null) {
            // RFC, step 4:
            authority = relUri._authority;
            path = relUri._path;
        }
        else {
            if (relUri._path == null) {
                throw new IllegalArgumentException(
                        "Cannot resolve a relative URI without a path: " + relUri);
            }

            authority = this._authority;

            if (relUri._path.startsWith("/")) {
                // RFC, step 5:
                path = relUri._path;
            }
            else {
                // RFC, step 6:
                String basePath = (this._path == null) ? "" : this._path;

                if (!basePath.endsWith("/")) {
                    // Remove the last segment of the path. Note: if there is
                    // no slash at all, the path becomes empty, which is
                    // fixed below.
                    basePath = basePath.substring(0, basePath.lastIndexOf('/') + 1);
                }

                if (basePath.length() == 0) {
                    // No path means: start at root.
                    basePath = "/";
                }

                // Append the path of the relative URI
                path = basePath + relUri._path;

                // Path needs to be normalized.
                normalizeURI = true;
            }
        }

        URI result = new URI(_scheme, authority, path, relUri._query, relUri._fragment);

        if (normalizeURI) {
            result.normalize();
        }

        return result;
    }

    /**
     * Returns the string form of this URI, assembled from its components.
     **/
    public String toString() {
        StringBuffer result = new StringBuffer(64);

        if (_scheme != null) {
            result.append(_scheme);
            result.append(':');
        }

        if (isOpaque()) {
            // Opaque URI
            if (_schemeSpecificPart != null) {
                result.append(_schemeSpecificPart);
            }
        }
        else {
            // Hierarchical URI
            if (_authority != null) {
                result.append("//");
                result.append(_authority);
            }

            result.append(_path);

            if (_query != null) {
                result.append('?');
                result.append(_query);
            }
        }

        if (_fragment != null) {
            result.append('#');
            result.append(_fragment);
        }

        return result.toString();
    }

    /**
     * Creates a copy of this URI. Overrides Object.clone().
     **/
    public Object clone() {
        try {
            return super.clone();
        }
        catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }
    }

    /*--------------------------------------------------------+
    | Methods for parsing URIs                                |
    +--------------------------------------------------------*/

    /**
     * Parses the supplied string and sets the fields of this object
     * accordingly.
     **/
    private void _parse(String uri) {
        boolean schemeFound = _parseScheme(uri);

        if (schemeFound && !_schemeSpecificPart.startsWith("/")) {
            // Opaque URI
            String rest = _parseOpaquePart(_schemeSpecificPart);
            _parseFragment(rest);
            return;
        }

        // Hierarchical URI, or no scheme was found
        String rest = schemeFound ? _schemeSpecificPart : uri;
        rest = _parseAuthority(rest);
        rest = _parsePath(rest);
        rest = _parseQuery(rest);
        _parseFragment(rest);

        if (schemeFound) {
            // 'rest' is now either empty or the trailing "#fragment". Strip
            // it, the fragment is not part of the scheme-specific part.
            int sspLength = _schemeSpecificPart.length() - rest.length();
            _schemeSpecificPart = _schemeSpecificPart.substring(0, sspLength);
        }
    }

    /**
     * Gets the index of the first character in <tt>s</tt>, at or after
     * <tt>startIdx</tt>, that is contained in <tt>stopChars</tt>. Returns
     * the length of <tt>s</tt> if there is no such character.
     **/
    private static int _indexOfAny(String s, int startIdx, String stopChars) {
        for (int i = startIdx; i < s.length(); i++) {
            if (stopChars.indexOf(s.charAt(i)) != -1) {
                return i;
            }
        }

        return s.length();
    }

    /**
     * Tries to find a scheme at the start of the supplied URI. If found,
     * _scheme and _schemeSpecificPart (everything after the ':') are set.
     *
     * @return <tt>true</tt> if a scheme was found, <tt>false</tt> if the URI
     * is relative.
     **/
    private boolean _parseScheme(String uri) {
        // Scheme cannot contain a ':', '/', '?' or '#' character and must be
        // terminated by a ':'
        int i = _indexOfAny(uri, 0, ":/?#");

        if (i > 0 && i < uri.length() && uri.charAt(i) == ':') {
            // We've found a scheme
            _scheme = uri.substring(0, i);
            _schemeSpecificPart = uri.substring(i + 1);
            return true;
        }

        // No scheme found, uri is relative
        return false;
    }

    /**
     * Parses the authority, if any, at the start of <tt>s</tt>.
     *
     * @return The part of <tt>s</tt> that follows the authority.
     **/
    private String _parseAuthority(String s) {
        if (!s.startsWith("//")) {
            return s;
        }

        // Authority present, could be empty though. It cannot contain a '/',
        // '?' or '#' character.
        int end = _indexOfAny(s, 2, "/?#");

        _authority = s.substring(2, end);
        return s.substring(end);
    }

    /**
     * Parses the (possibly empty) path at the start of <tt>s</tt>.
     *
     * @return The part of <tt>s</tt> that follows the path.
     **/
    private String _parsePath(String s) {
        // Path cannot contain a '?' or '#' character
        int end = _indexOfAny(s, 0, "?#");

        _path = s.substring(0, end);
        return s.substring(end);
    }

    /**
     * Parses the query, if any, at the start of <tt>s</tt>.
     *
     * @return The part of <tt>s</tt> that follows the query.
     **/
    private String _parseQuery(String s) {
        // Query must start with a '?' and cannot contain a '#' character
        if (!s.startsWith("?")) {
            return s;
        }

        int end = _indexOfAny(s, 1, "#");

        _query = s.substring(1, end);
        return s.substring(end);
    }

    /**
     * Sets the scheme-specific part of an opaque URI to the part of
     * <tt>s</tt> that precedes the fragment.
     *
     * @return The part of <tt>s</tt> that follows the opaque part.
     **/
    private String _parseOpaquePart(String s) {
        // Opaque part cannot contain a '#' character
        int end = _indexOfAny(s, 0, "#");

        _schemeSpecificPart = s.substring(0, end);
        return s.substring(end);
    }

    /**
     * Parses the fragment, if any, at the start of <tt>s</tt>.
     **/
    private void _parseFragment(String s) {
        // Fragment must start with a '#'
        if (s.startsWith("#")) {
            _fragment = s.substring(1);
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy