com.yahoo.net.UrlTokenizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of vespajlib Show documentation
Show all versions of vespajlib Show documentation
Library for use in Java components of Vespa. Shared code that does
not fit anywhere else.
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.net;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
 * Splits a {@link Url} into a flat list of {@link UrlToken}s.
 *
 * <p>Each URL component (scheme, user info, password, host, port, path, query, fragment) is broken into
 * lower-cased terms consisting of ASCII letters, digits, {@code '-'} and {@code '_'}; every other character
 * acts as a term separator. Host terms are bracketed by the marker terms {@link #TERM_STARTHOST} and
 * {@link #TERM_ENDHOST}. When the URL carries no scheme or no port, one is guessed from the other using a
 * fixed table of well-known scheme/port pairs.</p>
 *
 * @author Simon Thoresen Hult
 */
public class UrlTokenizer {

    /** Marker term emitted immediately before the host terms. */
    public static final String TERM_STARTHOST = "StArThOsT";

    /** Marker term emitted immediately after the host terms. */
    public static final String TERM_ENDHOST = "EnDhOsT";

    private static final Map<String, String> schemeToPort = new HashMap<>();
    private static final Map<String, String> portToScheme = new HashMap<>();

    /** Distance from an ASCII upper-case letter to its lower-case counterpart. */
    private static final int TO_LOWER = 'a' - 'A';

    private final Url url;

    // Several schemes share a port (https/shttp on 443, news/nntp on 119, sftp/ssh on 22). registerScheme
    // overwrites the port-to-scheme entry, so the scheme registered LAST is the one guessed for that port.
    static {
        registerScheme("ftp", 21);
        registerScheme("gopher", 70);
        registerScheme("http", 80);
        registerScheme("https", 443);
        registerScheme("imap", 143);
        registerScheme("mailto", 25);
        registerScheme("news", 119);
        registerScheme("nntp", 119);
        registerScheme("pop", 110);
        registerScheme("rsync", 873);
        registerScheme("rtsp", 554);
        registerScheme("sftp", 22);
        registerScheme("shttp", 443);
        registerScheme("sip", 5060);
        registerScheme("sips", 5061);
        registerScheme("snmp", 161);
        registerScheme("ssh", 22);
        registerScheme("telnet", 23);
        registerScheme("tftp", 69);
    }

    /**
     * Creates a tokenizer for the given URL string, parsed with {@link Url#fromString(String)}.
     *
     * @param url the URL text to tokenize
     */
    public UrlTokenizer(String url) {
        this(Url.fromString(url));
    }

    /**
     * Creates a tokenizer for an already parsed URL.
     *
     * @param url the URL to tokenize
     */
    public UrlTokenizer(Url url) {
        this.url = url;
    }

    /**
     * Returns the scheme registered for the given port, or {@code "http"} when the port is {@code null}
     * or not registered.
     */
    private static String guessScheme(String port) {
        return portToScheme.getOrDefault(port, "http");
    }

    /**
     * Returns the port registered for the given scheme, or {@code null} when there is none.
     * The lookup is case-sensitive against lower-case keys.
     * NOTE(review): confirm whether Url already lower-cases the scheme; otherwise "HTTP" gets no guessed port.
     */
    private static String guessPort(String scheme) {
        return schemeToPort.get(scheme);
    }

    /**
     * Tokenizes every component of the URL, in the order scheme, user info, password, host, port, path,
     * query, fragment.
     *
     * <p>A missing scheme is guessed from the port, and a missing port is guessed from the scheme; guessed
     * tokens carry no original text. A missing or empty host yields host tokens only for the {@code file}
     * scheme, where {@code "localhost"} is substituted.</p>
     *
     * @return the tokens of the URL, in component order
     */
    public List<UrlToken> tokenize() {
        List<UrlToken> lst = new LinkedList<>();
        // Position given to synthesized (guessed) tokens: the end of the last component seen so far.
        int offset = 0;

        String port = url.getPortString();
        String scheme = url.getScheme();
        if (scheme == null) {
            scheme = guessScheme(port);
            addTokens(lst, UrlToken.Type.SCHEME, offset, scheme, false);
        } else {
            addTokens(lst, UrlToken.Type.SCHEME, url.getSchemeBegin(), scheme, true);
            offset = url.getSchemeEnd();
        }

        String userInfo = url.getUserInfo();
        if (userInfo != null) {
            addTokens(lst, UrlToken.Type.USERINFO, url.getUserInfoBegin(), userInfo, true);
            offset = url.getUserInfoEnd();
        }

        String password = url.getPassword();
        if (password != null) {
            addTokens(lst, UrlToken.Type.PASSWORD, url.getPasswordBegin(), password, true);
            offset = url.getPasswordEnd();
        }

        String host = url.getHost();
        if (host == null || host.isEmpty()) {
            if (host != null) {
                offset = url.getHostBegin();
            }
            if ("file".equalsIgnoreCase(scheme)) {
                addHostTokens(lst, offset, offset, "localhost", false);
            }
        } else {
            addHostTokens(lst, url.getHostBegin(), url.getHostEnd(), host, true);
            offset = url.getHostEnd();
        }

        if (port == null) {
            String guessedPort = guessPort(scheme);
            if (guessedPort != null) {
                addTokens(lst, UrlToken.Type.PORT, offset, guessedPort, false);
            }
        } else {
            addTokens(lst, UrlToken.Type.PORT, url.getPortBegin(), port, true);
        }

        String path = url.getPath();
        if (path != null) {
            addTokens(lst, UrlToken.Type.PATH, url.getPathBegin(), path, true);
        }

        String query = url.getQuery();
        if (query != null) {
            addTokens(lst, UrlToken.Type.QUERY, url.getQueryBegin(), query, true);
        }

        String fragment = url.getFragment();
        if (fragment != null) {
            addTokens(lst, UrlToken.Type.FRAGMENT, url.getFragmentBegin(), fragment, true);
        }
        return lst;
    }

    /**
     * Splits {@code image} into terms and appends one token per term to {@code lst}.
     *
     * <p>A term is a maximal run of ASCII letters, digits, {@code '-'} and {@code '_'}; letters are
     * lower-cased. A well-formed percent escape ({@code %XX} with two hex digits) is decoded to a single
     * character before classification. A {@code '%'} that is not followed by two hex digits is treated as
     * an ordinary separator instead of raising an exception.</p>
     *
     * @param lst    the list to append tokens to
     * @param type   the component type assigned to every token created
     * @param offset the position of {@code image}; when {@code orig} is true, the index of the term within
     *               {@code image} is added to it, otherwise it is used unchanged for every token
     * @param image  the component text to split
     * @param orig   whether {@code image} is text taken from the URL itself; if so each token also carries
     *               the undecoded substring it was built from, otherwise that original text is {@code null}
     */
    public static void addTokens(List<UrlToken> lst, UrlToken.Type type, int offset, String image, boolean orig) {
        StringBuilder term = new StringBuilder();
        int prev = 0; // index in image where the current term started
        for (int skip, next = 0, len = image.length(); next < len; next += skip) {
            char c = image.charAt(next);
            skip = 1;
            if (c == '%') {
                int decoded = decodePercentEscape(image, next);
                if (decoded >= 0) {
                    c = (char)decoded;
                    skip = 3;
                }
                // Malformed escape: '%' stays as is and is handled as a separator below.
            }
            if ((c >= '0' && c <= '9') ||
                (c >= 'a' && c <= 'z') ||
                (c == '-' || c == '_'))
            {
                term.append(c);
            } else if (c >= 'A' && c <= 'Z') {
                term.append((char)(c + TO_LOWER));
            } else {
                // Separator: flush the pending term, if any. prev < next holds exactly when term is non-empty.
                if (prev < next) {
                    lst.add(new UrlToken(type, offset + (orig ? prev : 0), orig ? image.substring(prev, next) : null,
                                         term.toString()));
                    term = new StringBuilder();
                }
                prev = next + skip;
            }
        }
        if (term.length() > 0) {
            lst.add(new UrlToken(type, offset + (orig ? prev : 0), orig ? image.substring(prev) : null,
                                 term.toString()));
        }
    }

    /**
     * Decodes the percent escape starting at {@code pos} (the index of the {@code '%'}).
     *
     * @return the decoded value in the range 0-255, or -1 if fewer than two characters follow the
     *         {@code '%'} or either of them is not a hexadecimal digit
     */
    private static int decodePercentEscape(String image, int pos) {
        if (pos + 2 >= image.length()) {
            return -1;
        }
        int hi = Character.digit(image.charAt(pos + 1), 16);
        int lo = Character.digit(image.charAt(pos + 2), 16);
        if (hi < 0 || lo < 0) {
            return -1;
        }
        return (hi << 4) | lo;
    }

    /**
     * Appends the host terms of {@code image}, preceded by a {@link #TERM_STARTHOST} marker token at
     * {@code begin} and followed by a {@link #TERM_ENDHOST} marker token at {@code end}.
     */
    private static void addHostTokens(List<UrlToken> lst, int begin, int end, String image, boolean orig) {
        lst.add(new UrlToken(UrlToken.Type.HOST, begin, null, TERM_STARTHOST));
        addTokens(lst, UrlToken.Type.HOST, begin, image, orig);
        lst.add(new UrlToken(UrlToken.Type.HOST, end, null, TERM_ENDHOST));
    }

    /**
     * Records a scheme/port pair in both lookup tables. A later registration for the same scheme or the
     * same port replaces the earlier entry in the corresponding table.
     */
    private static void registerScheme(String scheme, int port) {
        String str = String.valueOf(port);
        schemeToPort.put(scheme, str);
        portToScheme.put(str, scheme);
    }
}