Please wait. This can take a few minutes ...
Downloading a project requires a lot of resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
com.yahoo.net.UrlTokenizer Maven / Gradle / Ivy
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.net;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
 * Breaks a {@link Url} into a flat list of {@link UrlToken}s, one URL component at a time
 * (scheme, user info, password, host, port, path, query, fragment).
 *
 * <p>A term is a run of ASCII letters, digits, {@code '-'} and {@code '_'}; upper-case letters are
 * lower-cased and every other character separates terms. Percent escapes ({@code %XX}) are decoded
 * before classification. The host terms are bracketed by the marker terms {@link #TERM_STARTHOST}
 * and {@link #TERM_ENDHOST}.</p>
 *
 * <p>When the URL has no scheme or no port, one is looked up in a built-in table of scheme/port
 * pairs and emitted as a token without an original image.</p>
 *
 * @author Simon Thoresen Hult
 */
public class UrlTokenizer {

    /** Marker term emitted immediately before the host terms. */
    public static final String TERM_STARTHOST = "StArThOsT";
    /** Marker term emitted immediately after the host terms. */
    public static final String TERM_ENDHOST = "EnDhOsT";

    /** Scheme name to default port (as a decimal string). */
    private static final Map<String, String> schemeToPort = new HashMap<>();
    /** Port (as a decimal string) to scheme name; see {@link #registerScheme} for collisions. */
    private static final Map<String, String> portToScheme = new HashMap<>();
    /** Distance from an upper-case ASCII letter to its lower-case counterpart. */
    private static final int LOWERCASE_SHIFT = 'a' - 'A';

    private final Url url;

    static {
        registerScheme("ftp", 21);
        registerScheme("gopher", 70);
        registerScheme("http", 80);
        registerScheme("https", 443);
        registerScheme("imap", 143);
        registerScheme("mailto", 25);
        registerScheme("news", 119);
        registerScheme("nntp", 119);
        registerScheme("pop", 110);
        registerScheme("rsync", 873);
        registerScheme("rtsp", 554);
        registerScheme("sftp", 22);
        registerScheme("shttp", 443);
        registerScheme("sip", 5060);
        registerScheme("sips", 5061);
        registerScheme("snmp", 161);
        registerScheme("ssh", 22);
        registerScheme("telnet", 23);
        registerScheme("tftp", 69);
    }

    /**
     * Creates a tokenizer for the given URL string, parsed with {@link Url#fromString(String)}.
     *
     * @param url the URL to tokenize
     */
    public UrlTokenizer(String url) {
        this(Url.fromString(url));
    }

    /**
     * Creates a tokenizer for an already parsed URL.
     *
     * @param url the URL to tokenize
     */
    public UrlTokenizer(Url url) {
        this.url = url;
    }

    /**
     * Returns the scheme registered for the given port, or {@code "http"} when the port is
     * {@code null} or not registered.
     */
    private static String guessScheme(String port) {
        String scheme = portToScheme.get(port);
        return scheme != null ? scheme : "http";
    }

    /** Returns the port registered for the given scheme, or {@code null} when there is none. */
    private static String guessPort(String scheme) {
        return schemeToPort.get(scheme);
    }

    /**
     * Tokenizes the URL given at construction time.
     *
     * <p>Components are visited in the order scheme, user info, password, host, port, path, query,
     * fragment; absent ({@code null}) components produce no tokens, with these exceptions:</p>
     * <ul>
     *   <li>a missing scheme is guessed from the port (falling back to {@code "http"});</li>
     *   <li>a missing or empty host is replaced by {@code "localhost"} when the scheme is
     *       {@code "file"} (case-insensitive);</li>
     *   <li>a missing port is guessed from the scheme, when the scheme is registered.</li>
     * </ul>
     * <p>Such guessed tokens are added without an original image.</p>
     *
     * @return a new, mutable list of tokens in component order
     */
    public List<UrlToken> tokenize() {
        List<UrlToken> lst = new LinkedList<>();
        // Position at which synthetic (guessed) tokens are anchored; advanced past each
        // component that is actually present in the URL.
        int offset = 0;

        String port = url.getPortString();
        String scheme = url.getScheme();
        if (scheme == null) {
            scheme = guessScheme(port);
            addTokens(lst, UrlToken.Type.SCHEME, offset, scheme, false);
        } else {
            addTokens(lst, UrlToken.Type.SCHEME, url.getSchemeBegin(), scheme, true);
            offset = url.getSchemeEnd();
        }

        String userInfo = url.getUserInfo();
        if (userInfo != null) {
            addTokens(lst, UrlToken.Type.USERINFO, url.getUserInfoBegin(), userInfo, true);
            offset = url.getUserInfoEnd();
        }

        String password = url.getPassword();
        if (password != null) {
            addTokens(lst, UrlToken.Type.PASSWORD, url.getPasswordBegin(), password, true);
            offset = url.getPasswordEnd();
        }

        String host = url.getHost();
        if (host == null || host.isEmpty()) {
            if (host != null) {
                // An empty host still has a position in the URL; anchor synthetic tokens there.
                offset = url.getHostBegin();
            }
            if ("file".equalsIgnoreCase(scheme)) {
                addHostTokens(lst, offset, offset, "localhost", false);
            }
        } else {
            addHostTokens(lst, url.getHostBegin(), url.getHostEnd(), host, true);
            offset = url.getHostEnd();
        }

        if (port == null) {
            port = guessPort(scheme);
            if (port != null) {
                addTokens(lst, UrlToken.Type.PORT, offset, port, false);
            }
        } else {
            addTokens(lst, UrlToken.Type.PORT, url.getPortBegin(), port, true);
        }

        String path = url.getPath();
        if (path != null) {
            addTokens(lst, UrlToken.Type.PATH, url.getPathBegin(), path, true);
        }

        String query = url.getQuery();
        if (query != null) {
            addTokens(lst, UrlToken.Type.QUERY, url.getQueryBegin(), query, true);
        }

        String fragment = url.getFragment();
        if (fragment != null) {
            addTokens(lst, UrlToken.Type.FRAGMENT, url.getFragmentBegin(), fragment, true);
        }
        return lst;
    }

    /**
     * Splits {@code image} into terms and appends one {@link UrlToken} per term to {@code lst}.
     *
     * <p>A {@code %} followed by two hexadecimal digits is decoded to the character with that
     * code before classification and counts as three characters of the image. A {@code %} that is
     * not followed by two hexadecimal digits is treated as an ordinary delimiter instead of
     * failing the whole tokenization.</p>
     *
     * @param lst    the list to append tokens to
     * @param type   the token type to assign to every token created
     * @param offset the position of {@code image}; each token is created with this value plus the
     *               term's start index inside {@code image} when {@code orig} is set, and with
     *               this value unchanged otherwise
     * @param image  the text to split; must not be {@code null}
     * @param orig   whether {@code image} is taken verbatim from the URL; if so each token also
     *               carries the matching substring of {@code image}, otherwise {@code null}
     */
    public static void addTokens(List<UrlToken> lst, UrlToken.Type type, int offset, String image, boolean orig) {
        StringBuilder term = new StringBuilder();
        int prev = 0; // index in image where the term being collected starts
        for (int skip, next = 0, len = image.length(); next < len; next += skip) {
            char c = image.charAt(next);
            skip = 1;
            if (c == '%') {
                int decoded = decodePercentEscape(image, next);
                if (decoded >= 0) {
                    c = (char)decoded;
                    skip = 3;
                }
                // Otherwise the escape is malformed: '%' stays as-is and acts as a delimiter below.
            }
            if ((c >= '0' && c <= '9') ||
                (c >= 'a' && c <= 'z') ||
                (c == '-' || c == '_'))
            {
                term.append(c);
            } else if (c >= 'A' && c <= 'Z') {
                term.append((char)(c + LOWERCASE_SHIFT));
            } else {
                // Delimiter: flush the term collected so far (if any) and start after it.
                if (prev < next) {
                    lst.add(new UrlToken(type, offset + (orig ? prev : 0), orig ? image.substring(prev, next) : null,
                                         term.toString()));
                    term = new StringBuilder();
                }
                prev = next + skip;
            }
        }
        if (term.length() > 0) {
            lst.add(new UrlToken(type, offset + (orig ? prev : 0), orig ? image.substring(prev) : null,
                                 term.toString()));
        }
    }

    /**
     * Decodes the percent escape starting at {@code pos} (the index of the {@code '%'}).
     *
     * @return the decoded value in the range 0-255, or -1 if fewer than two characters follow
     *         the {@code '%'} or either of them is not a hexadecimal digit
     */
    private static int decodePercentEscape(String image, int pos) {
        if (pos + 2 >= image.length()) {
            return -1;
        }
        int hi = Character.digit(image.charAt(pos + 1), 16);
        int lo = Character.digit(image.charAt(pos + 2), 16);
        if (hi < 0 || lo < 0) {
            return -1;
        }
        return (hi << 4) | lo;
    }

    /**
     * Appends the host tokens for {@code image}, preceded by a {@link #TERM_STARTHOST} token at
     * {@code begin} and followed by a {@link #TERM_ENDHOST} token at {@code end}. The two marker
     * tokens never carry an original image.
     */
    private static void addHostTokens(List<UrlToken> lst, int begin, int end, String image, boolean orig) {
        lst.add(new UrlToken(UrlToken.Type.HOST, begin, null, TERM_STARTHOST));
        addTokens(lst, UrlToken.Type.HOST, begin, image, orig);
        lst.add(new UrlToken(UrlToken.Type.HOST, end, null, TERM_ENDHOST));
    }

    /**
     * Records a scheme and its default port in both lookup tables.
     *
     * <p>NOTE(review): when two schemes share a port, the later registration replaces the earlier
     * one in the port-to-scheme table, so with the registrations above port 443 resolves to
     * {@code "shttp"}, 119 to {@code "nntp"} and 22 to {@code "ssh"}. Left unchanged because it
     * affects the tokens emitted for scheme-less URLs; confirm whether this is intended.</p>
     */
    private static void registerScheme(String scheme, int port) {
        String str = String.valueOf(port);
        schemeToPort.put(scheme, str);
        portToScheme.put(str, scheme);
    }
}