All downloads are free. Search and download functionality uses the official Maven repository.

org.archive.url.WaybackURLKeyMaker Maven / Gradle / Ivy

There is a newer version: 1.1.9
Show newest version
package org.archive.url;

import java.net.URISyntaxException;
import java.util.List;

/**
 * {@link URLKeyMaker} that turns a URL string into a lookup key by parsing it,
 * running it through a {@link URLCanonicalizer}, and rendering the result
 * either in SURT form or in plain form, depending on {@link #isSurtMode()}.
 *
 * <p>An optional list of {@link RewriteRule}s can be installed with
 * {@link #setCustomRules(List)}; see {@link #makeKey(String)} for when they
 * are applied.
 */
public class WaybackURLKeyMaker implements URLKeyMaker {
	/** Key returned for a null or empty URL. */
	private static final String EMPTY_KEY = "-";

	/** Scheme prefix that marks a DNS record URL. */
	private static final String DNS_PREFIX = "dns:";

//	URLCanonicalizer canonicalizer = new NonMassagingIAURLCanonicalizer();
	URLCanonicalizer canonicalizer = new DefaultIAURLCanonicalizer();

	/** Whether keys are rendered in SURT form; fixed at construction time. */
	private final boolean surtMode;

	/** Optional rewrite rules; {@code null} means "no custom rules". */
	protected List<RewriteRule> customRules;

	/** Creates a key maker that operates in SURT mode. */
	public WaybackURLKeyMaker() {
		this(true);
	}

	/**
	 * Creates a key maker with an explicit output mode.
	 *
	 * @param surtMode {@code true} to produce SURT-form keys, {@code false}
	 *                 to produce plain-form keys
	 */
	public WaybackURLKeyMaker(boolean surtMode) {
		this.surtMode = surtMode;
	}

	/** @return the canonicalizer applied to every parsed URL */
	public URLCanonicalizer getCanonicalizer() {
		return canonicalizer;
	}

	/** @param canonicalizer the canonicalizer to apply to every parsed URL */
	public void setCanonicalizer(URLCanonicalizer canonicalizer) {
		this.canonicalizer = canonicalizer;
	}

	/** @return {@code true} if this key maker produces SURT-form keys */
	public boolean isSurtMode() {
		return surtMode;
	}

	/**
	 * Builds the key for the given URL.
	 *
	 * <ul>
	 * <li>A {@code null} or empty URL yields {@code "-"}.</li>
	 * <li>A URL starting with {@code "filedesc"} or {@code "warcinfo"} is
	 *     returned unchanged.</li>
	 * <li>For a URL starting with {@code "dns:"}, the text after the prefix is
	 *     returned as-is in plain mode; in SURT mode it is converted with
	 *     {@code URLRegexTransformer.hostToSURT} and {@code ")"} is appended.</li>
	 * <li>Any other URL is parsed, canonicalized and rendered. In plain mode
	 *     that rendering is the key. In SURT mode everything up to and
	 *     including the first {@code '('} is dropped and the custom rules, if
	 *     any are set, are applied to the remainder. If the rendering contains
	 *     no {@code '('}, the original URL is returned unchanged.</li>
	 * </ul>
	 *
	 * <p>Custom rules are therefore only applied on the SURT-mode path for
	 * parsed URLs; the special cases above bypass them.
	 *
	 * @param url the URL to build a key for; may be {@code null}
	 * @return the key for {@code url}
	 * @throws URISyntaxException declared by the interface; presumably raised
	 *         by parsing or canonicalization when the URL is malformed
	 */
	public String makeKey(String url) throws URISyntaxException {
		if (url == null || url.length() == 0) {
			return EMPTY_KEY;
		}
		if (url.startsWith("filedesc") || url.startsWith("warcinfo")) {
			return url;
		}
		if (url.startsWith(DNS_PREFIX)) {
			String authority = url.substring(DNS_PREFIX.length());
			if (!surtMode) {
				return authority;
			}
			return URLRegexTransformer.hostToSURT(authority) + ")";
		}

		HandyURL hURL = URLParser.parse(url);
		canonicalizer.canonicalize(hURL);
		String key = hURL.getURLString(surtMode, surtMode, false);
		if (!surtMode) {
			return key;
		}

		int parenIdx = key.indexOf('(');
		if (parenIdx == -1) {
			// No opening parenthesis in the SURT rendering: something is very
			// wrong, so fall back to the untouched input.
			return url;
		}
		key = key.substring(parenIdx + 1);

		if (customRules != null) {
			key = applyCustomRules(key);
		}
		return key;
	}

	/** @return the installed rewrite rules, or {@code null} if none are set */
	public List<RewriteRule> getCustomRules() {
		return customRules;
	}

	/** @param customRules the rewrite rules to install; {@code null} disables them */
	public void setCustomRules(List<RewriteRule> customRules) {
		this.customRules = customRules;
	}

	/**
	 * Runs every installed rewrite rule, in list order, over the given key.
	 * All rules operate on the same buffer, so each rule sees the changes
	 * made by the rules before it.
	 *
	 * @param urlkey the key to rewrite
	 * @return the rewritten key, or {@code urlkey} unchanged when no rules
	 *         are installed
	 */
	protected String applyCustomRules(String urlkey) {
		if (customRules == null) {
			// Guard for subclasses that call this directly without rules set.
			return urlkey;
		}

		StringBuilder sb = new StringBuilder(urlkey);
		for (RewriteRule rule : customRules) {
			rule.rewrite(sb);
		}
		return sb.toString();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy