// com.github.axet.wget.info.URLInfo Maven / Gradle / Ivy
package com.github.axet.wget.info;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.github.axet.wget.RetryWrap;
import com.github.axet.wget.WGet;
import com.github.axet.wget.errors.DownloadRetry;
/**
* URLInfo - keep all information about source in one place. Thread safe.
*
*/
public class URLInfo extends BrowserInfo {
/** Serialization version identifier. */
private static final long serialVersionUID = 7260247341480497184L;
// Extracts the file name from a Content-Disposition header value (group 1).
// Supports two forms, with and without quotes:
//
// 1) contentDisposition="attachment;filename="ap61.ram"";
// 2) contentDisposition="attachment;filename=ap61.ram";
public static Pattern FILENAME = Pattern.compile("filename=[\"]*([^\"]*)[\"]*"); // content disposition pattern
public static Pattern BYTES = Pattern.compile("bytes \\d+-\\d+/(\\d+)"); // Content-Range value; group 1 is the total size
/**
* Connect socket timeout in milliseconds; applied to every connection created by openConnection().
*/
public static int CONNECT_TIMEOUT = 10 * 1000;
/**
* Read socket timeout in milliseconds; applied to every connection created by openConnection().
*/
public static int READ_TIMEOUT = 10 * 1000;
/**
* Source URL (set by the user).
*/
protected URL source;
/**
* Download URL; starts equal to the source and is replaced when the request is redirected/moved.
*/
protected URL url;
/**
* Source referer, may be null.
*/
protected URL referer;
/**
* Has the information been extracted? Written by setEmpty(boolean) and read (negated) by empty().
*/
protected boolean extract = false;
/**
* Null if the size is unknown, which means we are unable to restore downloads or do multi-threaded downloads.
*/
protected Long length;
/**
* Does the server support RANGE requests?
*/
protected boolean range;
/**
* Content type reported by the server; null if there is no such file or another error occurred.
*/
protected String contentType;
/**
* File name taken from the header: Content-Disposition: attachment; filename="fname.ext"
*/
protected String contentFilename;
// Cookie sent as the "Cookie" request header; captured from "Set-cookie" when a meta refresh is followed.
protected String cookie;
/**
 * States reported to listeners while a URL is being processed.
 */
public enum States {
    EXTRACTING,
    EXTRACTING_DONE,
    DOWNLOADING,
    RETRYING,
    STOP,
    ERROR,
    DONE
}
/**
* Current download state; null until the first setState/setDelay call or load().
*/
protected States state;
/**
* Downloading error / retry error; cleared by setState(States).
*/
protected Throwable exception;
/**
* Retry delay as passed to setDelay(int, Throwable); reset to 0 by every setState call.
*/
protected int delay;
/**
* Retry count; extract() increments it on each error and resets it to 0 on resume.
*/
protected int retry;
/**
 * Serializes an exception as {@code "<type>: <message>"} so that {@link #toThroable(String)} can
 * rebuild it.
 *
 * The type is written as the simple class name for top-level classes of the wget errors package,
 * {@code java.lang} and {@code java.io}; every other class is written with its binary name
 * ({@link Class#getName()}), which is the form {@link Class#forName(String)} can load again.
 * A null message is written as the text {@code null}.
 *
 * @param e exception to serialize, must not be null
 * @return the serialized form
 */
public static String toString(Throwable e) { // simple exception serialization
    Class<?> type = e.getClass();
    // getPackage() may return null when the class loader recorded no package information
    Package pkg = type.getPackage();
    String pkgName = pkg == null ? "" : pkg.getName();
    boolean knownPackage = Arrays.asList(DownloadRetry.class.getPackage().getName(),
            Throwable.class.getPackage().getName(), IOException.class.getPackage().getName()).contains(pkgName);
    // the short form is only reversible for top-level classes: nested, local and anonymous classes
    // cannot be found again as "<package>.<simple name>"
    boolean topLevel = (pkgName + "." + type.getSimpleName()).equals(type.getName());
    String name = knownPackage && topLevel ? type.getSimpleName() : type.getName();
    return name + ": " + e.getMessage();
}
/**
 * Splits {@code "<type>: <message>"} at the FIRST {@code ": "}; the message itself may contain
 * further {@code ": "} sequences and line breaks.
 */
private static final Pattern THROWABLE_FORMAT = Pattern.compile("(.*?): (.*)", Pattern.DOTALL);

/**
 * Rebuilds an exception from the text produced by {@link #toString(Throwable)}.
 *
 * The type name is resolved, in order, against the wget errors package, {@code java.lang},
 * {@code java.io} and finally as a fully qualified class name. The first candidate that is a
 * {@link Throwable} with a public {@code (String)} constructor wins. When nothing matches, a
 * {@link RuntimeException} carrying the whole input as its message is returned.
 *
 * @param s serialized exception, may be null
 * @return the rebuilt exception, or null when {@code s} is null
 */
public static Throwable toThroable(String s) { // simple exception deserialization
    if (s == null)
        return null;
    Matcher m = THROWABLE_FORMAT.matcher(s);
    if (m.matches()) {
        String n = m.group(1);
        String msg = m.group(2);
        String[] candidates = {
                DownloadRetry.class.getPackage().getName() + "." + n, // wget errors
                RuntimeException.class.getPackage().getName() + "." + n, // java.lang
                IOException.class.getPackage().getName() + "." + n, // java.io
                n // full class name
        };
        for (String candidate : candidates) {
            Throwable t = newThrowable(candidate, msg);
            if (t != null)
                return t;
        }
    }
    return new RuntimeException(s);
}

/**
 * Instantiates {@code className} with {@code msg}, or returns null when the class cannot be
 * loaded, is not a Throwable, or has no usable public {@code (String)} constructor.
 */
private static Throwable newThrowable(String className, String msg) {
    try {
        // initialize=false: do not run static initializers of a class that may be rejected below
        Class<?> k = Class.forName(className, false, URLInfo.class.getClassLoader());
        // The name comes from stored data. Never construct anything that is not a Throwable:
        // e.g. "FileOutputStream: <path>" would otherwise resolve in java.io and truncate a file.
        if (!Throwable.class.isAssignableFrom(k))
            return null;
        return (Throwable) k.getConstructor(String.class).newInstance(msg);
    } catch (Exception ignored) {
        // unknown class, abstract class, missing constructor, ... - caller tries the next candidate
        return null;
    }
}
/**
 * Parses a URL from its string form.
 *
 * @param r URL text, may be null
 * @return the parsed URL, or null when {@code r} is null
 * @throws RuntimeException wrapping the MalformedURLException when {@code r} is not a valid URL
 */
public static URL toURL(String r) { // simple URL serialization
    URL parsed = null;
    if (r != null) {
        try {
            parsed = new URL(r);
        } catch (MalformedURLException malformed) {
            throw new RuntimeException(malformed);
        }
    }
    return parsed;
}
/**
 * Creates an empty instance; fields keep their defaults until {@link #load(JSONObject)} or the
 * setters fill them.
 */
public URLInfo() {
    super();
}

/**
 * Creates an instance for the given source; the download URL starts out equal to the source.
 *
 * @param source URL requested by the user
 */
public URLInfo(URL source) {
    super();
    this.url = source;
    this.source = source;
}

/**
 * Creates an instance from a previously saved state by calling {@link #load(JSONObject)}.
 *
 * @param o JSON object to load from
 */
public URLInfo(JSONObject o) {
    super();
    load(o);
}
/**
 * Opens a connection to the current download URL, going through the configured proxy when one
 * is set, and applies cookie, timeouts, user agent and referer to the request.
 *
 * @return the prepared connection
 * @throws IOException when the connection cannot be opened
 */
public HttpURLConnection openConnection() throws IOException {
    final HttpURLConnection conn = (HttpURLConnection) (getProxy() == null
            ? url.openConnection()
            : url.openConnection(getProxy().proxy));

    if (cookie != null) {
        conn.setRequestProperty("Cookie", cookie);
    }

    conn.setConnectTimeout(CONNECT_TIMEOUT);
    conn.setReadTimeout(READ_TIMEOUT);
    conn.setRequestProperty("User-Agent", getUserAgent());

    final URL ref = getReferer();
    if (ref != null) {
        conn.setRequestProperty("Referer", ref.toExternalForm());
    }
    return conn;
}
/**
 * Extracts the URL information without a stop flag that anybody else can set and without
 * notifications.
 */
public void extract() {
    final Runnable silent = new Runnable() {
        @Override
        public void run() {
            // nobody to notify
        }
    };
    extract(new AtomicBoolean(false), silent);
}
/**
 * Upper bound on chained meta refresh hops followed during one download attempt. A page that
 * keeps refreshing (for example an auto-reloading page) would otherwise recurse without end.
 */
private static final int MAX_META_REFRESH = 10;

/**
 * Matches the content value of a meta refresh element, e.g. {@code 0; URL='http://host/path'};
 * group 1 is everything after {@code url=}.
 */
private static final Pattern META_REFRESH = Pattern.compile("^[^;,]*[;,]\\s*url\\s*=\\s*(.*?)\\s*$",
        Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/**
 * Queries the server for information about the download URL: range support, length, content
 * type and the file name from Content-Disposition. HTML pages carrying a meta refresh element
 * are followed to their target.
 *
 * The state is set to EXTRACTING while working, RETRYING when the request is moved or retried,
 * EXTRACTING_DONE on success and ERROR when a RuntimeException escapes; {@code notify} runs
 * after the EXTRACTING, RETRYING and EXTRACTING_DONE transitions.
 *
 * @param stop   stop flag handed to the retry wrapper and the HTML reader
 * @param notify callback run after state changes
 * @throws RuntimeException whatever the retry wrapper or header processing throws; the state is
 *                          set to ERROR before it is rethrown
 */
public void extract(final AtomicBoolean stop, final Runnable notify) {
    HttpURLConnection connection = null;
    try {
        connection = RetryWrap.wrap(stop, new RetryWrap.WrapReturn<HttpURLConnection>() {
            @Override
            public void proxy() {
                // the proxy is optional, see openConnection()
                if (getProxy() != null)
                    getProxy().set();
            }

            @Override
            public void resume() {
                setRetry(0);
            }

            @Override
            public void error(Throwable e) {
                setRetry(getRetry() + 1);
            }

            @Override
            public HttpURLConnection download() throws IOException {
                return fetch(0);
            }

            // one request cycle; hops counts the meta refresh redirects followed so far
            HttpURLConnection fetch(int hops) throws IOException {
                setState(States.EXTRACTING);
                notify.run();
                try {
                    return meta(extractRange(), hops);
                } catch (DownloadRetry e) {
                    throw e;
                } catch (RuntimeException e) {
                    // range probing failed - fall back to a plain request
                    return meta(extractNormal(), hops);
                }
            }

            // follows a meta refresh redirect when the response is an HTML page carrying one
            HttpURLConnection meta(HttpURLConnection conn, int hops) throws IOException {
                String ct = conn.getContentType();
                if (ct == null)
                    return conn;
                int semi = ct.indexOf(';');
                String contentType = (semi < 0 ? ct : ct.substring(0, semi)).trim();
                if (!contentType.equalsIgnoreCase("text/html"))
                    return conn;
                String html = WGet.getHtml(conn, stop);
                Document doc = Jsoup.parse(html);
                Element link = doc.select("meta[http-equiv=refresh]").first();
                if (link == null)
                    return conn;
                String target = metaRefreshTarget(link.attr("content"));
                if (target == null || hops >= MAX_META_REFRESH)
                    return conn;
                // resolve against the current URL: refresh targets are frequently relative
                URL next = new URL(url, target);
                setReferer(url);
                url = next;
                String c = conn.getHeaderField("Set-cookie");
                if (c != null)
                    setCookie(c);
                // this response is finished with; release it before requesting the target
                conn.disconnect();
                return fetch(hops + 1);
            }

            @Override
            public boolean retry(int d, Throwable ee) {
                setDelay(d, ee);
                notify.run();
                return RetryWrap.retry(getRetry());
            }

            @Override
            public void moved(URL u) {
                setReferer(url);
                url = u;
                setState(States.RETRYING);
                notify.run();
            }
        });
        setContentType(connection.getContentType());
        String contentDisposition = connection.getHeaderField("Content-Disposition");
        if (contentDisposition != null) {
            Matcher cm = FILENAME.matcher(contentDisposition);
            if (cm.find())
                setContentFilename(cm.group(1));
        }
        setEmpty(true);
        setState(States.EXTRACTING_DONE);
        notify.run();
    } catch (RuntimeException e) {
        setState(States.ERROR, e);
        throw e;
    } finally {
        // only the headers were needed; do not leave the socket (and an unread body) behind
        if (connection != null)
            connection.disconnect();
    }
}

/**
 * Returns the redirect target of a meta refresh content value, or null when it names none.
 * Accepts {@code url=} in any letter case, optional surrounding quotes and targets that contain
 * {@code ;} themselves.
 */
private static String metaRefreshTarget(String content) {
    if (content == null)
        return null;
    Matcher m = META_REFRESH.matcher(content);
    if (!m.matches())
        return null;
    String target = m.group(1);
    if (target.length() >= 2) {
        char quote = target.charAt(0);
        if ((quote == '"' || quote == '\'') && target.charAt(target.length() - 1) == quote)
            target = target.substring(1, target.length() - 1).trim();
    }
    return target.isEmpty() ? null : target;
}
/**
 * @return true while the information has not been extracted yet
 */
public synchronized boolean empty() {
    final boolean extracted = this.extract;
    return !extracted;
}

/**
 * Stores the extracted flag. Note the naming: the argument is written to the flag as is, so
 * {@code setEmpty(true)} makes {@link #empty()} return false.
 *
 * @param b new value of the extracted flag
 */
public synchronized void setEmpty(boolean b) {
    this.extract = b;
}
/**
 * Probes the server for RANGE support by requesting the first byte only. On success the total
 * length from the Content-Range header is stored and range support is recorded.
 *
 * @return the open connection holding the one byte response
 * @throws IOException      when the connection cannot be opened or checked
 * @throws RuntimeException when the response carries no usable Content-Range header ("range not
 *                          supported"); the probe connection is disconnected first
 */
protected HttpURLConnection extractRange() throws IOException {
    HttpURLConnection conn = openConnection();
    // may raise an exception if not supported by server
    conn.setRequestProperty("Range", "bytes=0-0");
    RetryWrap.check(conn);
    String range = conn.getHeaderField("Content-Range");
    Matcher m = range == null ? null : BYTES.matcher(range);
    if (m == null || !m.find()) {
        // the caller falls back to a new plain request; this connection is of no further use
        conn.disconnect();
        throw new RuntimeException("range not supported");
    }
    Long total;
    try {
        total = Long.valueOf(m.group(1));
    } catch (NumberFormatException e) {
        // total does not fit a long; treated like any other range failure by the caller
        conn.disconnect();
        throw e;
    }
    setLength(total);
    setRange(true);
    return conn;
}
/**
 * Plain request used when range probing failed: records that RANGE is not supported and stores
 * the length when the server reports one. The length is left untouched when the server reports
 * none.
 *
 * @return the open connection
 * @throws IOException when the connection cannot be opened or checked
 */
protected HttpURLConnection extractNormal() throws IOException {
    HttpURLConnection conn = openConnection();
    setRange(false);
    RetryWrap.check(conn);
    // read the header as a long: getContentLength() is an int and cannot report sizes above 2 GB
    String header = conn.getHeaderField("Content-Length");
    if (header != null) {
        try {
            long len = Long.parseLong(header.trim());
            if (len >= 0)
                setLength(Long.valueOf(len));
        } catch (NumberFormatException ignored) {
            // malformed header: the length stays unknown, as with a missing header
        }
    }
    return conn;
}
/**
 * @return content type reported by the server, may be null
 */
public synchronized String getContentType() {
    return this.contentType;
}

/**
 * @param ct content type to store
 */
public synchronized void setContentType(String ct) {
    this.contentType = ct;
}

/**
 * @return total length, or null when it is unknown
 */
public synchronized Long getLength() {
    return this.length;
}

/**
 * @param l total length, null when unknown
 */
public synchronized void setLength(Long l) {
    this.length = l;
}

/**
 * @return source URL as set by the user
 */
public synchronized URL getSource() {
    return this.source;
}

/**
 * @return file name taken from the Content-Disposition header, may be null
 */
public synchronized String getContentFilename() {
    return this.contentFilename;
}

/**
 * @param f file name to store
 */
public synchronized void setContentFilename(String f) {
    this.contentFilename = f;
}
/**
 * @return current state
 */
public synchronized States getState() {
    return this.state;
}

/**
 * Switches to the given state, clearing the stored exception and the retry delay.
 *
 * @param state new state
 */
public synchronized void setState(States state) {
    this.delay = 0;
    this.exception = null;
    this.state = state;
}

/**
 * Switches to the given state and stores the exception that caused it; the retry delay is reset.
 *
 * @param state new state
 * @param e     exception to store
 */
public synchronized void setState(States state, Throwable e) {
    this.delay = 0;
    this.exception = e;
    this.state = state;
}

/**
 * @return stored downloading / retry error, may be null
 */
public synchronized Throwable getException() {
    return this.exception;
}

/**
 * @param exception error to store
 */
protected synchronized void setException(Throwable exception) {
    this.exception = exception;
}

/**
 * @return retry delay
 */
public synchronized int getDelay() {
    return this.delay;
}

/**
 * Stores the retry delay together with its cause and switches the state to RETRYING.
 *
 * @param delay retry delay
 * @param e     error that triggered the retry
 */
public synchronized void setDelay(int delay, Throwable e) {
    this.state = URLInfo.States.RETRYING;
    this.exception = e;
    this.delay = delay;
}
/**
 * @return true when the server supports RANGE requests
 */
public synchronized boolean getRange() {
    return this.range;
}

/**
 * @param range whether the server supports RANGE requests
 */
public synchronized void setRange(boolean range) {
    this.range = range;
}

/**
 * @return cookie sent with requests, may be null
 */
public synchronized String getCookie() {
    return this.cookie;
}

/**
 * @param cookie cookie to send with requests
 */
public synchronized void setCookie(String cookie) {
    this.cookie = cookie;
}

/**
 * @return retry count
 */
public synchronized int getRetry() {
    return this.retry;
}

/**
 * @param retry retry count
 */
public synchronized void setRetry(int retry) {
    this.retry = retry;
}

/**
 * @return referer sent with requests, may be null
 */
public synchronized URL getReferer() {
    return this.referer;
}

/**
 * @param referer referer to send with requests
 */
public synchronized void setReferer(URL referer) {
    this.referer = referer;
}

/**
 * Resumes from a previous instance: delegates to the superclass and takes over the referer.
 *
 * @param old instance to resume from
 */
public synchronized void resume(URLInfo old) {
    super.resume(old);
    this.referer = old.referer;
}
/**
 * Writes this object into the JSON object produced by the superclass.
 *
 * Null values (length, content type, file name, cookie) are passed to {@code put} unchanged;
 * org.json's {@code put(String, Object)} leaves such keys out of the object. State, exception
 * and referer are written only when set. Delay and retry count are not persisted.
 *
 * @return the JSON form of this object
 */
public JSONObject save() {
    JSONObject o = super.save();
    o.put("source", source.toExternalForm());
    o.put("url", url.toExternalForm());
    o.put("extract", extract);
    o.put("length", length);
    o.put("range", range);
    o.put("content_type", contentType);
    o.put("content_filename", contentFilename);
    o.put("cookie", cookie);
    // state stays null until extraction starts; a fresh instance must still be savable
    if (state != null)
        o.put("state", state.toString());
    if (exception != null)
        o.put("exception", toString(exception));
    // o.put("delay", delay);
    // o.put("retry", retry);
    if (referer != null)
        o.put("referer", referer.toExternalForm());
    return o;
}
/**
 * Restores this object from JSON written by {@link #save()}.
 *
 * "source", "url", "extract" and "range" are required. Every value that save() omits when it is
 * null or unset (length, content type, file name, cookie, state, exception, referer) is
 * optional and loads as null. Delay and retry count are not persisted.
 *
 * @param o JSON object to read from
 */
public void load(JSONObject o) {
    super.load(o);
    source = toURL(o.getString("source"));
    url = toURL(o.getString("url"));
    extract = o.getBoolean("extract");
    // an unknown length is saved by leaving the key out, so it must not be required here
    length = o.isNull("length") ? null : Long.valueOf(o.getLong("length"));
    range = o.getBoolean("range");
    contentType = o.optString("content_type", null);
    contentFilename = o.optString("content_filename", null);
    cookie = o.optString("cookie", null);
    String savedState = o.optString("state", null);
    state = savedState == null ? null : States.valueOf(savedState);
    exception = toThroable(o.optString("exception", null));
    // delay = o.getInt("delay");
    // retry = o.getInt("retry");
    referer = toURL(o.optString("referer", null));
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy