All downloads are free. Search and download functionality uses the official Maven repository.

com.github.axet.wget.info.URLInfo Maven / Gradle / Ivy

package com.github.axet.wget.info;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import com.github.axet.wget.RetryWrap;
import com.github.axet.wget.WGet;
import com.github.axet.wget.errors.DownloadRetry;

/**
 * URLInfo - keeps all information about a download source in one place: the original and the effective
 * (redirected) URL, referer, cookie, content metadata discovered by {@link #extract()} and the current
 * download state.
 *
 * Accessors are synchronized on the instance. The state can be persisted with {@link #save()} and restored
 * with {@link #load(JSONObject)}.
 */
public class URLInfo extends BrowserInfo {
    private static final long serialVersionUID = 7260247341480497184L;

    // supports two forms with and without quotes:
    //
    // 1) contentDisposition="attachment;filename="ap61.ram"";
    // 2) contentDisposition="attachment;filename=ap61.ram";
    public static Pattern FILENAME = Pattern.compile("filename=[\"]*([^\"]*)[\"]*"); // content disposition pattern
    public static Pattern BYTES = Pattern.compile("bytes \\d+-\\d+/(\\d+)"); // RANGE bytes

    /**
     * "ClassName: message" as produced by {@link #toString(Throwable)}. The class name group is reluctant so
     * that a message which itself contains ": " is not swallowed into the class name; DOTALL lets multi-line
     * messages match.
     */
    private static final Pattern SERIALIZED_THROWABLE = Pattern.compile("(.*?): (.*)", Pattern.DOTALL);

    /**
     * content attribute of a meta refresh tag, e.g. "0; url=http://host/path". Group 1 is everything after
     * "url=" (case insensitive), so targets which contain ';' or "url=" themselves stay intact.
     */
    private static final Pattern META_REFRESH_URL = Pattern.compile("^[^;,]*[;,]\\s*url\\s*=\\s*(.*?)\\s*$",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /**
     * connect socket timeout
     */
    public static int CONNECT_TIMEOUT = 10 * 1000;

    /**
     * read socket timeout
     */
    public static int READ_TIMEOUT = 10 * 1000;

    /**
     * source url (set by user)
     */
    protected URL source;

    /**
     * download url (if redirected/moved)
     */
    protected URL url;

    /**
     * source referer, may be null
     */
    protected URL referer;

    /**
     * have been extracted?
     */
    protected boolean extract = false;

    /**
     * null if size is unknown, which means we are unable to restore downloads or do multi thread downloads
     */
    protected Long length;

    /**
     * does server support for the RANGE?
     */
    protected boolean range;

    /**
     * null if here is no such file or other error
     */
    protected String contentType;

    /**
     * come from Content-Disposition: attachment; filename="fname.ext"
     */
    protected String contentFilename;

    // set cookie
    protected String cookie;

    /**
     * Notify States
     */
    public enum States {
        EXTRACTING, EXTRACTING_DONE, DOWNLOADING, RETRYING, STOP, ERROR, DONE;
    }

    /**
     * download state, null until the first state change
     */
    protected States state;
    /**
     * downloading error / retry error
     */
    protected Throwable exception;
    /**
     * retrying delay;
     */
    protected int delay;
    /**
     * retry count
     */
    protected int retry;

    /**
     * Simple exception serialization: "ClassName: message".
     *
     * Classes from the wget errors package, java.lang and java.io are written without their package, all
     * others with their full binary name, so that {@link #toThroable(String)} can resolve them again
     * (including nested classes, whose binary name uses '$').
     *
     * @param e
     *            exception to serialize, must not be null
     * @return text form of the exception
     */
    public static String toString(Throwable e) {
        String name = e.getClass().getName();
        // derive the package from the class name; Class.getPackage() may return null
        String pkg = packageOf(name);
        boolean known = Arrays.asList(packageOf(DownloadRetry.class.getName()), packageOf(Throwable.class.getName()),
                packageOf(IOException.class.getName())).contains(pkg);
        if (known) {
            name = name.substring(pkg.length() + 1);
        }
        return name + ": " + e.getMessage();
    }

    /**
     * Simple exception deserialization, the counterpart of {@link #toString(Throwable)}. (The misspelled
     * method name is kept for compatibility with existing callers.)
     *
     * The class name is looked up in the wget errors package, java.lang, java.io and finally as a full class
     * name, in that order. Only classes which are Throwables and have a public (String) constructor are
     * instantiated; anything else ends up as a RuntimeException carrying the whole text.
     *
     * @param s
     *            serialized exception, may be null
     * @return restored exception, null if s is null
     */
    public static Throwable toThroable(String s) {
        if (s == null) {
            return null;
        }
        Matcher m = SERIALIZED_THROWABLE.matcher(s);
        if (m.matches()) {
            String n = m.group(1);
            String msg = m.group(2);
            String[] candidates = { packageOf(DownloadRetry.class.getName()) + "." + n, // wget errors
                    packageOf(RuntimeException.class.getName()) + "." + n, // java.lang
                    packageOf(IOException.class.getName()) + "." + n, // java.io
                    n // full class name
            };
            for (String candidate : candidates) {
                Throwable t = newThrowable(candidate, msg);
                if (t != null) {
                    return t;
                }
            }
        }
        return new RuntimeException(s);
    }

    /**
     * Package part of a binary class name, "" if the name has no package.
     */
    private static String packageOf(String className) {
        int dot = className.lastIndexOf('.');
        return dot < 0 ? "" : className.substring(0, dot);
    }

    /**
     * Instantiate className with msg if, and only if, it names a Throwable with a public (String)
     * constructor.
     *
     * @return new instance, or null if the class can not be used
     */
    private static Throwable newThrowable(String className, String msg) {
        try {
            // the name comes from persisted data: load without running static initializers and verify the
            // type BEFORE calling a constructor, so no arbitrary (String) constructor gets executed
            Class<?> k = Class.forName(className, false, URLInfo.class.getClassLoader());
            if (!Throwable.class.isAssignableFrom(k)) {
                return null;
            }
            return (Throwable) k.getConstructor(String.class).newInstance(msg);
        } catch (Exception ignored) {
            // unknown class, no suitable constructor or the constructor failed: let the caller try the next
            // candidate
            return null;
        }
    }

    /**
     * Simple URL deserialization.
     *
     * @param r
     *            url text, may be null
     * @return parsed url, null if r is null
     * @throws RuntimeException
     *             wrapping the MalformedURLException if r is not a valid url
     */
    public static URL toURL(String r) {
        if (r == null) {
            return null;
        }
        try {
            return new URL(r);
        } catch (MalformedURLException e) {
            throw new RuntimeException(e);
        }
    }

    public URLInfo() {
    }

    /**
     * @param source
     *            url to download; also used as the initial download url
     */
    public URLInfo(URL source) {
        this.source = source;
        this.url = source;
    }

    /**
     * Restore the info from the JSON produced by {@link #save()}.
     */
    public URLInfo(JSONObject o) {
        load(o);
    }

    /**
     * Open (but do not connect) a connection to the current download url, using the proxy if one is set, and
     * apply cookie, timeouts, user agent and referer.
     *
     * @return prepared connection
     * @throws IOException
     *             if the connection can not be opened
     */
    public HttpURLConnection openConnection() throws IOException {
        HttpURLConnection conn;

        if (getProxy() != null) {
            conn = (HttpURLConnection) url.openConnection(getProxy().proxy);
        } else {
            conn = (HttpURLConnection) url.openConnection();
        }

        String c = getCookie();
        if (c != null) {
            conn.setRequestProperty("Cookie", c);
        }

        conn.setConnectTimeout(CONNECT_TIMEOUT);
        conn.setReadTimeout(READ_TIMEOUT);

        conn.setRequestProperty("User-Agent", getUserAgent());
        URL ref = getReferer();
        if (ref != null) {
            conn.setRequestProperty("Referer", ref.toExternalForm());
        }

        return conn;
    }

    /**
     * Extract the source information with no way to stop it and no notifications.
     */
    public void extract() {
        extract(new AtomicBoolean(false), new Runnable() {
            @Override
            public void run() {
            }
        });
    }

    /**
     * Probe the url and fill in length, range support, content type and content filename. HTML pages with a
     * meta refresh tag are followed to their target.
     *
     * On success the info is marked as extracted and the state is EXTRACTING_DONE; on a RuntimeException the
     * state becomes ERROR (with the exception attached) and the exception is rethrown.
     *
     * @param stop
     *            stop flag, handed to RetryWrap and WGet.getHtml
     * @param notify
     *            run after every state change made here
     */
    public void extract(final AtomicBoolean stop, final Runnable notify) {
        try {
            final HttpURLConnection conn = RetryWrap.wrap(stop, new RetryWrap.WrapReturn() {
                @Override
                public void proxy() {
                    // no proxy means direct connection (see openConnection()), nothing to set up then
                    if (getProxy() != null) {
                        getProxy().set();
                    }
                }

                @Override
                public void resume() {
                    setRetry(0);
                }

                @Override
                public void error(Throwable e) {
                    setRetry(getRetry() + 1);
                }

                @Override
                public HttpURLConnection download() throws IOException {
                    setState(States.EXTRACTING);
                    notify.run();

                    try {
                        return meta(extractRange());
                    } catch (DownloadRetry e) {
                        throw e;
                    } catch (RuntimeException e) {
                        // range probe failed - fall back to a plain request
                        return meta(extractNormal());
                    }
                }

                /**
                 * Follow a meta refresh redirect if the response is an HTML page containing one, otherwise
                 * return conn unchanged.
                 */
                HttpURLConnection meta(HttpURLConnection conn) throws IOException {
                    String ct = conn.getContentType();
                    if (ct == null) {
                        return conn;
                    }

                    // mime type is the part before the first ';', e.g. "text/html; charset=utf-8"
                    int semi = ct.indexOf(';');
                    String mime = (semi >= 0 ? ct.substring(0, semi) : ct).trim();
                    if (!mime.equalsIgnoreCase("text/html")) {
                        return conn;
                    }

                    String html = WGet.getHtml(conn, stop);
                    Document doc = Jsoup.parse(html);
                    Element link = doc.select("meta[http-equiv=refresh]").first();
                    if (link == null) {
                        return conn;
                    }

                    // resolve the target first, so a malformed target leaves referer / url untouched
                    URL target = refreshTarget(link.attr("content"));
                    if (target == null) {
                        return conn;
                    }

                    setReferer(url);
                    url = target;
                    String c = conn.getHeaderField("Set-cookie");
                    if (c != null) {
                        setCookie(c);
                    }
                    return download();
                }

                @Override
                public boolean retry(int d, Throwable ee) {
                    setDelay(d, ee);
                    notify.run();
                    return RetryWrap.retry(getRetry());
                }

                @Override
                public void moved(URL u) {
                    setReferer(url);
                    url = u;
                    setState(States.RETRYING);
                    notify.run();
                }
            });

            try {
                setContentType(conn.getContentType());

                String contentDisposition = conn.getHeaderField("Content-Disposition");
                if (contentDisposition != null) {
                    Matcher cm = FILENAME.matcher(contentDisposition);
                    if (cm.find()) {
                        setContentFilename(cm.group(1));
                    }
                }
            } finally {
                // only the headers were needed; do not keep the probe connection (and its unread body) open
                conn.disconnect();
            }

            setEmpty(true);

            setState(States.EXTRACTING_DONE);
            notify.run();
        } catch (RuntimeException e) {
            setState(States.ERROR, e);
            throw e;
        }
    }

    /**
     * Parse the content attribute of a meta refresh tag.
     *
     * @param content
     *            attribute value, e.g. "0; url=http://host/path"
     * @return refresh target resolved against the current url, null if content names no target
     * @throws MalformedURLException
     *             if the target is not a valid url
     */
    private URL refreshTarget(String content) throws MalformedURLException {
        Matcher m = META_REFRESH_URL.matcher(content);
        if (!m.matches()) {
            return null;
        }
        String target = m.group(1);
        // strip matching quotes: url='http://host/path'
        if (target.length() >= 2) {
            char first = target.charAt(0);
            char last = target.charAt(target.length() - 1);
            if ((first == '"' || first == '\'') && first == last) {
                target = target.substring(1, target.length() - 1).trim();
            }
        }
        if (target.isEmpty()) {
            return null;
        }
        // an absolute target ignores the context, a relative one is resolved against the current url
        return new URL(url, target);
    }

    /**
     * @return true if the info has not been extracted yet
     */
    synchronized public boolean empty() {
        return !extract;
    }

    /**
     * Set the "extracted" flag. NOTE: despite its name this sets the flag to b as is, so setEmpty(true) marks
     * the info as extracted and makes {@link #empty()} return false. Kept this way for compatibility.
     *
     * @param b
     *            new value of the extracted flag
     */
    synchronized public void setEmpty(boolean b) {
        extract = b;
    }

    /**
     * Probe the url with a one byte RANGE request. On success the total length is taken from the
     * Content-Range header and range support is recorded.
     *
     * @return connection the probe was made on
     * @throws RuntimeException
     *             "range not supported" if the response has no usable Content-Range header
     */
    protected HttpURLConnection extractRange() throws IOException {
        HttpURLConnection conn = openConnection();

        // may raise an exception if not supported by server
        conn.setRequestProperty("Range", "bytes=" + 0 + "-" + 0);

        RetryWrap.check(conn);

        String range = conn.getHeaderField("Content-Range");
        Matcher m = range == null ? null : BYTES.matcher(range);
        if (m == null || !m.find()) {
            // this connection is abandoned by the caller - release it
            conn.disconnect();
            throw new RuntimeException("range not supported");
        }

        setLength(Long.valueOf(m.group(1)));
        this.setRange(true);

        return conn;
    }

    /**
     * Probe the url with a plain request (used if the range probe failed). Range support is recorded as
     * false and the length is taken from the Content-Length header if present.
     *
     * @return connection the probe was made on
     */
    protected HttpURLConnection extractNormal() throws IOException {
        HttpURLConnection conn = openConnection();

        setRange(false);

        RetryWrap.check(conn);

        // parse the header ourselves: getContentLength() returns an int and can not report 2GB+ files
        String cl = conn.getHeaderField("Content-Length");
        if (cl != null) {
            try {
                long len = Long.parseLong(cl.trim());
                if (len >= 0) {
                    setLength(Long.valueOf(len));
                }
            } catch (NumberFormatException ignored) {
                // malformed header - length stays as it was, same as a missing header
            }
        }

        return conn;
    }

    synchronized public String getContentType() {
        return contentType;
    }

    synchronized public void setContentType(String ct) {
        contentType = ct;
    }

    /**
     * @return content length, null if unknown
     */
    synchronized public Long getLength() {
        return length;
    }

    synchronized public void setLength(Long l) {
        length = l;
    }

    synchronized public URL getSource() {
        return source;
    }

    synchronized public String getContentFilename() {
        return contentFilename;
    }

    synchronized public void setContentFilename(String f) {
        contentFilename = f;
    }

    synchronized public States getState() {
        return state;
    }

    /**
     * Set the state and clear the exception and the retry delay.
     */
    synchronized public void setState(States state) {
        this.state = state;
        this.exception = null;
        this.delay = 0;
    }

    /**
     * Set the state together with the exception which caused it and clear the retry delay.
     */
    synchronized public void setState(States state, Throwable e) {
        this.state = state;
        this.exception = e;
        this.delay = 0;
    }

    synchronized public Throwable getException() {
        return exception;
    }

    synchronized protected void setException(Throwable exception) {
        this.exception = exception;
    }

    synchronized public int getDelay() {
        return delay;
    }

    /**
     * Record a retry delay and the error which caused it; switches the state to RETRYING.
     */
    synchronized public void setDelay(int delay, Throwable e) {
        this.delay = delay;
        this.exception = e;
        this.state = URLInfo.States.RETRYING;
    }

    synchronized public boolean getRange() {
        return range;
    }

    synchronized public void setRange(boolean range) {
        this.range = range;
    }

    synchronized public String getCookie() {
        return cookie;
    }

    synchronized public void setCookie(String cookie) {
        this.cookie = cookie;
    }

    synchronized public int getRetry() {
        return retry;
    }

    synchronized public void setRetry(int retry) {
        this.retry = retry;
    }

    synchronized public URL getReferer() {
        return referer;
    }

    synchronized public void setReferer(URL referer) {
        this.referer = referer;
    }

    /**
     * Take over the resumable settings of a previous info: whatever super.resume() copies plus the referer.
     */
    synchronized public void resume(URLInfo old) {
        super.resume(old);
        referer = old.referer;
    }

    /**
     * Serialize the info to JSON. Values which are null (unknown length, no content type, state not set yet,
     * ...) are left out; delay and retry are not persisted.
     *
     * @return JSON object created by super.save() with the fields of this class added
     */
    synchronized public JSONObject save() {
        JSONObject o = super.save();
        if (source != null) {
            o.put("source", source.toExternalForm());
        }
        if (url != null) {
            o.put("url", url.toExternalForm());
        }
        o.put("extract", extract);
        if (length != null) {
            o.put("length", length.longValue());
        }
        o.put("range", range);
        if (contentType != null) {
            o.put("content_type", contentType);
        }
        if (contentFilename != null) {
            o.put("content_filename", contentFilename);
        }
        if (cookie != null) {
            o.put("cookie", cookie);
        }
        if (state != null) {
            o.put("state", state.toString());
        }
        if (exception != null) {
            o.put("exception", toString(exception));
        }
        if (referer != null) {
            o.put("referer", referer.toExternalForm());
        }
        return o;
    }

    /**
     * Restore the info from JSON produced by {@link #save()}. Keys which save() leaves out for null values
     * are restored as null; delay and retry are not persisted and are left untouched.
     *
     * @param o
     *            JSON to read from
     */
    synchronized public void load(JSONObject o) {
        super.load(o);
        source = toURL(o.optString("source", null));
        url = toURL(o.optString("url", null));
        extract = o.getBoolean("extract");
        // length is absent when it was unknown at save time
        length = o.isNull("length") ? null : Long.valueOf(o.getLong("length"));
        range = o.getBoolean("range");
        contentType = o.optString("content_type", null);
        contentFilename = o.optString("content_filename", null);
        cookie = o.optString("cookie", null);
        String st = o.optString("state", null);
        state = st == null ? null : States.valueOf(st);
        exception = toThroable(o.optString("exception", null));
        referer = toURL(o.optString("referer", null));
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy