All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.tinymediamanager.scraper.http.Url Maven / Gradle / Ivy

There is a newer version: 3.0.5
Show newest version
/*
 * Copyright 2012 - 2019 Manuel Laggner
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.tinymediamanager.scraper.http;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InterruptedIOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.UnknownHostException;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tinymediamanager.scraper.exceptions.HttpException;
import org.tinymediamanager.scraper.util.Pair;
import org.tinymediamanager.scraper.util.UrlUtil;

import okhttp3.Call;
import okhttp3.Headers;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;

/**
 * The Class Url. Used to make simple, blocking URL requests. The request is temporarily streamed into a ByteArrayInputStream, before the InputStream
 * is passed to the caller.
 *
 * @author Manuel Laggner / Myron Boyle
 */
public class Url {
  private static final Logger          LOGGER                = LoggerFactory.getLogger(Url.class);
  protected OkHttpClient               client;

  protected static final String        USER_AGENT            = "User-Agent";
  // where is such a list in std java?
  // https://github.com/xbmc/xbmc/blob/master/xbmc/addons/kodi-addon-dev-kit/include/kodi/Filesystem.h#L195
  protected static final List  KNOWN_HEADERS         = Arrays.asList("accept", "accept-charset", "accept-encoding", "accept-language",
      "authorization", "cookie", "customrequest", "noshout", "postdata", "referer", "user-agent", "seekable", "sslcipherlist", "Via");
  protected int                        responseCode          = 0;
  protected String                     responseMessage       = "";
  protected Charset                    responseCharset       = null;
  protected String                     responseContentType   = "";
  protected long                       responseContentLength = -1;

  protected String                     url                   = null;                                                                          // NOSONAR
  protected Headers                    headersResponse       = null;
  protected List> headersRequest        = new ArrayList<>();
  protected URI                        uri                   = null;

  protected Call                       call                  = null;
  protected Request                    request               = null;
  protected Response                   response              = null;

  /**
   * gets the specified header value from this connection
* You need to call this AFTER getInputstream(). * * @param header * the header you want to know (like Content-Length) * @return the header value */ public String getHeader(String header) { if (headersResponse == null) { return ""; } String h = headersResponse.get(header); if (StringUtils.isBlank(h)) { return ""; } return h; } /** * get all response headers * * @return the response headers */ public Headers getHeadersResponse() { return headersResponse; } /** * Instantiates a new url / httpclient with default user-agent. * * @param url * the url */ public Url(String url) throws MalformedURLException { this.client = TmmHttpClient.getHttpClient(); this.url = url; if (url.contains("|")) { splitHeadersFromUrl(); } // morph to URI to check syntax of the url try { uri = morphStringToUri(url); } catch (URISyntaxException e) { throw new MalformedURLException(url); } // default user agent addHeader(USER_AGENT, UrlUtil.generateUA()); } /** * A constructor for inherited classes which needs a special setup */ protected Url() { } /** * pipe could be delimiter for header values (like seen in Kodi)
* http://www.asdfcom/page?what=do|Referer=http://my.site.com
* http://de.clip-1.filmtrailer.com/2845_14749_a_4.flv?log_var=67|491100001-1|-
* split away from url, and add as header */ protected void splitHeadersFromUrl() { Pattern p = Pattern.compile(".*\\|(.*?)=(.*?)$"); Matcher m = p.matcher(this.url); if (m.find() && KNOWN_HEADERS.contains(m.group(1).toLowerCase(Locale.ROOT))) { // ok, url might have a pipe, but we now have a recognized header - set it this.url = this.url.substring(0, m.start(1) - 1); // -1 is pipe char addHeader(m.group(1), m.group(2)); } } /** * morph the url (string) to an URI to check the syntax and escape the path * * @param urlToMorph * the url to morph * @return the morphed URI * @throws MalformedURLException * @throws URISyntaxException */ protected URI morphStringToUri(String urlToMorph) throws MalformedURLException, URISyntaxException { URL newUrl = new URL(urlToMorph); return new URI(newUrl.getProtocol(), newUrl.getUserInfo(), newUrl.getHost(), newUrl.getPort(), newUrl.getPath(), newUrl.getQuery(), newUrl.getRef()); } /** * set a specified User-Agent * * @param userAgent * the user agent to be set */ public void setUserAgent(String userAgent) { addHeader(USER_AGENT, userAgent); } /** * Gets the url. * * @return the url * @throws IOException * Signals that an I/O exception has occurred. */ public URL getUrl() throws IOException { return new URL(url); } /** * Adds the header. * * @param key * the key * @param value * the value */ public void addHeader(String key, String value) { if (StringUtils.isBlank(key)) { return; } LOGGER.trace("add HTTP header: {}={}", key, value); // looks like there is no need for duplicate check since some headers can // occur several times // Typically HTTP headers work like a Map: each field has // one value or none. But some headers permit multiple values, like Guava's // Multimap. For example, it's legal and common for an HTTP response to // supply multiple Vary headers. for (int i = headersRequest.size() - 1; i >= 0; i--) { Pair header = headersRequest.get(i); if (key.equals(header.first())) { headersRequest.remove(i); } } // and add the new one headersRequest.add(new Pair<>(key, value)); } /** * Adds the header. * * @param header * the header */ public void addHeader(Pair header) { headersRequest.add(header); } /** * Adds the headers. * * @param headers * the headers */ public void addHeaders(List> headers) { headersRequest.addAll(headers); } /** * Gets the input stream. * * @return the input stream * @throws IOException * Signals that an I/O exception has occurred. * @throws InterruptedException * Signals that the thread has been interrupted */ public InputStream getInputStream() throws IOException, InterruptedException { return getInputStream(false); } /** * Gets the input stream * * * @param headRequest * do you just want to send a HEAD request (no content), for checking file availability? * @return * @throws IOException * Signals that an I/O exception has occurred. * @throws InterruptedException * Signals that the thread has been interrupted */ public InputStream getInputStream(boolean headRequest) throws IOException, InterruptedException { // workaround for local files if (url.startsWith("file:")) { String newUrl = url.replace("file:/", ""); File file = new File(newUrl); return new FileInputStream(file); } InputStream is = null; // replace our API keys for logging... String logUrl = url.replaceAll("api_key=\\w+", "api_key=").replaceAll("api/\\d+\\w+", "api/"); LOGGER.debug("getting {}", logUrl); Request.Builder requestBuilder = new Request.Builder(); requestBuilder.url(url); if (headRequest) { requestBuilder.head(); } // set custom headers for (Pair header : headersRequest) { requestBuilder.addHeader(header.first(), header.second()); } request = requestBuilder.build(); try { call = client.newCall(request); response = call.execute(); headersResponse = response.headers(); responseCode = response.code(); responseMessage = response.message(); // log any "connection problems" if (responseCode < 200 || responseCode >= 400) { cleanup(); LOGGER.debug("bad http response: {} - {}", responseCode, responseMessage); throw new HttpException(url, responseCode, responseMessage); } if (response.body().contentType() != null) { // could be null, see AnimeDB responseCharset = response.body().contentType().charset(); responseContentType = response.body().contentType().toString(); } responseContentLength = response.body().contentLength(); is = getInputstreamInternal(response); } catch (HttpException e) { // rethrow that to inform the caller that there was an HTTP-Exception throw e; } catch (InterruptedIOException | IllegalStateException e) { LOGGER.debug("aborted request: {} - {}", logUrl, e.getMessage()); cleanup(); throw new InterruptedException(); } catch (UnknownHostException e) { cleanup(); LOGGER.error("proxy or host not found/reachable - {}", e.getMessage()); } catch (Exception e) { cleanup(); LOGGER.error("Unexpected exception getting url " + logUrl + " - " + e.getMessage(), e); } return is; } protected InputStream getInputstreamInternal(Response response) throws IOException { // response.body().bytes() closes the connection return new ByteArrayInputStream(response.body().bytes()); } /** * gets the url with the given amount of retries * * @param retries * the amount of retries (>0) * @return the InputStream or null */ public InputStream getInputStreamWithRetry(int retries) throws InterruptedException { if (retries <= 0) { return null; } InputStream is = null; int counter = 0; do { counter++; try { is = getInputStream(); } catch (InterruptedException | InterruptedIOException e) { // this exception has already been logged in getInputStream() throw new InterruptedException(); } catch (Exception e) { LOGGER.warn("problem fetching the url: {}", e.getMessage()); } if (is != null || (getStatusCode() > 0 && getStatusCode() < 500)) { // we either got a response or a permanent failure return is; } // has this thread been interrupted? if (Thread.interrupted()) { return null; } LOGGER.info("could not fetch: {} - retrying", url); } while (counter <= retries); return null; } /** * Cleanup the connection */ protected void cleanup() { if (call != null) { call.cancel(); } if (response != null) { response.close(); } } /** * is the HTTP status code a 4xx/5xx? * * @return true/false */ public boolean isFault() { return (responseCode >= 400); } /** * http status code */ public int getStatusCode() { return responseCode; } /** * http status string */ public String getStatusLine() { return responseMessage; } /** * Gets the bytes. * * @return the bytes * @throws IOException * Signals that an I/O exception has occurred. */ public byte[] getBytes() throws IOException, InterruptedException { try (InputStream is = getInputStream()) { return IOUtils.toByteArray(is); } } /** * Gets the bytes with the given amount of retries * * @param retries * the amount of retries (>0) * @return the bytes or an empty array * @throws IOException * Signals that an I/O exception has occurred. */ public byte[] getBytesWithRetry(int retries) throws IOException, InterruptedException { try (InputStream is = getInputStreamWithRetry(retries)) { return IOUtils.toByteArray(is); } } /** * Download an Url to a file via NIO FileChannel (synchron) * * @param file * @return successful or not */ public boolean download(File file) { try (InputStream is = getInputStream(); ReadableByteChannel rbc = Channels.newChannel(is); FileOutputStream fos = new FileOutputStream(file)) { if (is == null) { return false; } fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); return true; } catch (IOException e) { LOGGER.error("Error downloading {} - {}", this.url, e.getMessage()); } catch (InterruptedException ignored) { if (call != null) { call.cancel(); } Thread.currentThread().interrupt(); } return false; } /** * Download an Url to a file via NIO FileChannel (synchron) * * @param file * @return successful or not */ public boolean download(Path file) { return download(file.toFile()); } /** * Gets the charset. * * @return the charset */ public Charset getCharset() { if (responseCharset == null) { return Charset.defaultCharset(); } return responseCharset; } /** * Gets the content encoding. * * @return the content encoding */ public String getContentEncoding() { return responseContentType; } /** * the number of bytes of the content, or a negative number if unknown. If the content length is known but exceeds Long.MAX_VALUE, a negative number * is returned. * * @return the content length */ public long getContentLength() { return responseContentLength; } @Override public String toString() { return url; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy