All downloads are free. Search and download functionality uses the official Maven repository.

ai.preferred.venom.fetcher.AsyncResponseConsumer Maven / Gradle / Ivy

There is a newer version: 4.2.7
Show newest version
/*
 * Copyright 2018 Preferred.AI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ai.preferred.venom.fetcher;

import ai.preferred.venom.request.HttpFetcherRequest;
import ai.preferred.venom.request.Request;
import ai.preferred.venom.request.Unwrappable;
import ai.preferred.venom.response.BaseResponse;
import ai.preferred.venom.response.Response;
import ai.preferred.venom.utils.ResponseDecompressor;
import ai.preferred.venom.validator.Validator;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import org.apache.http.*;
import org.apache.http.entity.ContentType;
import org.apache.http.nio.ContentDecoder;
import org.apache.http.nio.IOControl;
import org.apache.http.nio.entity.ContentBufferEntity;
import org.apache.http.nio.protocol.AbstractAsyncResponseConsumer;
import org.apache.http.nio.util.HeapByteBufferAllocator;
import org.apache.http.nio.util.SimpleInputBuffer;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.Asserts;
import org.apache.http.util.EntityUtils;
import org.apache.tika.Tika;
import org.apache.tika.io.TikaInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Set;

/**
 * Asynchronous response consumer that buffers the body of an HTTP response in
 * memory and, once the exchange completes, converts it into a
 * {@link BaseResponse}.
 * <p>
 * On completion the status code is checked against the configured stop codes,
 * the body is optionally decompressed, the content type (and, if missing, the
 * charset) is determined, and the resulting response is passed through the
 * supplied {@link Validator}.
 *
 * @author Ween Jiann Lee
 */
public class AsyncResponseConsumer extends AbstractAsyncResponseConsumer<Response> {

  private static final Logger LOGGER = LoggerFactory.getLogger(AsyncResponseConsumer.class);

  private static final ResponseDecompressor RESPONSE_DECOMPRESSOR = new ResponseDecompressor();

  /**
   * Content type reported when neither the headers nor the content allow a
   * content type to be determined.
   */
  private static final ContentType DEFAULT_CONTENT_TYPE = ContentType.APPLICATION_OCTET_STREAM;

  /**
   * Detected charsets are only applied when the detector's confidence is
   * strictly greater than this value.
   */
  private static final int MIN_CHARSET_CONFIDENCE = 50;

  /**
   * Buffer size used when the entity does not declare its content length.
   */
  private static final int DEFAULT_BUFFER_SIZE = 4096;

  private final Validator validator;

  private final Set<Integer> stopCodes;

  private final boolean compressed;

  private final Request request;

  private volatile HttpResponse httpResponse;

  private volatile SimpleInputBuffer buf;

  /**
   * Constructs a consumer for a single request/response exchange.
   *
   * @param validator  validator applied to the built response
   * @param stopCodes  status codes for which {@link #buildResult(HttpContext)}
   *                   throws a {@link StopCodeException}
   * @param compressed whether the response is passed through the decompressor
   *                   before its content is read
   * @param request    the request this consumer handles the response of
   */
  public AsyncResponseConsumer(Validator validator, Set<Integer> stopCodes, boolean compressed,
                               HttpFetcherRequest request) {
    this.validator = validator;
    this.stopCodes = stopCodes;
    this.compressed = compressed;
    this.request = request;
  }

  /**
   * Builds the venom response from the currently held HTTP response.
   *
   * @param compressed whether to run the response through the decompressor first
   * @return the response carrying status code, base URL, content, content type,
   * headers and the proxy of the request
   * @throws IOException if the entity content cannot be read
   */
  private BaseResponse createVenomResponse(boolean compressed) throws IOException {
    // A response without a body (e.g. 204 No Content) carries no entity, in which
    // case onEntityEnclosed is never invoked and there is nothing to decompress.
    if (compressed && httpResponse.getEntity() != null) {
      RESPONSE_DECOMPRESSOR.decompress(httpResponse);
    }

    // Read the entity only after decompression, as the original code did.
    final HttpEntity entity = httpResponse.getEntity();
    // EntityUtils.toByteArray rejects a null entity, so treat "no body" as empty content.
    final byte[] content = entity == null ? new byte[0] : EntityUtils.toByteArray(entity);
    final Header[] headers = httpResponse.getAllHeaders();
    final ContentType contentType = parseContentType(content);

    return new BaseResponse(
        httpResponse.getStatusLine().getStatusCode(),
        resolveBaseUrl(),
        content,
        contentType,
        headers,
        request.getProxy());
  }

  /**
   * Derives {@code scheme://host[:port]} from the request URL.
   *
   * @return the base URL, or an empty string if the request URL is malformed
   */
  private String resolveBaseUrl() {
    try {
      final URL url = new URL(request.getUrl());
      final StringBuilder base = new StringBuilder(url.getProtocol())
          .append("://")
          .append(url.getHost());
      // Keep an explicitly specified port; without it, links relative to the
      // base URL would point at the scheme's default port instead.
      if (url.getPort() != -1) {
        base.append(':').append(url.getPort());
      }
      return base.toString();
    } catch (MalformedURLException e) {
      LOGGER.warn("Could not parse base URL: {}", request.getUrl());
      return "";
    }
  }

  /**
   * Determines the content type of the response.
   * <p>
   * The type declared by the entity is used when present; otherwise the media
   * type is detected from the content. If the resulting type has no charset,
   * charset detection is attempted on the content.
   *
   * @param content the raw response body
   * @return the determined content type, or {@link #DEFAULT_CONTENT_TYPE} if it
   * cannot be determined
   */
  private ContentType parseContentType(byte[] content) {
    try {
      ContentType type = ContentType.get(httpResponse.getEntity());
      if (type == null) {
        try (TikaInputStream stream = TikaInputStream.get(new ByteArrayInputStream(content))) {
          type = ContentType.create(new Tika().detect(stream));
        }
      }
      if (type.getCharset() == null) {
        final CharsetMatch match = new CharsetDetector()
            .setText(new ByteArrayInputStream(content))
            .detect();

        if (match != null && match.getConfidence() > MIN_CHARSET_CONFIDENCE) {
          try {
            type = type.withCharset(match.getName());
          } catch (UnsupportedCharsetException e) {
            // The detector may name a charset this JVM does not provide; keep the
            // media type already determined rather than discarding it.
            LOGGER.warn("Charset is not available in this instance of the Java virtual machine", e);
          }
        }
      }
      return type;
    } catch (ParseException e) {
      LOGGER.warn("Could not parse content type", e);
    } catch (UnsupportedCharsetException e) {
      LOGGER.warn("Charset is not available in this instance of the Java virtual machine", e);
    } catch (IOException e) {
      LOGGER.warn("Cannot get content to determine media type", e);
    }
    return DEFAULT_CONTENT_TYPE;
  }

  /**
   * Stores the response head so that the entity and the result can be built later.
   *
   * @param httpResponse the received HTTP response
   */
  @Override
  protected void onResponseReceived(final HttpResponse httpResponse) {
    this.httpResponse = httpResponse;
  }

  /**
   * Appends newly available content to the in-memory buffer.
   *
   * @param decoder content decoder to read from
   * @param ioctrl  I/O control of the connection (unused)
   * @throws IOException if the content cannot be consumed
   */
  @Override
  protected void onContentReceived(
      final ContentDecoder decoder, final IOControl ioctrl) throws IOException {
    Asserts.notNull(this.buf, "Content buffer");
    this.buf.consumeContent(decoder);
  }

  /**
   * Allocates the content buffer and replaces the response entity with one
   * backed by that buffer.
   *
   * @param entity      the entity enclosed in the response
   * @param contentType the content type of the entity (unused)
   * @throws IOException if the declared content length exceeds
   *                     {@link Integer#MAX_VALUE}
   */
  @Override
  protected void onEntityEnclosed(
      final HttpEntity entity, final ContentType contentType) throws IOException {
    long len = entity.getContentLength();
    if (len > Integer.MAX_VALUE) {
      throw new ContentTooLongException("Entity content is too long: " + len);
    }
    if (len < 0) {
      // Negative length means the length is not known up front; start with a default size.
      len = DEFAULT_BUFFER_SIZE;
    }
    this.buf = new SimpleInputBuffer((int) len, new HeapByteBufferAllocator());
    this.httpResponse.setEntity(new ContentBufferEntity(entity, this.buf));
  }

  /**
   * Builds and validates the response once the exchange has completed.
   *
   * @param context the HTTP context of the exchange (unused)
   * @return the validated response
   * @throws StopCodeException   if the status code is one of the stop codes
   * @throws ValidationException if the validator does not report the response as valid
   * @throws Exception           if the response cannot be built
   */
  @Override
  protected BaseResponse buildResult(HttpContext context) throws Exception {
    final int statusCode = httpResponse.getStatusLine().getStatusCode();
    if (stopCodes.contains(statusCode)) {
      EntityUtils.consumeQuietly(httpResponse.getEntity());
      releaseResources();
      throw new StopCodeException(statusCode, "Stop code received.");
    }

    final BaseResponse response = createVenomResponse(compressed);
    // The content has been copied into the response; the buffers are no longer needed.
    releaseResources();

    final Validator.Status status = validator.isValid(Unwrappable.unwrapRequest(request), response);
    if (status != Validator.Status.VALID) {
      throw new ValidationException(status, response, "Invalid response.");
    }

    return response;
  }

  /**
   * Drops the references to the HTTP response and the content buffer.
   */
  @Override
  protected void releaseResources() {
    this.httpResponse = null;
    this.buf = null;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy