All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.smartlogic.classificationserver.client.ClassificationClient Maven / Gradle / Ivy

There is a newer version: 5.6.1
Show newest version
package com.smartlogic.classificationserver.client;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.TimeUnit;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.IOUtils;
import org.apache.http.Consts;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpRequest;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.HttpClientConnectionManager;
import org.apache.http.conn.ssl.DefaultHostnameVerifier;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.FormBodyPart;
import org.apache.http.entity.mime.FormBodyPartBuilder;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.entity.mime.content.ByteArrayBody;
import org.apache.http.entity.mime.content.FileBody;
import org.apache.http.entity.mime.content.StringBody;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

import com.smartlogic.classificationserver.client.utils.XMLFeatureConst;

/**
 * General purpose client for the classification server
 *
 * @author Smartlogic Semaphore
 *
 */
public class ClassificationClient implements AutoCloseable {
  public static Logger logger = LoggerFactory.getLogger(ClassificationClient.class);

  /* Methods that are classification requests */

  /**
   * Determine the version information as generated by Classification Server
   *
   * @return The version string
   * @throws ClassificationException
   *           - There has been a connectivity issue
   */
  public String getVersion() throws ClassificationException {
    logger.debug("getVersion - entry");

    String commandString = getCommandXML("version", null);
    CSVersion version = new CSVersion(sendPostRequest(commandString, null));
    return version.getVersion();
  }

  /**
   * Return the rulebase classes that are currently configured on the classification server instance
   *
   * @return List of rulebases classes
   * @throws ClassificationException
   *           - There has been a connectivity issue
   */
  public Collection getRulebaseClasses() throws ClassificationException {
    String commandString = getCommandXML("listrulenetclasses", null);
    RulebaseClassSet rulebaseClassSet = new RulebaseClassSet(sendPostRequest(commandString, null));
    return rulebaseClassSet.getRulebaseClasses();
  }

  /**
   * Clear out a publish set so that new pack files can be uploaded. Until the publish set is
   * committed, this will have no effect on what is currently live
   *
   * @param publishSetName
   *          - the name of the set to be created
   * @throws ClassificationException
   *           - There has been a connectivity issue
   */
  public void clearPublishSet(String publishSetName) throws ClassificationException {
    String commandString = getCommandXML("publish_set_init", publishSetName);
    sendPostRequest(commandString, null);
  }

  /**
   * Upload the collection of pakfiles to the named publish set Until the publish set is committed,
   * this will have no effect on what is currently live
   *
   * @param publishSetName
   *          - the name of the publish set to which the pak files should be added
   * @param pakFiles
   *          - the pak files to upload
   * @throws ClassificationException
   *           - There has been a connectivity issue
   */
  public void sendPakfiles(String publishSetName, Collection pakFiles)
      throws ClassificationException {
    for (File pakFile : pakFiles) {
      sendPakFile(publishSetName, pakFile);
    }
  }

  /**
   * Upload the pakfile to the named publish set Until the publish set is committed, this will have
   * no effect on what is currently live
   *
   * @param publishSetName
   *          - the name of the publish set to which the pak file should be added
   * @param pakFile
   *          - the pak file to upload
   * @throws ClassificationException
   *           - There has been a connectivity issue
   */
  public void sendPakFile(String publishSetName, File pakFile) throws ClassificationException {
    String commandString = getCommandXML("publish_set_add", publishSetName);
    sendPostRequest(commandString, pakFile);
  }

  /**
   * Instruct that a particular publish set should become live This command will affect the
   * classification result
   *
   * @param publishSetName
   *          - the name of the publish set to commit
   * @throws ClassificationException
   *           - There has been a connectivity issue
   */
  public void commitPublishSet(String publishSetName) throws ClassificationException {
    String commandString = getCommandXML("publish_set", publishSetName);
    sendPostRequest(commandString, null);
  }

  /**
   * Remove a particular publish set from the classification servers rulebase set.
   *
   * @param publishSetName
   *          - the name of the publish set to deactivate
   * @throws ClassificationException
   *           - There has been a connectivity issue
   */
  public void deactivatePublishSet(String publishSetName) throws ClassificationException {
    String commandString = getCommandXML("publish_set_deactivate", publishSetName);
    sendPostRequest(commandString, null);
  }

  /**
   * Return the information that CS makes available.
   *
   * @return Classification Server information
   * @throws ClassificationException
   *           - There has been a connectivity issue
   */
  public CSInfo getInfo() throws ClassificationException {
    logger.debug("getInfo");

    CSInfo csInfo = new CSInfo(sendPostRequest(getCommandXML("info", null), null));
    return csInfo;
  }

  /**
   * Return the list of languages available on the cs instance
   *
   * @return List of languages
   * @throws ClassificationException
   *           Classification exception
   */
  public Collection getLanguages() throws ClassificationException {
    logger.debug("getLanguages - entry");

    LanguageSet langSet =
        new LanguageSet(sendPostRequest(getCommandXML("listlanguages", null), null));
    return langSet.getLanguages();
  }

  /**
   * Return the map of default parameter values
   *
   * @return Map of default parameter values
   * @throws ClassificationException
   *           Classification exception
   */
  public Map getDefaults() throws ClassificationException {
    logger.debug("getDefaults - entry");
    Defaults defaults =
        new Defaults(sendPostRequest(getCommandXML("getparameterdefaults", null), null));
    return defaults.getDefaults();
  }

  /**
   * Return the status of the classification server instance
   *
   * @return A classification status object
   * @throws ClassificationException
   *           Classification exception
   */
  @Deprecated // This response appears pretty useless
  public ClassificationServerStatus status() throws ClassificationException {
    if (logger.isDebugEnabled()) {
      logger.debug("status - entry");
    }

    ClassificationServerStatus status =
        new ClassificationServerStatus(sendPostRequest(getCommandXML("stats", null), null));
    return status;

  }

  /* Plain getters and setters for this object */

  private ClassificationConfiguration classificationConfiguration;

  /**
   * Get the configuration of the classification server
   *
   * @return The configuration
   */
  public ClassificationConfiguration getClassificationConfiguration() {
    return classificationConfiguration;
  }

  /**
   * Set the configuration of the classification server
   *
   * @param classificationConfiguration
   *          The configuration to use
   */
  public void setClassificationConfiguration(
      ClassificationConfiguration classificationConfiguration) {
    this.classificationConfiguration = classificationConfiguration;
  }

  private UUID auditUUID = null;

  /**
   * Return the UUID object used to tag the request
   *
   * @return The UUID object
   */
  public UUID getAuditUUID() {
    return auditUUID;
  }

  /**
   * Set a UUID object that will be used to tag the request. If configured, this will be stored in
   * the classification server log and so can be used for auditing purposes.
   *
   * @param auditGUID
   *          The audit GUID to use
   */
  public void setAuditUUID(UUID auditGUID) {
    this.auditUUID = auditGUID;
  }

  private String proxyHost = null;

  /**
   * The name of the proxy host in use.
   *
   * @return The proxy host. Null if no proxy is in use (the default)
   */
  @Deprecated
  public String getProxyHost() {
    return proxyHost;
  }

  /**
   * Set the proxy host to be used for all requests
   *
   * @param proxyHost
   *          The proxy host to use
   */
  @Deprecated
  public void setProxyHost(String proxyHost) {
    this.proxyHost = proxyHost;
  }

  private int proxyPort;

  /**
   * The port of the proxy being used
   *
   * @return The port number
   */
  @Deprecated
  public int getProxyPort() {
    return proxyPort;
  }

  /**
   * The port of the proxy being used
   *
   * @param proxyPort
   *          The port number to use
   */
  @Deprecated
  public void setProxyPort(int proxyPort) {
    this.proxyPort = proxyPort;
  }

  private String proxyURL;

  private String getProxyURL() {
    if (proxyURL == null) {
      if ((proxyHost != null) && (proxyPort != 0)) {
        proxyURL = "http://" + proxyHost + ":" + proxyPort;
      }
    }
    return proxyURL;
  }

  public void setProxyURL(String proxyURL) {
    this.proxyURL = proxyURL;
  }

  /* Classification requests */

  /**
   * Classify the supplied file
   *
   * @param inputFile
   *          The input file to classify
   * @param fileType
   *          File type of "inputFile". If the file type is not supplied (i.e. is null) then it will
   *          be guessed by classification server.
   * @return the classifications as returned by classification server.
   * @throws ClassificationException
   *           Classification exception
   */
  public Result getClassifiedDocument(File inputFile, String fileType)
      throws ClassificationException {
    return new Result(getStructuredDocument(inputFile, fileType));
  }

  public Document getStructuredDocument(File inputFile, String fileType)
      throws ClassificationException {
    return getStructuredDocument(inputFile, fileType, null, null);
  }

  /**
   * Classify the supplied title and body as if they were a document
   *
   * @param fileName
   *          The file name of the document to classify
   * @param title
   *          The document title
   * @param body
   *          The document body
   * @return the classifications as returned by classification server.
   * @throws ClassificationException
   *           Classification exception
   */
  public Result getClassifiedDocument(FileName fileName, Body body, Title title)
      throws ClassificationException {
    return new Result(getStructuredDocument(fileName, body, title));
  }

  public Document getStructuredDocument(FileName fileName, Body body, Title title)
      throws ClassificationException {
    return getStructuredDocument(fileName, body, title, null);
  }

  /**
   * Classify the supplied title and body as if they were a document
   *
   * @param title
   *          The document title
   * @param body
   *          The document body
   * @return the classifications as returned by classification server.
   * @throws ClassificationException
   *           Classification exception
   */
  public Result getClassifiedDocument(Body body, Title title) throws ClassificationException {
    return new Result(getStructuredDocument(body, title));
  }

  public Document getStructuredDocument(Body body, Title title) throws ClassificationException {
    return getStructuredDocument(null, body, title, null);
  }

  /**
   * Classify the supplied title and body as if they were a document
   *
   * @param fileName
   *          The file name of the document to classify
   * @param body
   *          The document body
   * @param title
   *          The document title
   * @param metadata
   *          Map containing metadata
   * @return the classifications as returned by classification server.
   * @throws ClassificationException
   *           Classification exception
   */
  public Result getClassifiedDocument(FileName fileName, Body body, Title title,
      Map> metadata) throws ClassificationException {
    return new Result(getStructuredDocument(fileName, body, title, metadata));
  }

  public Document getStructuredDocument(FileName fileName, Body body, Title title,
      Map> metadata) throws ClassificationException {
    logger.debug("Treating document: '" + title.getValue() + "'");

    // If there is no body, then don't bother attempting to classify the document
    if ((body == null) || (body.getValue() == null) || (body.getValue().trim().length() == 0)) {
      return getBlankStructuredDocument();
    }

    Collection parts = new ArrayList<>();

    addTitle(parts, title);
    addMetadata(parts, metadata);
    addByteArray(parts, body, fileName);
    return XMLReader.getDocument(getClassifications(parts));
  }

  public byte[] getClassificationServerResponse(FileName filename, Body body, Title title,
      Map> metadata) throws ClassificationException {
    logger.debug("Treating document: '" + title.getValue() + "'");

    // If there is no body, then don't bother attempting to classify the
    // document
    if ((body == null) || (body.getValue() == null) || (body.getValue().trim().length() == 0)) {
      return new byte[0];
    }

    Collection parts = new ArrayList<>();

    addTitle(parts, title);
    addMetadata(parts, metadata);
    addByteArray(parts, body, filename);
    return getClassificationServerResponse(parts);
  }

  /**
   * Classify the supplied title and body as if they were a document
   *
   * @param body
   *          The document body
   * @param title
   *          The document title
   * @param metadata
   *          Map containing metadata
   * @return the classifications as returned by classification server.
   * @throws ClassificationException
   *           Classification exception
   */
  public Result getClassifiedDocument(Body body, Title title,
      Map> metadata) throws ClassificationException {
    return new Result(getStructuredDocument(body, title, metadata));
  }

  public Document getStructuredDocument(Body body, Title title,
      Map> metadata) throws ClassificationException {
    return getStructuredDocument(null, body, title, metadata);
  }

  /**
   * Classify the supplied url
   *
   * @param url
   *          The URL to classify
   * @return the classifications as returned by classification server.
   * @throws ClassificationException
   *           Classification exception
   */
  public Result getClassifiedDocument(URL url) throws ClassificationException {
    return new Result(getStructuredDocument(url));
  }

  public Document getStructuredDocument(URL url) throws ClassificationException {
    return getStructuredDocument(url, null, null);
  }

  /**
   * Classify the supplied url with the extra metadata
   *
   * @param url
   *          The URL to classify
   * @param title
   *          The document title
   * @param metadata
   *          Map containing metadata
   * @return the classifications as returned by classification server.
   * @throws ClassificationException
   *           Classification exception
   */
  public Result getClassifiedDocument(URL url, Title title,
      Map> metadata) throws ClassificationException {
    return new Result(getStructuredDocument(url, title, metadata));
  }

  public Document getStructuredDocument(URL url, Title title,
      Map> metadata) throws ClassificationException {
    Collection parts = new ArrayList<>();
    addTitle(parts, title);
    addMetadata(parts, metadata);
    parts.add(getFormPart("path", url.toExternalForm()));
    return XMLReader.getDocument(getClassifications(parts));
  }

  private final static SimpleDateFormat simpleDateFormat =
      new SimpleDateFormat("yyyy-MM-dd HH:mm:ssZ");

  /**
   * Return the classification records for all requests between the two supplied dates
   *
   * @param startTime
   *          The earliest possible date for returned results
   * @param endTime
   *          The latest possible date for returned results
   * @return One record for each document classified in that date range
   * @throws ClassificationException
   *           Classification exception
   */
  public Collection getClassificationHistory(Date startTime, Date endTime)
      throws ClassificationException {
    logger.info("getClassificationHistory - entry");

    ArrayList partsList = new ArrayList<>();
    partsList.add(getFormPart("start_time", simpleDateFormat.format(startTime)));
    partsList.add(getFormPart("finish_time", simpleDateFormat.format(endTime)));
    partsList.add(getFormPart("operation", "getclassificationhistory"));

    ClassificationHistory classificationHistory =
        new ClassificationHistory(getClassificationServerResponse(partsList));
    return classificationHistory.getClassificationRecords();
  }

  public byte[] getClassifiedBytes(Body body, Title title, Map> metadata)
      throws ClassificationException {
    logger.debug("Treating document: '" + title.getValue() + "'");

    // If there is no body, then don't bother attempting to classify the document
    if ((body == null) || (body.getValue() == null) || (body.getValue().trim().length() == 0)) {
      return new byte[0];
    }

    Collection parts = new ArrayList<>();

    addTitle(parts, title);
    addMetadata(parts, metadata);
    addByteArray(parts, body, null);
    return getClassifications(parts);
  }

  public byte[] getClassifiedBytes(URL url, Title title, Map> metadata)
      throws ClassificationException {
    Collection parts = new ArrayList<>();
    addTitle(parts, title);
    addMetadata(parts, metadata);
    parts.add(getFormPart("path", url.toExternalForm()));
    return getClassifications(parts);
  }

  public byte[] getClassificationServerResponse(Body body, Title title)
      throws ClassificationException {
    return getClassificationServerResponse(null, body, title, null);
  }

  /**
   * Return directly the output from classification server with no analysis
   *
   * @param inputFile
   *          The input file to classify
   * @param fileType
   *          File type of "inputFile". If the file type is not supplied (i.e. is null) then it will
   *          be guessed by classification server.
   * @return The classification server response
   * @throws ClassificationException
   *           Classification exception
   */
  public byte[] getClassificationServerResponse(File inputFile, String fileType)
      throws ClassificationException {
    return getClassificationServerResponse(inputFile, fileType, null, null);
  }

  /**
   * Return in a structured form the output of the classification process
   *
   * @param data
   *          Data to classify
   * @param fileName
   *          A string containing the name of the file to classify
   * @return The structured result of the classification
   * @throws ClassificationException
   *           Classification exception
   */
  public Result getClassifiedDocument(byte[] data, String fileName) throws ClassificationException {
    return new Result(getStructuredDocument(data, fileName));
  }

  public Document getStructuredDocument(byte[] data, String fileName)
      throws ClassificationException {
    Collection parts = new ArrayList<>();

    if ((data == null) || (data.length == 0)) {
      return getBlankStructuredDocument();
    }

    addByteArray(parts, data, fileName);

    return XMLReader.getDocument(getClassificationServerResponse(parts));
  }

  /**
   * Return in a structured form the output of the classification process
   *
   * @param data
   *          Data to classify
   * @param fileName
   *          A string containing the name of the file to classify
   * @param title
   *          The document title
   * @param metadata
   *          Map containing metadata
   * @return The structured result of the classification
   * @throws ClassificationException
   *           Classification exception
   */
  public Result getClassifiedDocument(byte[] data, String fileName, Title title,
      Map> metadata) throws ClassificationException {
    return new Result(getStructuredDocument(data, fileName, title, metadata));
  }

  public Document getStructuredDocument(byte[] data, String fileName, Title title,
      Map> metadata) throws ClassificationException {
    logger.debug("Treating file: '" + fileName + "'");
    Collection parts = new ArrayList<>();

    addTitle(parts, title);
    addMetadata(parts, metadata);
    addByteArray(parts, data, fileName);

    return XMLReader.getDocument(getClassificationServerResponse(parts));
  }

  /**
   * Return in a structured form the output of the classification process
   *
   * @param inputFile
   *          The input file to classify
   * @param fileType
   *          File type of "inputFile". If the file type is not supplied (i.e. is null) then it will
   *          be guessed by classification server.
   * @param title
   *          The document title
   * @param metadata
   *          Map containing metadata
   * @return The structured result of the classification
   * @throws ClassificationException
   *           Classification exception
   */
  public Result getClassifiedDocument(File inputFile, String fileType, Title title,
      Map> metadata) throws ClassificationException {
    return new Result(getStructuredDocument(inputFile, fileType, title, metadata));
  }

  public Document getStructuredDocument(File inputFile, String fileType, Title title,
      Map> metadata) throws ClassificationException {
    Collection parts = new ArrayList<>();

    addTitle(parts, title);
    addMetadata(parts, metadata);
    addFile(parts, inputFile, fileType);

    return XMLReader.getDocument(getClassificationServerResponse(parts));
  }

  private void addTitle(Collection parts, Title title) {
    if ((title != null) && (title.getValue() != null) && (title.getValue().length() > 0)) {
      parts.add(title.asFormPart());
    }
  }

  private void addByteArray(Collection parts, Body body, FileName filename) {
    if (filename == null) {
      parts.add(body.asFormPart());
    } else {
      addByteArray(parts, body.getValue().getBytes(Charset.forName("UTF-8")), filename.getValue());
    }
  }

  private void addByteArray(Collection parts, byte[] data, String fileName) {
    parts.add(FormBodyPartBuilder.create("UploadFile", new ByteArrayBody(data, fileName)).build());
  }

  private void addFile(Collection parts, File inputFile, String fileType)
      throws ClassificationException {
    if (inputFile == null) {
      throw new ClassificationException("Null input file provided");
    }
    if (!inputFile.exists()) {
      throw new ClassificationException("Input file not found: " + inputFile.getAbsolutePath());
    }

    parts.add(getFormPart("UploadFile", inputFile));
  }

  private void addFileContent(Collection parts, byte[] fileContent, String fileName)
      throws ClassificationException {
    if (fileContent == null) {
      throw new ClassificationException("Null input file provided");
    }
    parts.add(
        FormBodyPartBuilder.create("UploadFile", new ByteArrayBody(fileContent, fileName)).build());
  }

  private void addMetadata(Collection parts,
      Map> metadata) {
    if (metadata != null) {
      for (String name : metadata.keySet()) {
        Collection values = metadata.get(name);
        if (values != null) {
          int m = 0;
          for (String value : values) {
            if (m == 0) {
              parts.add(getFormPart("meta_" + name, value));
            } else {
              parts.add(getFormPart("meta_" + name + "__" + m, value));
            }
            m++;
          }
        }
      }
    }
  }

  private MultipartEntityBuilder getDefaultParts() {
    MultipartEntityBuilder multipartEntityBuilder = MultipartEntityBuilder.create();

    for (String parameterName : classificationConfiguration.getAdditionalParameters().keySet()) {
      String value = classificationConfiguration.getAdditionalParameters().get(parameterName);
      if ((value != null) && (value.length() > 0)) {
        multipartEntityBuilder.addPart(getFormPart(parameterName, value));
      }
    }
    if (classificationConfiguration.isSingleArticle()) {
      multipartEntityBuilder.addPart(getFormPart("singlearticle", "on"));
    }
    if (classificationConfiguration.isMultiArticle()) {
      multipartEntityBuilder.addPart(getFormPart("multiarticle", "on"));
    }
    if (classificationConfiguration.isFeedback()) {
      multipartEntityBuilder.addPart(getFormPart("feedback", "on"));
    }
    if (classificationConfiguration.isStylesheet()) {
      multipartEntityBuilder.addPart(getFormPart("stylesheet", "on"));
    }
    if (classificationConfiguration.isUseGeneratedKeys()) {
      multipartEntityBuilder.addPart(getFormPart("use_generated_keys", "on"));
    }
    if (classificationConfiguration.isReturnHashCode()) {
      multipartEntityBuilder.addPart(getFormPart("return_hash", "on"));
    }
    return multipartEntityBuilder;
  }

  private final static ContentType contentType = ContentType.create("text/plain", Consts.UTF_8);

  private static FormBodyPart getFormPart(String name, String value) {
    return FormBodyPartBuilder.create(name, new StringBody(value, contentType)).build();
  }

  private static FormBodyPart getFormPart(String name, File file) {
    return FormBodyPartBuilder.create(name, new FileBody(file)).build();
  }

  private byte[] getClassificationServerResponse(Collection parts)
      throws ClassificationException {

    MultipartEntityBuilder multipartEntityBuilder = getDefaultParts();
    for (FormBodyPart part : parts) {
      multipartEntityBuilder.addPart(part);
    }

    if (this.getAuditUUID() != null) {
      multipartEntityBuilder.addPart(getFormPart("audit_tag", this.getAuditUUID().toString()));
    }

    byte[] returnedData = sendPostRequest(multipartEntityBuilder.build());

    logger.debug("getClassificationServerResponse - exit: " + returnedData.length);
    return returnedData;
  }

  private byte[] getClassifications(Collection partsList)
      throws ClassificationException {
    return getClassifications(partsList, null);
  }

  private byte[] getClassifications(Collection partsList, Map outMeta)
      throws ClassificationException {
    byte[] returnedData = getClassificationServerResponse(partsList);
    if ((returnedData != null) && (outMeta != null)) {
      Result result = new Result(XMLReader.getDocument(returnedData));
      if (result.getMetadata() != null) {
        for (String meta : result.getMetadata().keySet()) {
          outMeta.put(meta, result.getMetadata().get(meta));
        }
      }
    }
    return returnedData;
  }

  public byte[] getClassificationServerResponse(File inputFile, String fileType, Title title,
      Map> metadata) throws ClassificationException {
    logger.debug("Treating file: '" + inputFile + "'");

    Collection parts = new ArrayList<>();
    addFile(parts, inputFile, fileType);

    addTitle(parts, title);
    addMetadata(parts, metadata);
    return getClassificationServerResponse(parts);
  }

  public byte[] getClassificationServerResponse(byte[] fileContent, String fileName, Title title,
      Map> metadata) throws ClassificationException {
    logger.debug("Treating raw bytes: '" + title + "'");

    Collection parts = new ArrayList<>();
    addFileContent(parts, fileContent, fileName);

    addTitle(parts, title);
    addMetadata(parts, metadata);
    return getClassificationServerResponse(parts);
  }

  private DocumentBuilder documentBuilder = null;

  private String getCommandXML(String command, String publishSetName)
      throws ClassificationException {
    createDocumentBuilder();
    Document document = documentBuilder.newDocument();
    Element requestElement = document.createElement("request");
    requestElement.setAttribute("op", command);
    document.appendChild(requestElement);

    if (publishSetName != null) {
      Element publishSetElement = document.createElement("publish_set");
      publishSetElement.appendChild(document.createTextNode(publishSetName));
      requestElement.appendChild(publishSetElement);
    }

    StringWriter stringWriter = new StringWriter();
    try {
      TransformerFactory transformerFactory = TransformerFactory.newInstance();
      Transformer transformer = transformerFactory.newTransformer();
      transformer.setOutputProperty(OutputKeys.INDENT, "yes");
      transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
      DOMSource source = new DOMSource(document);
      StreamResult destination = new StreamResult(stringWriter);
      transformer.transform(source, destination);
    } catch (TransformerException e) {
      throw new ClassificationException(
          String.format("TransformerException building CS command: %s %s - %s", command,
              publishSetName, e.getMessage()));
    }
    return stringWriter.toString();
  }

  private void createDocumentBuilder() {
    if (documentBuilder == null) {
      DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
      try {
        documentBuilderFactory.setFeature(XMLFeatureConst.LOAD_DTD_GRAMMAR, false);
        documentBuilderFactory.setFeature(XMLFeatureConst.LOAD_EXTERNAL_DTD, false);
        documentBuilderFactory.setFeature(XMLFeatureConst.EXTERNAL_GENERAL_ENTITIES, false);
        documentBuilderFactory.setFeature(XMLFeatureConst.EXTERNAL_PARAMETER_ENTITIES, false);
        documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        documentBuilder = documentBuilderFactory.newDocumentBuilder();
      } catch (ParserConfigurationException e) {
        throw new RuntimeException("Failed to create XML document builder", e);
      }
    }
  }

  private byte[] sendPostRequest(String commandString, File pakFile)
      throws ClassificationException {

    MultipartEntityBuilder multipartEntityBuilder = MultipartEntityBuilder.create();

    if (pakFile != null) {
      FormBodyPart filePart =
          FormBodyPartBuilder.create("UploadFile", new FileBody(pakFile)).build();
      multipartEntityBuilder.addPart(filePart);
    }

    FormBodyPart commandPart = FormBodyPartBuilder
        .create("XML_INPUT", new StringBody(commandString, ContentType.TEXT_XML)).build();
    multipartEntityBuilder.addPart(commandPart);

    return sendPostRequest(multipartEntityBuilder.build());
  }

  private PoolingHttpClientConnectionManager poolingConnectionManager;
  private RequestConfig requestConfig;

  private int clientPoolSize = 2;

  public int getClientPoolSize() {
    return clientPoolSize;
  }

  public void setClientPoolSize(int clientPoolSize) {
    this.clientPoolSize = clientPoolSize;
  }

  private CloseableHttpClient httpClient = null;
  private IdleConnectionMonitorThread idleConnectionMonitorThread;

  private synchronized void initialize() {
    if (httpClient == null) {
      httpClient = getHttpClient();
    }
  }

  private CloseableHttpClient getHttpClient() {

    poolingConnectionManager = new PoolingHttpClientConnectionManager();
    poolingConnectionManager.setValidateAfterInactivity(0);
    poolingConnectionManager.setDefaultMaxPerRoute(clientPoolSize);
    poolingConnectionManager.setMaxTotal(clientPoolSize);

    // Make sure that idle and stale connections are discarded
    idleConnectionMonitorThread = new IdleConnectionMonitorThread(poolingConnectionManager);
    idleConnectionMonitorThread.start();

    RequestConfig.Builder requestConfigBuilder = RequestConfig.copy(RequestConfig.DEFAULT)
        .setSocketTimeout(classificationConfiguration.getSocketTimeoutMS())
        .setConnectTimeout(classificationConfiguration.getConnectionTimeoutMS())
        .setConnectionRequestTimeout(classificationConfiguration.getConnectionTimeoutMS());
    if (getProxyURL() != null) {
      HttpHost proxy = HttpHost.create(getProxyURL());
      requestConfigBuilder.setProxy(proxy);
    }
    requestConfig = requestConfigBuilder.build();

    return HttpClients.custom().setDefaultRequestConfig(requestConfig)
        .setSSLHostnameVerifier(new DefaultHostnameVerifier())
        .setConnectionManager(poolingConnectionManager).build();
  }

  @Override
  public void close() {
    if (null != idleConnectionMonitorThread) {
      idleConnectionMonitorThread.shutdown();
    }
    if (httpClient != null) {
      try {
        httpClient.close();
      } catch (IOException ioe) {
        throw new RuntimeException("HTTP client close failed.", ioe);
      }
    }
  }

  private byte[] sendPostRequest(HttpEntity requestEntity) throws ClassificationException {
    initialize();

    HttpPost httpPost = null;
    byte[] responseData;
    try {
      httpPost = new HttpPost(classificationConfiguration.getUrl());
      addHeaders(httpPost);
      httpPost.setEntity(requestEntity);

      try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
        if (response == null) {
          throw new ClassificationException(
              "Null response from http client: " + classificationConfiguration.getUrl());
        }
        if (response.getStatusLine() == null) {
          throw new ClassificationException(
              "Null status line from http client: " + classificationConfiguration.getUrl());
        }

        int statusCode = response.getStatusLine().getStatusCode();

        HttpEntity responseEntity = response.getEntity();

        logger.debug("Status: " + statusCode);
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        try (InputStream responseInputStream = responseEntity.getContent()) {
          IOUtils.copy(responseInputStream, byteArrayOutputStream);
        }
        responseData = byteArrayOutputStream.toByteArray();
        if (statusCode == HttpStatus.SC_INTERNAL_SERVER_ERROR) {
          throw new ClassificationException(
              "Internal classification server error: " + new String(responseData, "UTF-8"));
        } else if (statusCode != HttpStatus.SC_OK) {
          throw new ClassificationException("HttpStatus: " +
              statusCode +
              " received from classification server (" +
              classificationConfiguration.getUrl() +
              ") " +
              new String(responseData, "UTF-8"));
        }
      }
    } catch (ClientProtocolException e) {
      throw new ClassificationException(
          "ClientProtocolException talking to classification server" + e.getMessage());
    } catch (IOException e) {
      throw new ClassificationException(
          "IOException talking to classification server" + e.getMessage());
    } finally {
      if (httpPost != null) {
        httpPost.abort();
      }
    }

    return responseData;
  }

  private static Document blankDocument = null;

  private final Document getBlankStructuredDocument() throws ClassificationException {
    if (blankDocument == null) {
      blankDocument = XMLReader.getDocument(
          "".getBytes(StandardCharsets.UTF_8));
    }
    return blankDocument;
  }

  private void addHeaders(HttpRequest httpRequest) {
    if (classificationConfiguration.getApiToken() != null) {
      logger.trace("Adding authorization header: {}", classificationConfiguration.getApiToken());
      httpRequest.addHeader("Authorization", classificationConfiguration.getApiToken());
    }
  }

  @Override
  public String toString() {
    StringBuilder stringBuilder = new StringBuilder(this.getClass().getCanonicalName() + "\n");
    stringBuilder
        .append("  Host Name: '" + this.getClassificationConfiguration().getHostName() + "'\n");
    stringBuilder
        .append("  Host Path: '" + this.getClassificationConfiguration().getHostPath() + "'\n");
    stringBuilder
        .append("  Host Port: '" + this.getClassificationConfiguration().getHostPort() + "'\n");
    stringBuilder.append("  Connection Timeout MS: '" +
        this.getClassificationConfiguration().getConnectionTimeoutMS() +
        "'\n");
    stringBuilder.append("  Socket Timeout MS: '" +
        this.getClassificationConfiguration().getSocketTimeoutMS() +
        "'\n");
    stringBuilder
        .append("  Protocol: '" + this.getClassificationConfiguration().getProtocol() + "'\n");
    stringBuilder.append("  Proxy Host: '" + this.getProxyHost() + "'\n");
    stringBuilder.append("  Proxy Port: '" + this.getProxyPort() + "'\n");
    return stringBuilder.toString();
  }

  public static class IdleConnectionMonitorThread extends Thread {

    private final HttpClientConnectionManager connMgr;
    private volatile boolean shutdown;

    public IdleConnectionMonitorThread(HttpClientConnectionManager connMgr) {
      super();
      this.connMgr = connMgr;
    }

    @Override
    public void run() {
      try {
        while (!shutdown) {
          synchronized (this) {
            wait(5000);
            // Close expired connections
            connMgr.closeExpiredConnections();
            // Optionally, close connections
            // that have been idle longer than 30 sec
            connMgr.closeIdleConnections(30, TimeUnit.SECONDS);
          }
        }
      } catch (InterruptedException ex) {
        // terminate
      }
    }

    public void shutdown() {
      shutdown = true;
      synchronized (this) {
        notifyAll();
      }
    }

  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy