// org.apache.solr.common.util.ContentStreamBase Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of solr-solrj Show documentation
// Apache Solr Solrj
// There is a newer version: 9.7.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.common.util;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.function.Predicate;
import java.util.zip.GZIPInputStream;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.request.RequestWriter;

/**
 * Base class plus four concrete {@link ContentStream} implementations: one each for a URL, a
 * File, a String and a byte array.
 *
 * @since solr 1.2
 */
public abstract class ContentStreamBase implements ContentStream {

  public static final String DEFAULT_CHARSET = StandardCharsets.UTF_8.name();
  private static final String TEXT_CSV = "text/csv";
  public static final String TEXT_XML = "text/xml";
  public static final String APPLICATION_OCTET_STREAM = "application/octet-stream";
  public static final String APPLICATION_GZIP = "application/gzip";
  public static final String APPLICATION_XML = "application/xml";
  public static final String APPLICATION_JSON = "application/json";

  /** Name of the content-type parameter that carries the character set. */
  private static final String CHARSET_PARAM = "charset=";

  // Arrays.asList is used on purpose: unlike List.of(...) it tolerates contains(null), and
  // contentType may legitimately be null when these lists are consulted.
  private static final List<String> UNHELPFUL_TYPES =
      Arrays.asList(APPLICATION_OCTET_STREAM, APPLICATION_GZIP, "content/unknown");
  private static final List<String> XML_SUF = Arrays.asList(".xml", ".xml.gz", ".xml.gzip");
  private static final List<String> JSON_SUF = Arrays.asList(".json", ".json.gz", ".json.gzip");
  private static final List<String> CSV_SUF = Arrays.asList(".csv", ".csv.gz", ".csv.gzip");

  protected String name;
  protected String sourceInfo;
  protected String contentType;
  protected Long size;

  // ---------------------------------------------------------------------
  // Content-type helpers
  // ---------------------------------------------------------------------

  /**
   * Extracts the value of the {@code charset} parameter from a content-type string.
   *
   * <p>Only the charset value itself is returned: any parameters that follow it (separated by
   * {@code ';'}) are dropped and a pair of surrounding double quotes is removed, so both {@code
   * text/xml; charset=utf-8; boundary=x} and {@code text/xml; charset="utf-8"} yield {@code
   * utf-8}.
   *
   * @param contentType the content type to inspect, may be {@code null}
   * @return the charset name, or {@code null} if {@code contentType} is {@code null}, declares no
   *     charset parameter after the media type, or declares an empty one
   */
  public static String getCharsetFromContentType(String contentType) {
    if (contentType == null) {
      return null;
    }
    int idx = contentType.toLowerCase(Locale.ROOT).indexOf(CHARSET_PARAM);
    // idx == 0 would mean there is no media type in front of the parameter; treat as absent.
    if (idx <= 0) {
      return null;
    }
    String charset = contentType.substring(idx + CHARSET_PARAM.length());
    int paramEnd = charset.indexOf(';');
    if (paramEnd >= 0) {
      charset = charset.substring(0, paramEnd);
    }
    charset = charset.trim();
    if (charset.length() >= 2 && charset.startsWith("\"") && charset.endsWith("\"")) {
      charset = charset.substring(1, charset.length() - 1).trim();
    }
    return charset.isEmpty() ? null : charset;
  }

  /**
   * Guesses a content type from the stream name and, failing that, from the first significant
   * byte of the content.
   *
   * @return {@link #APPLICATION_XML}, {@link #APPLICATION_JSON} or {@code text/csv} when the
   *     lower-cased name ends with a known suffix; otherwise the result of sniffing the first
   *     non-whitespace byte; {@code null} when {@link #name} is {@code null} or nothing matched
   */
  protected String attemptToDetermineContentType() {
    if (name == null) {
      return null;
    }
    final String lowerName = name.toLowerCase(Locale.ROOT);
    final Predicate<String> endsWith = lowerName::endsWith;

    if (XML_SUF.stream().anyMatch(endsWith)) {
      return APPLICATION_XML;
    }
    if (JSON_SUF.stream().anyMatch(endsWith)) {
      return APPLICATION_JSON;
    }
    if (CSV_SUF.stream().anyMatch(endsWith)) {
      return TEXT_CSV;
    }
    return attemptToDetermineTypeFromFirstCharacter();
  }

  /**
   * Last-ditch detection: opens the stream, skips leading whitespace (spaces, tabs, line breaks)
   * and maps a first byte of {@code '<'} to XML and {@code '{'} to JSON.
   *
   * @return the detected type, or {@code null} if the first byte is something else, the stream is
   *     empty, or the stream could not be read
   */
  private String attemptToDetermineTypeFromFirstCharacter() {
    String type = null;
    try (InputStream stream = getStream()) {
      int data = stream.read();
      while (data != -1 && Character.isWhitespace(data)) {
        data = stream.read();
      }
      if (data == '<') {
        type = APPLICATION_XML;
      } else if (data == '{') {
        type = APPLICATION_JSON;
      }
    } catch (Exception ignored) {
      // Deliberately best-effort: any failure while sniffing simply means "type unknown".
    }
    return type;
  }

  /**
   * Wraps {@code raw} in a {@link GZIPInputStream}. If the wrapper cannot be created (for example
   * because the gzip header is invalid) {@code raw} is closed before the exception propagates, so
   * the underlying file handle or connection is not leaked.
   */
  private static InputStream gunzip(InputStream raw) throws IOException {
    try {
      return new GZIPInputStream(raw);
    } catch (IOException | RuntimeException e) {
      closeAfterFailure(raw, e);
      throw e;
    }
  }

  /** Closes {@code stream}, recording a close failure as suppressed on {@code failure}. */
  private static void closeAfterFailure(InputStream stream, Exception failure) {
    if (stream == null) {
      return;
    }
    try {
      stream.close();
    } catch (IOException closeFailure) {
      failure.addSuppressed(closeFailure);
    }
  }

  // ------------------------------------------------------------------------
  // Concrete implementations
  // ------------------------------------------------------------------------

  /**
   * Construct a ContentStream from a URL. This uses a URLConnection to get the content stream.
   *
   * @see URLConnection
   */
  public static class URLStream extends ContentStreamBase {
    private final URL url;

    public URLStream(URL url) {
      this.url = url;
      sourceInfo = "url";
    }

    /**
     * Returns the content type. For {@code file:} URLs whose current type is one of the unhelpful
     * generic types (octet-stream, gzip, unknown), tries to determine the type from the name or
     * payload instead and remembers the result when detection succeeds.
     */
    @Override
    public String getContentType() {
      if ("file".equals(url.getProtocol()) && UNHELPFUL_TYPES.contains(contentType)) {
        String type = attemptToDetermineContentType();
        if (type != null) {
          contentType = type;
        }
      }
      return contentType;
    }

    /**
     * Opens a new connection to the URL and returns its content. As a side effect the content
     * type, name and size of this stream are refreshed from the connection. The content is
     * transparently gunzipped when the connection reports a gzip content encoding or the URL's
     * file part ends in {@code .gz} / {@code .gzip}.
     */
    @Override
    public InputStream getStream() throws IOException {
      URLConnection conn = this.url.openConnection();

      contentType = conn.getContentType();
      name = url.toExternalForm();
      size = conn.getContentLengthLong();
      InputStream is = conn.getInputStream();
      String urlFile = url.getFile().toLowerCase(Locale.ROOT);
      // Content codings are case-insensitive; equalsIgnoreCase also copes with a null encoding.
      if ("gzip".equalsIgnoreCase(conn.getContentEncoding())
          || urlFile.endsWith(".gz")
          || urlFile.endsWith(".gzip")) {
        is = gunzip(is);
      }
      return is;
    }
  }

  /** Construct a ContentStream from a File */
  public static class FileStream extends ContentStreamBase {
    private final File file;

    public FileStream(File f) {
      file = f;

      contentType = null; // determined lazily in getContentType()
      name = file.getName();
      size = file.length();
      sourceInfo = file.toURI().toString();
    }

    /** Returns the content type, detecting and caching it on first use if none was set. */
    @Override
    public String getContentType() {
      if (contentType == null) {
        contentType = attemptToDetermineContentType();
      }
      return contentType;
    }

    /**
     * Opens the file. The content is transparently gunzipped when the stream name ends in {@code
     * .gz} / {@code .gzip}; if the name has been cleared via {@link #setName(String)} the file's
     * own name is consulted instead.
     */
    @Override
    public InputStream getStream() throws IOException {
      // Resolve the name before opening the file so nothing can fail between open and return.
      String effectiveName = (name != null) ? name : file.getName();
      String lowerName = effectiveName.toLowerCase(Locale.ROOT);
      InputStream is = new FileInputStream(file);
      if (lowerName.endsWith(".gz") || lowerName.endsWith(".gzip")) {
        is = gunzip(is);
      }
      return is;
    }
  }

  /** Construct a ContentStream from a String */
  public static class StringStream extends ContentStreamBase {
    private final String str;

    /** Creates a stream over {@code str} whose content type is guessed via {@link #detect}. */
    public StringStream(String str) {
      this(str, detect(str));
    }

    /**
     * Creates a stream over {@code str} with an explicit content type. The reported size is the
     * length of the string encoded as UTF-8.
     */
    public StringStream(String str, String contentType) {
      this.str = str;
      this.contentType = contentType;
      name = null;
      size = (long) str.getBytes(StandardCharsets.UTF_8).length;
      sourceInfo = "string";
    }

    /**
     * Guesses the content type of {@code str} from its first non-whitespace character(s).
     *
     * @param str the content to inspect, may be {@code null}
     * @return {@link #APPLICATION_JSON} if the content starts with {@code #}, {@code //}, {@code
     *     /*}, <code>{</code> or {@code [}; {@link #TEXT_XML} if it starts with {@code <};
     *     otherwise {@code null}
     */
    public static String detect(String str) {
      if (str == null) {
        return null;
      }
      final int len = str.length();
      for (int i = 0; i < len; i++) {
        char ch = str.charAt(i);
        if (Character.isWhitespace(ch)) {
          continue;
        }
        // ch is the first non-whitespace character; the decision is made here and now.
        if (ch == '#' || ch == '{' || ch == '[') {
          return APPLICATION_JSON; // single line comment, or start of a JSON object / array
        }
        if (ch == '/' && i + 1 < len) {
          char next = str.charAt(i + 1);
          if (next == '/' || next == '*') {
            return APPLICATION_JSON; // single line or multi-line comment
          }
        }
        if (ch == '<') {
          return TEXT_XML;
        }
        return null;
      }
      return null;
    }

    @Override
    public InputStream getStream() throws IOException {
      return new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8));
    }

    /** If a charset is defined (by the contentType) use that, otherwise use a StringReader */
    @Override
    public Reader getReader() throws IOException {
      String charset = getCharsetFromContentType(contentType);
      return charset == null ? new StringReader(str) : new InputStreamReader(getStream(), charset);
    }
  }

  /**
   * Base reader implementation. If the contentType declares a charset use it, otherwise use
   * "utf-8".
   *
   * <p>The content type is consulted before the stream is opened. If the declared charset is not
   * supported, the freshly opened stream is closed before the exception propagates.
   */
  @Override
  public Reader getReader() throws IOException {
    String charset = getCharsetFromContentType(getContentType());
    InputStream stream = getStream();
    if (charset == null) {
      return new InputStreamReader(stream, StandardCharsets.UTF_8);
    }
    try {
      return new InputStreamReader(stream, charset);
    } catch (IOException | RuntimeException e) {
      closeAfterFailure(stream, e);
      throw e;
    }
  }

  // ------------------------------------------------------------------
  // Getters / Setters for overrideable attributes
  // ------------------------------------------------------------------

  @Override
  public String getContentType() {
    return contentType;
  }

  public void setContentType(String contentType) {
    this.contentType = contentType;
  }

  @Override
  public String getName() {
    return name;
  }

  public void setName(String name) {
    this.name = name;
  }

  @Override
  public Long getSize() {
    return size;
  }

  public void setSize(Long size) {
    this.size = size;
  }

  @Override
  public String getSourceInfo() {
    return sourceInfo;
  }

  public void setSourceInfo(String sourceInfo) {
    this.sourceInfo = sourceInfo;
  }

  /**
   * Serializes {@code req} into memory using the content writer obtained from {@code
   * requestWriter} and exposes the bytes as a {@link ByteArrayStream} that carries the writer's
   * content type and no source name.
   *
   * @throws IOException if the content writer fails while writing
   */
  public static ContentStream create(
      RequestWriter requestWriter, @SuppressWarnings({"rawtypes"}) SolrRequest req)
      throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    RequestWriter.ContentWriter contentWriter = requestWriter.getContentWriter(req);
    contentWriter.write(baos);
    return new ByteArrayStream(baos.toByteArray(), null, contentWriter.getContentType());
  }

  /** Construct a ContentStream from a byte array */
  public static class ByteArrayStream extends ContentStreamBase {
    private final byte[] bytes;

    /** Creates a stream over {@code bytes} with no content type. */
    public ByteArrayStream(byte[] bytes, String source) {
      this(bytes, source, null);
    }

    /**
     * Creates a stream over {@code bytes}; {@code source} is used as both the name and the source
     * info of the stream. The array is not copied.
     */
    public ByteArrayStream(byte[] bytes, String source, String contentType) {
      this.bytes = bytes;

      this.contentType = contentType;
      name = source;
      size = (long) bytes.length;
      sourceInfo = source;
    }

    @Override
    public InputStream getStream() throws IOException {
      return new ByteArrayInputStream(bytes);
    }
  }
}