All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.solr.common.util.ContentStreamBase Maven / Gradle / Ivy

There is a newer version: 9.8.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.common.util;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.function.Predicate;
import java.util.zip.GZIPInputStream;

import org.apache.http.entity.ContentType;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.request.RequestWriter;

/**
 * Base class for ContentStream, with concrete implementations for URL, File, String and byte[] sources
 * 
 *
 * @since solr 1.2
 */
public abstract class ContentStreamBase implements ContentStream
{

  /** Canonical name of the UTF-8 charset. */
  public static final String DEFAULT_CHARSET = StandardCharsets.UTF_8.name();
  /** MIME type reported for names that carry one of the {@link #CSV_SUF} suffixes. */
  private static final String TEXT_CSV = "text/csv";
  /** MIME types that carry no information about the payload format. */
  private static final List<String> UNHELPFUL_TYPES = Arrays.asList(ContentType.APPLICATION_OCTET_STREAM.getMimeType(), "application/gzip", "content/unknown");
  /** Lower-case name suffixes treated as XML (plain or gzip-compressed). */
  private static final List<String> XML_SUF =  Arrays.asList(".xml", ".xml.gz", ".xml.gzip");
  /** Lower-case name suffixes treated as JSON (plain or gzip-compressed). */
  private static final List<String> JSON_SUF =  Arrays.asList(".json", ".json.gz", ".json.gzip");
  /** Lower-case name suffixes treated as CSV (plain or gzip-compressed). */
  private static final List<String> CSV_SUF =  Arrays.asList(".csv", ".csv.gz", ".csv.gzip");

  // Name of the stream. The nested classes in this file set it to a file
  // name, the external form of a URL, a caller-supplied source, or null.
  protected String name;
  // Description of where the content came from. The nested classes set it to
  // "url", a file URI, "string", or a caller-supplied source.
  protected String sourceInfo;
  // MIME type of the content; may be null when it has not been determined.
  protected String contentType;
  // Length of the content as reported by the underlying source (connection
  // content length, file length, or byte count); boxed, so it may be null.
  protected Long size;
  
  //---------------------------------------------------------------------
  //---------------------------------------------------------------------
  
  /**
   * Extracts the value of the <code>charset</code> parameter from a content type
   * such as <code>text/xml; charset=UTF-8</code>.
   *
   * The parameter name is matched case-insensitively. Any parameters that
   * follow the charset (separated by <code>;</code>) are dropped, and a value
   * wrapped in double quotes is unquoted, so that the result can be handed
   * directly to a charset lookup.
   *
   * @param contentType the content type to inspect, may be null
   * @return the charset value with its original casing, or null if
   *         <code>contentType</code> is null, contains no <code>charset=</code>
   *         parameter, or begins with <code>charset=</code> (no media type)
   */
  public static String getCharsetFromContentType( String contentType )
  {
    if( contentType == null ) {
      return null;
    }
    final String key = "charset=";
    int idx = contentType.toLowerCase(Locale.ROOT).indexOf( key );
    // idx == 0 means there is no media type in front of the parameter;
    // that has always been treated as "no charset".
    if( idx <= 0 ) {
      return null;
    }
    String charset = contentType.substring( idx + key.length() );
    // Cut off any further parameters, e.g. "utf-8; boundary=xyz".
    int end = charset.indexOf( ';' );
    if( end >= 0 ) {
      charset = charset.substring( 0, end );
    }
    charset = charset.trim();
    // Parameter values may be quoted: charset="utf-8".
    if( charset.length() >= 2 && charset.startsWith( "\"" ) && charset.endsWith( "\"" ) ) {
      charset = charset.substring( 1, charset.length() - 1 ).trim();
    }
    return charset;
  }

  /**
   * Tries to guess the MIME type of this stream.
   *
   * The stream name is compared (case-insensitively) against the known XML,
   * JSON and CSV suffixes, including their gzip variants. If none matches,
   * the first character of the content is inspected instead.
   *
   * @return the guessed MIME type, or null if <code>name</code> is null or
   *         no guess could be made
   */
  protected String attemptToDetermineContentType() {
    if (name == null) {
      return null;
    }
    // Lower-case the name once instead of once per candidate suffix.
    final String lowerName = name.toLowerCase(Locale.ROOT);
    Predicate<String> endsWith = lowerName::endsWith;

    if (XML_SUF.stream().anyMatch(endsWith)) {
      return ContentType.APPLICATION_XML.getMimeType();
    }
    if (JSON_SUF.stream().anyMatch(endsWith)) {
      return ContentType.APPLICATION_JSON.getMimeType();
    }
    if (CSV_SUF.stream().anyMatch(endsWith)) {
      return TEXT_CSV;
    }
    return attemptToDetermineTypeFromFirstCharacter();
  }

  /**
   * Last ditch effort to determine the content type: opens the stream and
   * looks at the first non-whitespace character. A '<' is taken to mean XML
   * and a '{' to mean JSON.
   *
   * This is strictly best-effort; any failure while opening or reading the
   * stream is ignored.
   *
   * @return the XML or JSON MIME type, or null if the content starts with
   *         anything else, is empty, or could not be read
   */
  private String attemptToDetermineTypeFromFirstCharacter() {
    String type = null;
    try (InputStream stream = getStream()) {
      int data = stream.read();
      // Skip all leading whitespace (spaces, tabs, line breaks), not just ' ',
      // so that content starting on a later line is still recognised.
      while (data != -1 && Character.isWhitespace(data)) {
        data = stream.read();
      }
      if (data == '<') {
        type = ContentType.APPLICATION_XML.getMimeType();
      } else if (data == '{') {
        type = ContentType.APPLICATION_JSON.getMimeType();
      }
    } catch (Exception ignored) {
      // Deliberately swallowed: detection is optional, and a failure here
      // simply leaves the type undetermined.
    }
    return type;
  }

  //------------------------------------------------------------------------
  //------------------------------------------------------------------------
  
  /**
   * Construct a <code>ContentStream</code> from a <code>URL</code>
   * 
   * This uses a <code>URLConnection</code> to get the content stream.
   * The content type, name and size are only populated once
   * {@link #getStream()} has been called; the constructor sets just the URL
   * and the source info.
   * @see  URLConnection
   */
  public static class URLStream extends ContentStreamBase
  {
    private final URL url;
    
    /**
     * @param url the location to read the content from
     */
    public URLStream( URL url ) {
      this.url = url; 
      sourceInfo = "url";
    }

    /**
     * Returns the content type last reported by the connection. For
     * <code>file:</code> URLs whose reported type is one of the unhelpful
     * generic types, the type is re-determined from the name or payload and
     * remembered if that succeeds.
     */
    @Override
    public String getContentType() {
      // for file:// streams that are octet-streams, try to determine the payload
      // type from payload rather than just using the mime type.
      if ("file".equals(url.getProtocol()) && UNHELPFUL_TYPES.contains(contentType)) {
        String type = attemptToDetermineContentType();
        if (type != null) {
          contentType = type;
        }
      }
      return contentType;
    }

    /**
     * Opens a new connection to the URL and returns its content. The stream
     * is transparently gunzipped when the connection reports a
     * <code>gzip</code> content encoding or the URL file part ends in
     * <code>.gz</code> / <code>.gzip</code>.
     *
     * As a side effect this refreshes <code>contentType</code>,
     * <code>name</code> and <code>size</code> from the connection.
     *
     * @throws IOException if the connection cannot be opened or read, or the
     *         content is expected to be gzip but has no valid gzip header
     */
    @Override
    public InputStream getStream() throws IOException {
      URLConnection conn = this.url.openConnection();
      
      contentType = conn.getContentType();
      name = url.toExternalForm();
      size = conn.getContentLengthLong();
      InputStream is = conn.getInputStream();
      String urlFile = url.getFile().toLowerCase(Locale.ROOT);
      if( "gzip".equals(conn.getContentEncoding()) || urlFile.endsWith( ".gz" ) || urlFile.endsWith( ".gzip" )){
        try {
          is = new GZIPInputStream(is);
        } catch (IOException e) {
          // The GZIP constructor reads the header eagerly; if that fails the
          // caller never sees the raw stream, so it must be closed here.
          try {
            is.close();
          } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
          }
          throw e;
        }
      }
      return is;
    }
  }
  
  /**
   * Construct a <code>ContentStream</code> from a <code>File</code>
   *
   * The name is the file's simple name, the size is the file length at
   * construction time and the source info is the file's URI. The content
   * type starts out unknown and is guessed lazily on first request.
   */
  public static class FileStream extends ContentStreamBase
  {
    private final File file;
    
    /**
     * @param f the file to read the content from
     */
    public FileStream( File f ) {
      file = f; 
      
      contentType = null; // ??
      name = file.getName();
      size = file.length();
      sourceInfo = file.toURI().toString();
    }

    /**
     * Returns the content type, guessing it from the file name or payload
     * the first time it is needed. The guess is retried on later calls as
     * long as it keeps coming back null.
     */
    @Override
    public String getContentType() {
      if(contentType==null) {
        contentType = attemptToDetermineContentType();
      }
      return contentType;
    }

    /**
     * Opens the file for reading. Files whose name ends in <code>.gz</code>
     * or <code>.gzip</code> (case-insensitive) are transparently gunzipped.
     *
     * @throws IOException if the file cannot be opened, or is named like a
     *         gzip file but has no valid gzip header
     */
    @Override
    public InputStream getStream() throws IOException {
      InputStream is = new FileInputStream( file );
      String lowerName = name.toLowerCase(Locale.ROOT);
      if(lowerName.endsWith(".gz") || lowerName.endsWith(".gzip")) {
        try {
          is = new GZIPInputStream(is);
        } catch (IOException e) {
          // The GZIP constructor reads the header eagerly; if that fails the
          // file handle would otherwise stay open until garbage collection.
          try {
            is.close();
          } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
          }
          throw e;
        }
      }
      return is;
    }
  }
  

  /**
   * Construct a ContentStream from a String
   */
  public static class StringStream extends ContentStreamBase
  {
    private final String str;

    /**
     * Creates a stream over the given string, using the result of
     * {@link #detect(String)} on that string as the content type.
     *
     * @param str the content of the stream
     */
    public StringStream( String str ) {
      this(str, detect(str));
    }

    /**
     * Creates a stream over the given string with an explicit content type.
     * The stream has no name, its source info is <code>"string"</code> and
     * its size is the length of the string encoded as UTF-8.
     *
     * @param str the content of the stream
     * @param contentType the content type to report, may be null
     */
    public StringStream( String str, String contentType ) {
      this.str = str;
      this.contentType = contentType;
      name = null;
      // Passing the Charset constant (the same UTF-8 that DEFAULT_CHARSET
      // names) avoids a lookup by name and the checked
      // UnsupportedEncodingException that can never happen for UTF-8.
      size = (long) str.getBytes(StandardCharsets.UTF_8).length;
      sourceInfo = "string";
    }

    public static String detect(String str) {
      String detectedContentType = null;
      int lim = str.length() - 1;
      for (int i=0; iContentStream from a File
   */
  /**
   * A <code>ContentStream</code> backed by an in-memory byte array.
   *
   * The array is used as given (it is not copied). The supplied source
   * string serves as both the name and the source info of the stream, and
   * the size is the array length.
   */
  public static class ByteArrayStream extends ContentStreamBase
  {
    private final byte[] bytes;

    /**
     * Creates a stream with no content type.
     *
     * @param bytes the content of the stream
     * @param source used as both name and source info
     */
    public ByteArrayStream( byte[] bytes, String source ) {
      this( bytes, source, null );
    }

    /**
     * Creates a stream with an explicit content type.
     *
     * @param bytes the content of the stream
     * @param source used as both name and source info
     * @param contentType the content type to report, may be null
     */
    public ByteArrayStream( byte[] bytes, String source, String contentType ) {
      this.bytes = bytes;
      this.contentType = contentType;

      // The same label identifies the stream and describes its origin.
      this.name = source;
      this.sourceInfo = source;

      this.size = Long.valueOf( bytes.length );
    }

    /**
     * Returns a fresh stream over the backing array on every call.
     */
    @Override
    public InputStream getStream() throws IOException {
      final InputStream content = new ByteArrayInputStream( this.bytes );
      return content;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy