
org.apache.solr.common.util.ContentStreamBase Maven / Gradle / Ivy
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.common.util; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLConnection; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; import java.util.Locale; import java.util.function.Predicate; import java.util.zip.GZIPInputStream; import org.apache.http.entity.ContentType; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.request.RequestWriter; /** * Three concrete implementations for ContentStream - one for File/URL/String * * * @since solr 1.2 */ public abstract class ContentStreamBase implements ContentStream { public static final String DEFAULT_CHARSET = StandardCharsets.UTF_8.name(); private static final String TEXT_CSV = "text/csv"; private static final List
from aUNHELPFUL_TYPES = Arrays.asList(ContentType.APPLICATION_OCTET_STREAM.getMimeType(), "application/gzip", "content/unknown"); private static final List XML_SUF = Arrays.asList(".xml", ".xml.gz", ".xml.gzip"); private static final List JSON_SUF = Arrays.asList(".json", ".json.gz", ".json.gzip"); private static final List CSV_SUF = Arrays.asList(".csv", ".csv.gz", ".csv.gzip"); protected String name; protected String sourceInfo; protected String contentType; protected Long size; //--------------------------------------------------------------------- //--------------------------------------------------------------------- public static String getCharsetFromContentType( String contentType ) { if( contentType != null ) { int idx = contentType.toLowerCase(Locale.ROOT).indexOf( "charset=" ); if( idx > 0 ) { return contentType.substring( idx + "charset=".length() ).trim(); } } return null; } protected String attemptToDetermineContentType() { String type = null; if (name != null) { Predicate endsWith = suffix->name.toLowerCase(Locale.ROOT).endsWith(suffix); if (XML_SUF.stream().anyMatch(endsWith)) { type = ContentType.APPLICATION_XML.getMimeType(); } else if (JSON_SUF.stream().anyMatch(endsWith)) { type = ContentType.APPLICATION_JSON.getMimeType(); } else if (CSV_SUF.stream().anyMatch(endsWith)) { type = TEXT_CSV; } else { type = attemptToDetermineTypeFromFirstCharacter(); } } return type; } private String attemptToDetermineTypeFromFirstCharacter() { String type = null; try (InputStream stream = getStream()) { // Last ditch effort to determine content, if the first non-white space // is a '<' or '{', assume xml or json. 
int data = stream.read(); while (( data != -1 ) && ( ( (char)data ) == ' ' )) { data = stream.read(); } if ((char)data == '<') { type = ContentType.APPLICATION_XML.getMimeType(); } else if ((char)data == '{') { type = ContentType.APPLICATION_JSON.getMimeType(); } } catch (Exception ex) { // This code just eats, the exception and leaves // the contentType untouched. } return type; } //------------------------------------------------------------------------ //------------------------------------------------------------------------ /** * Construct a ContentStream
from aURL
* * This uses aURLConnection
to get the content stream * @see URLConnection */ public static class URLStream extends ContentStreamBase { private final URL url; public URLStream( URL url ) { this.url = url; sourceInfo = "url"; } @Override public String getContentType() { // for file:// streams that are octet-streams, try to determine the payload // type from payload rather than just using the mime type. if ("file".equals(url.getProtocol())) { Predicateequals = mimeType->mimeType.equals(contentType); if (UNHELPFUL_TYPES.stream().anyMatch(equals)) { String type = attemptToDetermineContentType(); contentType = ( type != null ) ? type : contentType; } } return contentType; } @Override public InputStream getStream() throws IOException { URLConnection conn = this.url.openConnection(); contentType = conn.getContentType(); name = url.toExternalForm(); size = conn.getContentLengthLong(); InputStream is = conn.getInputStream(); String urlFile = url.getFile().toLowerCase(Locale.ROOT); if( "gzip".equals(conn.getContentEncoding()) || urlFile.endsWith( ".gz" ) || urlFile.endsWith( ".gzip" )){ is = new GZIPInputStream(is); } return is; } } /** * Construct a ContentStream
from aFile
*/ public static class FileStream extends ContentStreamBase { private final File file; public FileStream( File f ) { file = f; contentType = null; // ?? name = file.getName(); size = file.length(); sourceInfo = file.toURI().toString(); } @Override public String getContentType() { if(contentType==null) { contentType = attemptToDetermineContentType(); } return contentType; } @Override public InputStream getStream() throws IOException { InputStream is = new FileInputStream( file ); String lowerName = name.toLowerCase(Locale.ROOT); if(lowerName.endsWith(".gz") || lowerName.endsWith(".gzip")) { is = new GZIPInputStream(is); } return is; } } /** * Construct aContentStream
* from a String
*/ public static class StringStream extends ContentStreamBase { private final String str; public StringStream( String str ) { this(str, detect(str)); } public StringStream( String str, String contentType ) { this.str = str; this.contentType = contentType; name = null; try { size = (long) str.getBytes(DEFAULT_CHARSET).length; } catch (UnsupportedEncodingException e) { // won't happen throw new RuntimeException(e); } sourceInfo = "string"; } public static String detect(String str) { String detectedContentType = null; int lim = str.length() - 1; for (int i=0; iContentStream File
*/
  public static class ByteArrayStream extends ContentStreamBase
  {
    // Content served by this stream. The array handed to the constructor is
    // kept as-is (no copy is made).
    private final byte[] bytes;

    /**
     * Creates a stream over the given bytes without a content type.
     *
     * @param bytes  the content
     * @param source used both as the name and as the source info of the stream
     */
    public ByteArrayStream( byte[] bytes, String source ) {
      this( bytes, source, null );
    }

    /**
     * Creates a stream over the given bytes.
     *
     * @param bytes       the content
     * @param source      used both as the name and as the source info of the stream
     * @param contentType the content type to report, may be null
     */
    public ByteArrayStream( byte[] bytes, String source, String contentType ) {
      this.bytes = bytes;
      this.size = Long.valueOf( bytes.length );
      this.name = source;
      this.sourceInfo = source;
      this.contentType = contentType;
    }

    /**
     * Returns a new in-memory stream positioned at the start of the bytes.
     */
    @Override
    public InputStream getStream() throws IOException {
      return new ByteArrayInputStream( this.bytes );
    }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy