// org.apache.solr.common.util.ContentStreamBase (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.common.util;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.function.Predicate;
import java.util.zip.GZIPInputStream;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.request.RequestWriter;
/**
 * Base class for {@link ContentStream} implementations, bundled with four concrete
 * implementations: one each for URL, File, String and byte-array content.
 *
 * @since solr 1.2
 */
public abstract class ContentStreamBase implements ContentStream {

  public static final String DEFAULT_CHARSET = StandardCharsets.UTF_8.name();

  private static final String TEXT_CSV = "text/csv";
  public static final String TEXT_XML = "text/xml";
  public static final String APPLICATION_OCTET_STREAM = "application/octet-stream";
  public static final String APPLICATION_GZIP = "application/gzip";
  public static final String APPLICATION_XML = "application/xml";
  public static final String APPLICATION_JSON = "application/json";

  /** Mime types that say nothing useful about the payload format. */
  private static final List<String> UNHELPFUL_TYPES =
      Arrays.asList(APPLICATION_OCTET_STREAM, APPLICATION_GZIP, "content/unknown");

  private static final List<String> XML_SUF = Arrays.asList(".xml", ".xml.gz", ".xml.gzip");
  private static final List<String> JSON_SUF = Arrays.asList(".json", ".json.gz", ".json.gzip");
  private static final List<String> CSV_SUF = Arrays.asList(".csv", ".csv.gz", ".csv.gzip");

  /** Name of the content-type parameter that carries the character set. */
  private static final String CHARSET_PARAM = "charset=";

  protected String name;
  protected String sourceInfo;
  protected String contentType;
  protected Long size;

  // ---------------------------------------------------------------------
  // Content-type helpers
  // ---------------------------------------------------------------------

  /**
   * Extracts the value of the {@code charset} parameter from a content-type string.
   *
   * <p>The value ends at the next {@code ';'} (so further parameters are not swallowed) and a
   * pair of surrounding double quotes is removed, e.g. {@code text/xml; charset="utf-8"; x=y}
   * yields {@code utf-8}.
   *
   * @param contentType the content-type string, may be {@code null}
   * @return the charset name, or {@code null} if {@code contentType} is {@code null}, has no
   *     {@code charset=} parameter after its first character, or the parameter value is empty
   */
  public static String getCharsetFromContentType(String contentType) {
    if (contentType == null) {
      return null;
    }
    int idx = contentType.toLowerCase(Locale.ROOT).indexOf(CHARSET_PARAM);
    // idx == 0 would mean there is no media type in front of the parameter; treated as absent.
    if (idx <= 0) {
      return null;
    }
    String charset = contentType.substring(idx + CHARSET_PARAM.length());
    int paramEnd = charset.indexOf(';');
    if (paramEnd >= 0) {
      charset = charset.substring(0, paramEnd);
    }
    charset = charset.trim();
    int last = charset.length() - 1;
    if (last >= 1 && charset.charAt(0) == '"' && charset.charAt(last) == '"') {
      charset = charset.substring(1, last).trim();
    }
    return charset.isEmpty() ? null : charset;
  }

  /**
   * Guesses a content type from {@link #name}: first by file suffix (xml / json / csv, optionally
   * gzipped), then by peeking at the first non-whitespace byte of the stream.
   *
   * @return the guessed content type, or {@code null} if {@code name} is {@code null} or nothing
   *     could be determined
   */
  protected String attemptToDetermineContentType() {
    if (name == null) {
      return null;
    }
    final String lowerName = name.toLowerCase(Locale.ROOT);
    final Predicate<String> endsWith = lowerName::endsWith;
    if (XML_SUF.stream().anyMatch(endsWith)) {
      return APPLICATION_XML;
    }
    if (JSON_SUF.stream().anyMatch(endsWith)) {
      return APPLICATION_JSON;
    }
    if (CSV_SUF.stream().anyMatch(endsWith)) {
      return TEXT_CSV;
    }
    return attemptToDetermineTypeFromFirstCharacter();
  }

  /**
   * Last-ditch effort to determine the content type: opens the stream and, if the first
   * non-whitespace byte is {@code '<'} or <code>'{'</code>, assumes xml or json respectively.
   *
   * @return the guessed type, or {@code null} if unknown or the stream could not be read
   */
  private String attemptToDetermineTypeFromFirstCharacter() {
    try (InputStream stream = getStream()) {
      int data = stream.read();
      while (data != -1 && Character.isWhitespace(data)) {
        data = stream.read();
      }
      if (data == '<') {
        return APPLICATION_XML;
      }
      if (data == '{') {
        return APPLICATION_JSON;
      }
    } catch (Exception ignored) {
      // Deliberately best-effort: any failure simply means the type stays undetermined.
    }
    return null;
  }

  // ---------------------------------------------------------------------
  // Stream helpers
  // ---------------------------------------------------------------------

  /**
   * Wraps {@code raw} in a {@link GZIPInputStream}. If the gzip header cannot be read, {@code raw}
   * is closed before the failure is propagated so the underlying resource is not leaked.
   */
  private static InputStream gunzip(InputStream raw) throws IOException {
    try {
      return new GZIPInputStream(raw);
    } catch (IOException | RuntimeException e) {
      closeAfterFailure(raw, e);
      throw e;
    }
  }

  /** Closes {@code in}, recording any close failure as suppressed on {@code failure}. */
  private static void closeAfterFailure(InputStream in, Throwable failure) {
    try {
      in.close();
    } catch (IOException closeFailure) {
      failure.addSuppressed(closeFailure);
    }
  }

  /** True if {@code lowerCaseName} ends in one of the gzip suffixes handled by this class. */
  private static boolean hasGzipSuffix(String lowerCaseName) {
    return lowerCaseName.endsWith(".gz") || lowerCaseName.endsWith(".gzip");
  }

  // ------------------------------------------------------------------------
  // Concrete implementations
  // ------------------------------------------------------------------------

  /**
   * Construct a {@code ContentStream} from a URL.
   *
   * <p>This uses a {@link URLConnection} to get the content stream. Note that {@code contentType},
   * {@code name} and {@code size} are only populated once {@link #getStream()} has been called.
   *
   * @see URLConnection
   */
  public static class URLStream extends ContentStreamBase {
    private final URL url;

    public URLStream(URL url) {
      this.url = url;
      sourceInfo = "url";
    }

    /**
     * For {@code file:} URLs whose reported mime type is unhelpful (e.g. octet-stream), tries to
     * determine the type from the name / payload instead; otherwise returns the stored type.
     */
    @Override
    public String getContentType() {
      if ("file".equals(url.getProtocol()) && UNHELPFUL_TYPES.contains(contentType)) {
        String type = attemptToDetermineContentType();
        if (type != null) {
          contentType = type;
        }
      }
      return contentType;
    }

    /**
     * Opens a connection to the URL, records content type, name and length from it, and returns
     * the body, transparently gunzipped when the content encoding is gzip or the URL's file part
     * ends in {@code .gz} / {@code .gzip}.
     */
    @Override
    public InputStream getStream() throws IOException {
      URLConnection conn = this.url.openConnection();
      contentType = conn.getContentType();
      name = url.toExternalForm();
      size = conn.getContentLengthLong();
      InputStream is = conn.getInputStream();
      String urlFile = url.getFile().toLowerCase(Locale.ROOT);
      // Content-coding names are case-insensitive, so compare ignoring case.
      if ("gzip".equalsIgnoreCase(conn.getContentEncoding()) || hasGzipSuffix(urlFile)) {
        is = gunzip(is);
      }
      return is;
    }
  }

  /** Construct a {@code ContentStream} from a File. */
  public static class FileStream extends ContentStreamBase {
    private final File file;

    public FileStream(File f) {
      file = f;
      contentType = null; // unknown up front; determined lazily in getContentType()
      name = file.getName();
      size = file.length();
      sourceInfo = file.toURI().toString();
    }

    /** Returns the content type, guessing it on first use if none has been set. */
    @Override
    public String getContentType() {
      if (contentType == null) {
        contentType = attemptToDetermineContentType();
      }
      return contentType;
    }

    /**
     * Opens the file, transparently gunzipping it when {@code name} ends in {@code .gz} or
     * {@code .gzip}.
     */
    @Override
    public InputStream getStream() throws IOException {
      InputStream is = new FileInputStream(file);
      if (hasGzipSuffix(name.toLowerCase(Locale.ROOT))) {
        is = gunzip(is);
      }
      return is;
    }
  }

  /** Construct a {@code ContentStream} from a String. */
  public static class StringStream extends ContentStreamBase {
    private final String str;

    /** Creates a stream whose content type is guessed via {@link #detect(String)}. */
    public StringStream(String str) {
      this(str, detect(str));
    }

    public StringStream(String str, String contentType) {
      this.str = str;
      this.contentType = contentType;
      name = null;
      // Size is the UTF-8 encoded length, matching what getStream() hands out.
      size = (long) str.getBytes(StandardCharsets.UTF_8).length;
      sourceInfo = "string";
    }

    /**
     * Guesses the content type from the first non-whitespace character(s) of {@code str}.
     *
     * <p>{@code '#'}, {@code "//"}, {@code "/*"}, <code>'{'</code> and {@code '['} indicate JSON
     * (possibly starting with a comment); {@code '<'} indicates XML.
     *
     * @return {@link #APPLICATION_JSON}, {@link #TEXT_XML}, or {@code null} if {@code str} is
     *     {@code null}, blank, or starts with anything else
     */
    public static String detect(String str) {
      if (str == null) {
        return null;
      }
      final int len = str.length();
      for (int i = 0; i < len; i++) {
        final char ch = str.charAt(i);
        if (Character.isWhitespace(ch)) {
          continue;
        }
        // Only the first non-whitespace position is ever examined.
        if (ch == '#' || ch == '{' || ch == '[') {
          return APPLICATION_JSON;
        }
        if (ch == '/' && i + 1 < len) {
          final char next = str.charAt(i + 1);
          if (next == '/' || next == '*') {
            return APPLICATION_JSON; // single-line or multi-line comment
          }
        }
        return ch == '<' ? TEXT_XML : null;
      }
      return null;
    }

    /** Returns the string's UTF-8 bytes. */
    @Override
    public InputStream getStream() throws IOException {
      return new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * If a charset is defined (by the contentType) use that, otherwise use a StringReader.
     *
     * <p>NOTE(review): with a declared charset the UTF-8 bytes from {@link #getStream()} are
     * decoded using that charset, which looks lossy for non-ASCII text when the charset is not
     * UTF-8 - behaviour kept as-is, confirm whether callers rely on it.
     */
    @Override
    public Reader getReader() throws IOException {
      String charset = getCharsetFromContentType(contentType);
      if (charset == null) {
        return new StringReader(str);
      }
      return new InputStreamReader(getStream(), charset);
    }
  }

  /**
   * Base reader implementation. If the contentType declares a charset use it, otherwise use
   * "utf-8".
   *
   * <p>If the declared charset is not supported (an {@link UnsupportedEncodingException}), the
   * already-opened stream is closed before the exception is propagated.
   */
  @Override
  public Reader getReader() throws IOException {
    String charset = getCharsetFromContentType(getContentType());
    InputStream stream = getStream();
    try {
      return charset == null
          ? new InputStreamReader(stream, StandardCharsets.UTF_8)
          : new InputStreamReader(stream, charset);
    } catch (IOException | RuntimeException e) {
      closeAfterFailure(stream, e);
      throw e;
    }
  }

  // ------------------------------------------------------------------
  // Getters / Setters for overrideable attributes
  // ------------------------------------------------------------------

  @Override
  public String getContentType() {
    return contentType;
  }

  public void setContentType(String contentType) {
    this.contentType = contentType;
  }

  @Override
  public String getName() {
    return name;
  }

  public void setName(String name) {
    this.name = name;
  }

  @Override
  public Long getSize() {
    return size;
  }

  public void setSize(Long size) {
    this.size = size;
  }

  @Override
  public String getSourceInfo() {
    return sourceInfo;
  }

  public void setSourceInfo(String sourceInfo) {
    this.sourceInfo = sourceInfo;
  }

  /**
   * Serializes {@code req} with the content writer obtained from {@code requestWriter} into
   * memory and returns the bytes as a {@link ByteArrayStream} carrying that writer's content
   * type.
   */
  public static ContentStream create(
      RequestWriter requestWriter, @SuppressWarnings({"rawtypes"}) SolrRequest req)
      throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    RequestWriter.ContentWriter contentWriter = requestWriter.getContentWriter(req);
    contentWriter.write(baos);
    return new ByteArrayStream(baos.toByteArray(), null, contentWriter.getContentType());
  }

  /** Construct a {@code ContentStream} from a byte array. */
  public static class ByteArrayStream extends ContentStreamBase {
    private final byte[] bytes;

    public ByteArrayStream(byte[] bytes, String source) {
      this(bytes, source, null);
    }

    /**
     * @param bytes the content; the array is used directly, not copied
     * @param source used as both the stream name and its source info
     * @param contentType the content type, may be {@code null}
     */
    public ByteArrayStream(byte[] bytes, String source, String contentType) {
      this.bytes = bytes;
      this.contentType = contentType;
      name = source;
      size = (long) bytes.length;
      sourceInfo = source;
    }

    @Override
    public InputStream getStream() throws IOException {
      return new ByteArrayInputStream(bytes);
    }
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy