// Source: org.apache.solr.handler.designer.DefaultSampleDocumentsLoader
// Artifact: solr-core - Apache Solr (module: core)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.designer;
import static org.apache.solr.common.params.CommonParams.JSON_MIME;
import static org.apache.solr.handler.loader.CSVLoaderBase.SEPARATOR;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.handler.loader.CSVLoaderBase;
import org.apache.solr.handler.loader.JsonLoader;
import org.apache.solr.handler.loader.XMLLoader;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.util.SafeXMLParsing;
import org.noggit.JSONParser;
import org.noggit.ObjectBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
public class DefaultSampleDocumentsLoader implements SampleDocumentsLoader {
/**
 * Parameter name {@code "csvMultiValueDelimiter"}. NOTE(review): presumably the request parameter
 * carrying the delimiter for multi-valued CSV fields; its use is not visible in this part of the
 * file, so confirm against the CSV loading code.
 */
public static final String CSV_MULTI_VALUE_DELIM_PARAM = "csvMultiValueDelimiter";
/** Upper bound, in bytes (5 * 1024 * 1024), on the size of an uploaded sample. */
private static final int MAX_STREAM_SIZE = (5 * 1024 * 1024);
/** Logger for this class, resolved through the lookup class so the name follows the class. */
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
 * Drains the given stream into a byte array and closes the stream afterwards.
 *
 * @param in the stream to read until end-of-stream; it is closed before this method returns
 * @return every byte read from {@code in}, in order (empty if the stream had no data)
 * @throws IOException if reading from or closing the stream fails
 */
public static byte[] streamAsBytes(final InputStream in) throws IOException {
  final ByteArrayOutputStream collected = new ByteArrayOutputStream();
  try (in) {
    // copy everything up to end-of-stream into the in-memory buffer
    in.transferTo(collected);
  }
  return collected.toByteArray();
}
/**
 * Reads an uploaded sample into memory and parses it into sample documents, choosing the parser
 * from the stream's content type.
 *
 * <p>Dispatch by content type: JSON goes to {@code loadJsonDocs}, {@code text/xml} and {@code
 * application/xml} go to {@code loadXmlDocs}, {@code text/csv} and {@code application/csv} go to
 * {@code loadCsvDocs}, and {@code text/plain} and {@code application/octet-stream} go to {@code
 * loadJsonLines}.
 *
 * @param params request parameters handed to the parsers; {@code null} is replaced by empty
 *     params
 * @param stream the uploaded content; a {@code null} content type yields {@link
 *     SampleDocuments#NONE}
 * @param maxDocsToLoad when positive, the result is truncated to at most this many documents
 * @return the parsed documents together with the content type and the source label; the document
 *     list is {@code null} when the upload contained no bytes
 * @throws IOException if the stream cannot be read or a parser fails on I/O
 * @throws SolrException with {@code BAD_REQUEST} if the sample exceeds the maximum upload size or
 *     the content type is not supported
 */
@Override
public SampleDocuments parseDocsFromStream(
    SolrParams params, ContentStream stream, final int maxDocsToLoad) throws IOException {
  final String contentType = stream.getContentType();
  if (contentType == null) {
    return SampleDocuments.NONE;
  }

  if (params == null) {
    params = new ModifiableSolrParams();
  }

  // fail fast, before buffering anything, when the stream declares a size over the limit
  final Long declaredSize = stream.getSize();
  if (declaredSize != null && declaredSize > MAX_STREAM_SIZE) {
    throw sampleTooBigError();
  }

  String fileSource = "paste";
  if ("file".equals(stream.getName())) {
    fileSource = stream.getSourceInfo() != null ? stream.getSourceInfo() : "file";
  }

  final byte[] uploadedBytes = streamAsBytes(stream.getStream());
  // recheck the upload size in case the stream returned null for getSize
  if (uploadedBytes.length > MAX_STREAM_SIZE) {
    throw sampleTooBigError();
  }

  // use a byte stream for the parsers in case they need to re-parse using a different strategy
  // e.g. JSON vs. JSON lines or different CSV strategies ...
  final ContentStreamBase.ByteArrayStream byteStream =
      new ContentStreamBase.ByteArrayStream(uploadedBytes, fileSource, contentType);

  String charset = ContentStreamBase.getCharsetFromContentType(contentType);
  if (charset == null) {
    charset = ContentStreamBase.DEFAULT_CHARSET;
  }

  List docs = null;
  // Decide on the bytes actually read: stream.getSize() may be null (see the recheck above), and
  // unboxing it here would throw a NullPointerException for streams without a declared size.
  if (uploadedBytes.length > 0) {
    if (contentType.contains(JSON_MIME)) {
      docs = loadJsonDocs(params, byteStream, maxDocsToLoad);
    } else if (contentType.contains("text/xml") || contentType.contains("application/xml")) {
      docs = loadXmlDocs(params, byteStream, maxDocsToLoad);
    } else if (contentType.contains("text/csv") || contentType.contains("application/csv")) {
      docs = loadCsvDocs(params, fileSource, uploadedBytes, charset, maxDocsToLoad);
    } else if (contentType.contains("text/plain")
        || contentType.contains("application/octet-stream")) {
      docs = loadJsonLines(params, byteStream, maxDocsToLoad);
    } else {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, contentType + " not supported yet!");
    }

    // the parsers may return more than requested; enforce the cap here
    if (docs != null && maxDocsToLoad > 0 && docs.size() > maxDocsToLoad) {
      docs = docs.subList(0, maxDocsToLoad);
    }
  }

  return new SampleDocuments(docs, contentType, fileSource);
}

/** Builds the BAD_REQUEST error raised when an uploaded sample exceeds MAX_STREAM_SIZE bytes. */
private static SolrException sampleTooBigError() {
  return new SolrException(
      SolrException.ErrorCode.BAD_REQUEST,
      "Sample is too big! "
          + MAX_STREAM_SIZE
          + " bytes is the max upload size for sample documents.");
}
/**
 * Parses sample documents from CSV content.
 *
 * <p>If the request already names a separator, the raw bytes are parsed as-is. Otherwise the
 * bytes are decoded with {@code charset}, the separator is chosen by {@code detectTSV}, and the
 * decoded text is parsed with that separator added to a copy of the params.
 *
 * @param params request parameters; consulted for the {@code SEPARATOR} setting
 * @param source source label attached to the byte stream when a separator was supplied
 * @param streamBytes the raw uploaded CSV content
 * @param charset name of the charset used to decode the bytes when the separator must be detected
 * @param maxDocsToLoad document limit passed through to the CSV loader
 * @return the documents produced by the CSV loader
 * @throws IOException if the charset is unsupported or loading fails on I/O
 */
protected List loadCsvDocs(
    SolrParams params, String source, byte[] streamBytes, String charset, final int maxDocsToLoad)
    throws IOException {
  final SolrParams csvParams;
  final ContentStream csvStream;

  if (params.get(SEPARATOR) != null) {
    // separator supplied by the caller: hand the untouched bytes to the loader
    csvParams = params;
    csvStream = new ContentStreamBase.ByteArrayStream(streamBytes, source, "text/csv");
  } else {
    // no separator given: decode the text, detect one, and pass it along on a copy of the params
    final String decodedCsv = new String(streamBytes, charset);
    final char detectedSeparator = detectTSV(decodedCsv);
    final ModifiableSolrParams withSeparator = new ModifiableSolrParams(params);
    withSeparator.set(SEPARATOR, String.valueOf(detectedSeparator));
    csvParams = withSeparator;
    csvStream = new ContentStreamBase.StringStream(decodedCsv, "text/csv");
  }

  final SampleCSVLoader csvLoader = new SampleCSVLoader(new CSVRequest(csvParams), maxDocsToLoad);
  return csvLoader.loadDocs(csvStream);
}
@SuppressWarnings("unchecked")
protected List loadJsonLines(
SolrParams params, ContentStreamBase.ByteArrayStream stream, final int maxDocsToLoad)
throws IOException {
List