/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.janusgraph.diskstorage.solr;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpEntityEnclosingRequest;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.client.HttpClient;
import org.apache.http.entity.BufferedHttpEntity;
import org.apache.http.impl.auth.KerberosScheme;
import org.apache.http.protocol.HttpContext;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.Krb5HttpClientBuilder;
import org.apache.solr.client.solrj.impl.LBHttpSolrClient;
import org.apache.solr.client.solrj.impl.PreemptiveAuth;
import org.apache.solr.client.solrj.impl.SolrHttpClientBuilder;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.zookeeper.KeeperException;
import org.janusgraph.core.Cardinality;
import org.janusgraph.core.JanusGraphElement;
import org.janusgraph.core.attribute.Cmp;
import org.janusgraph.core.attribute.Geo;
import org.janusgraph.core.attribute.Geoshape;
import org.janusgraph.core.attribute.Text;
import org.janusgraph.core.schema.Mapping;
import org.janusgraph.core.schema.Parameter;
import org.janusgraph.diskstorage.BackendException;
import org.janusgraph.diskstorage.BaseTransaction;
import org.janusgraph.diskstorage.BaseTransactionConfig;
import org.janusgraph.diskstorage.BaseTransactionConfigurable;
import org.janusgraph.diskstorage.PermanentBackendException;
import org.janusgraph.diskstorage.TemporaryBackendException;
import org.janusgraph.diskstorage.configuration.ConfigOption;
import org.janusgraph.diskstorage.configuration.Configuration;
import org.janusgraph.diskstorage.indexing.IndexEntry;
import org.janusgraph.diskstorage.indexing.IndexFeatures;
import org.janusgraph.diskstorage.indexing.IndexMutation;
import org.janusgraph.diskstorage.indexing.IndexProvider;
import org.janusgraph.diskstorage.indexing.IndexQuery;
import org.janusgraph.diskstorage.indexing.KeyInformation;
import org.janusgraph.diskstorage.indexing.RawQuery;
import org.janusgraph.diskstorage.solr.transform.GeoToWktConverter;
import org.janusgraph.diskstorage.util.DefaultTransaction;
import org.janusgraph.graphdb.configuration.PreInitializeConfigOptions;
import org.janusgraph.graphdb.database.serialize.AttributeUtil;
import org.janusgraph.graphdb.internal.Order;
import org.janusgraph.graphdb.query.JanusGraphPredicate;
import org.janusgraph.graphdb.query.condition.And;
import org.janusgraph.graphdb.query.condition.Condition;
import org.janusgraph.graphdb.query.condition.Not;
import org.janusgraph.graphdb.query.condition.Or;
import org.janusgraph.graphdb.query.condition.PredicateCondition;
import org.janusgraph.graphdb.types.ParameterType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.lang.reflect.Constructor;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.TimeZone;
import java.util.UUID;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import static org.janusgraph.diskstorage.solr.SolrIndex.DYNAMIC_FIELDS;
import static org.janusgraph.diskstorage.solr.SolrIndex.HTTP_ALLOW_COMPRESSION;
import static org.janusgraph.diskstorage.solr.SolrIndex.HTTP_CONNECTION_TIMEOUT;
import static org.janusgraph.diskstorage.solr.SolrIndex.HTTP_GLOBAL_MAX_CONNECTIONS;
import static org.janusgraph.diskstorage.solr.SolrIndex.HTTP_MAX_CONNECTIONS_PER_HOST;
import static org.janusgraph.diskstorage.solr.SolrIndex.HTTP_URLS;
import static org.janusgraph.diskstorage.solr.SolrIndex.KERBEROS_ENABLED;
import static org.janusgraph.diskstorage.solr.SolrIndex.KEY_FIELD_NAMES;
import static org.janusgraph.diskstorage.solr.SolrIndex.MAX_SHARDS_PER_NODE;
import static org.janusgraph.diskstorage.solr.SolrIndex.NUM_SHARDS;
import static org.janusgraph.diskstorage.solr.SolrIndex.REPLICATION_FACTOR;
import static org.janusgraph.diskstorage.solr.SolrIndex.SOLR_DEFAULT_CONFIG;
import static org.janusgraph.diskstorage.solr.SolrIndex.SOLR_MODE;
import static org.janusgraph.diskstorage.solr.SolrIndex.SOLR_NS;
import static org.janusgraph.diskstorage.solr.SolrIndex.TTL_FIELD;
import static org.janusgraph.diskstorage.solr.SolrIndex.WAIT_SEARCHER;
import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_MAX_RESULT_SET_SIZE;
/**
* NOTE: Copied from JanusGraph to support Kerberos and to add support for multiple ZooKeeper clients. Do not change.
* This is a copy of SolrIndex.java from org.janusgraph.diskstorage.solr.
*/
@PreInitializeConfigOptions
public class Solr6Index implements IndexProvider {
private static final Logger logger = LoggerFactory.getLogger(Solr6Index.class);
private static final String DEFAULT_ID_FIELD = "id";
private static final char CHROOT_START_CHAR = '/';
private enum Mode {
HTTP, CLOUD;
public static Mode parse(String mode) {
for (final Mode m : Mode.values()) {
if (m.toString().equalsIgnoreCase(mode)) return m;
}
throw new IllegalArgumentException("Unrecognized mode: "+mode);
}
}
public static final ConfigOption<String[]> ZOOKEEPER_URLS = new ConfigOption<>(SOLR_NS,"zookeeper-urls",
"URL of the Zookeeper instance coordinating the SolrCloud cluster",
ConfigOption.Type.MASKABLE, new String[]{"localhost:2181"});
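// Illustrative sketch only: how these options might appear in a JanusGraph-style properties file.
// The "index.search.solr." prefix and the host names are assumptions (Atlas, for example, puts an
// "atlas.graph." prefix in front of the graph configuration keys):
//   index.search.solr.mode=cloud
//   index.search.solr.zookeeper-urls=zk1:2181,zk2:2181,zk3:2181/solr
//   index.search.solr.http-urls=http://solr1:8983/solr,http://solr2:8983/solr
//   index.search.solr.kerberos-enabled=true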
private static final IndexFeatures SOLR_FEATURES = new IndexFeatures.Builder()
.supportsDocumentTTL()
.setDefaultStringMapping(Mapping.TEXT)
.supportedStringMappings(Mapping.TEXT, Mapping.STRING)
.supportsCardinality(Cardinality.SINGLE)
.supportsCardinality(Cardinality.LIST)
.supportsCardinality(Cardinality.SET)
.supportsCustomAnalyzer()
.supportsGeoContains()
.build();
private static final Map<Geo, String> SPATIAL_PREDICATES = spatialPredicates();
private final SolrClient solrClient;
private final Configuration configuration;
private final Mode mode;
private final boolean dynFields;
private final Map<String, String> keyFieldIds;
private final String ttlField;
private final int batchSize;
private final boolean waitSearcher;
private final boolean kerberosEnabled;
public Solr6Index(final Configuration config) throws BackendException {
Preconditions.checkArgument(config!=null);
configuration = config;
mode = Mode.parse(config.get(SOLR_MODE));
kerberosEnabled = config.get(KERBEROS_ENABLED);
dynFields = config.get(DYNAMIC_FIELDS);
keyFieldIds = parseKeyFieldsForCollections(config);
batchSize = config.get(INDEX_MAX_RESULT_SET_SIZE);
ttlField = config.get(TTL_FIELD);
waitSearcher = config.get(WAIT_SEARCHER);
if (kerberosEnabled) {
logger.debug("Kerberos is enabled. Configuring SOLR for Kerberos.");
configureSolrClientsForKerberos();
} else {
logger.debug("Kerberos is NOT enabled.");
logger.debug("KERBEROS_ENABLED name is " + KERBEROS_ENABLED.getName() + " and it is" + (KERBEROS_ENABLED.isOption() ? " " : " not") + " an option.");
logger.debug("KERBEROS_ENABLED type is " + KERBEROS_ENABLED.getType().name());
}
final ModifiableSolrParams clientParams = new ModifiableSolrParams();
switch (mode) {
case CLOUD:
/* ATLAS-2920: Update JanusGraph Solr clients to use all zookeeper entries – start */
final List<String> zookeeperUrls = getZookeeperURLs(config);
/* ATLAS-2920: end */
final CloudSolrClient cloudServer = new CloudSolrClient.Builder()
.withLBHttpSolrClientBuilder(
new LBHttpSolrClient.Builder()
.withHttpSolrClientBuilder(new HttpSolrClient.Builder().withInvariantParams(clientParams))
.withBaseSolrUrls(config.get(HTTP_URLS))
)
.withZkHost(zookeeperUrls)
.sendUpdatesOnlyToShardLeaders()
.build();
cloudServer.connect();
solrClient = cloudServer;
break;
case HTTP:
clientParams.add(HttpClientUtil.PROP_ALLOW_COMPRESSION, config.get(HTTP_ALLOW_COMPRESSION).toString());
clientParams.add(HttpClientUtil.PROP_CONNECTION_TIMEOUT, config.get(HTTP_CONNECTION_TIMEOUT).toString());
clientParams.add(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, config.get(HTTP_MAX_CONNECTIONS_PER_HOST).toString());
clientParams.add(HttpClientUtil.PROP_MAX_CONNECTIONS, config.get(HTTP_GLOBAL_MAX_CONNECTIONS).toString());
final HttpClient client = HttpClientUtil.createClient(clientParams);
solrClient = new LBHttpSolrClient.Builder()
.withHttpClient(client)
.withBaseSolrUrls(config.get(HTTP_URLS))
.build();
break;
default:
throw new IllegalArgumentException("Unsupported Solr operation mode: " + mode);
}
}
private void configureSolrClientsForKerberos() throws PermanentBackendException {
String kerberosConfig = System.getProperty("java.security.auth.login.config");
if(kerberosConfig == null) {
throw new PermanentBackendException("Unable to configure kerberos for solr client. System property 'java.security.auth.login.config' is not set.");
}
logger.debug("Using kerberos configuration file located at '{}'.", kerberosConfig);
try(Krb5HttpClientBuilder krbBuild = new Krb5HttpClientBuilder()) {
SolrHttpClientBuilder kb = krbBuild.getBuilder();
HttpClientUtil.setHttpClientBuilder(kb);
HttpRequestInterceptor bufferedEntityInterceptor = new HttpRequestInterceptor() {
@Override
public void process(HttpRequest request, HttpContext context) throws HttpException, IOException {
if(request instanceof HttpEntityEnclosingRequest) {
HttpEntityEnclosingRequest enclosingRequest = ((HttpEntityEnclosingRequest) request);
HttpEntity requestEntity = enclosingRequest.getEntity();
enclosingRequest.setEntity(new BufferedHttpEntity(requestEntity));
}
}
};
HttpClientUtil.addRequestInterceptor(bufferedEntityInterceptor);
HttpRequestInterceptor preemptiveAuth = new PreemptiveAuth(new KerberosScheme());
HttpClientUtil.addRequestInterceptor(preemptiveAuth);
}
}
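// Illustrative sketch only: configureSolrClientsForKerberos() above requires the JVM to be started
// with -Djava.security.auth.login.config pointing at a JAAS file. Principal and keytab values below
// are assumptions; SolrJ looks up the "Client" entry by default (overridable via
// solr.kerberos.jaas.appname):
//   Client {
//     com.sun.security.auth.module.Krb5LoginModule required
//     useKeyTab=true
//     keyTab="/etc/security/keytabs/service.keytab"
//     storeKey=true
//     useTicketCache=false
//     principal="service/host.example.com@EXAMPLE.COM";
//   };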
private Map<String, String> parseKeyFieldsForCollections(Configuration config) throws BackendException {
final Map<String, String> keyFieldNames = new HashMap<>();
final String[] collectionFieldStatements = config.has(KEY_FIELD_NAMES) ? config.get(KEY_FIELD_NAMES) : new String[0];
for (final String collectionFieldStatement : collectionFieldStatements) {
final String[] parts = collectionFieldStatement.trim().split("=");
if (parts.length != 2) {
throw new PermanentBackendException(
"Unable to parse the collection name / key field name pair. It should be of the format collection=field");
}
final String collectionName = parts[0];
final String keyFieldName = parts[1];
keyFieldNames.put(collectionName, keyFieldName);
}
return keyFieldNames;
}
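// Illustrative sketch only: parseKeyFieldsForCollections() above expects comma-separated
// collection=field pairs, e.g. (the property prefix, collection and field names are assumptions):
//   index.search.solr.key-field-names=edge_index=document_id,vertex_index=document_id
// Collections without an explicit mapping fall back to DEFAULT_ID_FIELD ("id") in getKeyFieldId().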
private String getKeyFieldId(String collection) {
String field = keyFieldIds.get(collection);
if (field==null) field = DEFAULT_ID_FIELD;
return field;
}
/**
* Unlike the ElasticSearch Index, which is schema free, Solr requires a schema to
* support searching. This means that you will need to modify the Solr schema with the
* appropriate field definitions in order for indexing to work properly (see the illustrative
* schema snippet after this method). If you have a running instance
* of Solr and you modify its schema with new fields, don't forget to re-index!
* @param store Index store
* @param key New key to register
* @param information data type to register for the key
* @param tx enclosing transaction
* @throws BackendException in case an exception is thrown when
* creating a collection.
*/
@SuppressWarnings("unchecked")
@Override
public void register(String store, String key, KeyInformation information, BaseTransaction tx)
throws BackendException {
if (mode== Mode.CLOUD) {
final CloudSolrClient client = (CloudSolrClient) solrClient;
try {
createCollectionIfNotExists(client, configuration, store);
} catch (final IOException | SolrServerException | InterruptedException | KeeperException e) {
throw new PermanentBackendException(e);
}
}
//Since all data types must be defined in the schema.xml, pre-registering a type does not work.
//But we do validate any analyzer configured for the key.
String analyzer = ParameterType.STRING_ANALYZER.findParameter(information.getParameters(), null);
if (analyzer != null) {
//If the key has an analyzer configured, we try to instantiate it by reflection
try {
((Constructor<Tokenizer>) ClassLoader.getSystemClassLoader().loadClass(analyzer)
.getConstructor()).newInstance();
} catch (final ReflectiveOperationException e) {
throw new PermanentBackendException(e.getMessage(),e);
}
}
analyzer = ParameterType.TEXT_ANALYZER.findParameter(information.getParameters(), null);
if (analyzer != null) {
//If the key has an analyzer configured, we try to instantiate it by reflection
try {
((Constructor<Tokenizer>) ClassLoader.getSystemClassLoader().loadClass(analyzer)
.getConstructor()).newInstance();
} catch (final ReflectiveOperationException e) {
throw new PermanentBackendException(e.getMessage(),e);
}
}
}
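// Illustrative sketch only: register() above assumes the target collection's schema already declares
// fields matching the indexed property keys. A hand-maintained managed-schema might contain entries
// such as the following (field names and types are assumptions, not part of this class):
//   <field name="document_id" type="string"       indexed="true" stored="true"/>
//   <field name="age"         type="pint"         indexed="true" stored="true"/>
//   <field name="place"       type="location_rpt" indexed="true" stored="true"/>
// Alternatively, enabling the DYNAMIC_FIELDS option lets Solr dynamic-field rules (e.g. *_s, *_i)
// cover newly registered keys without schema edits.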
@Override
public void mutate(Map<String, Map<String, IndexMutation>> mutations, KeyInformation.IndexRetriever information,
BaseTransaction tx) throws BackendException {
logger.debug("Mutating SOLR");
try {
for (final Map.Entry<String, Map<String, IndexMutation>> stores : mutations.entrySet()) {
final String collectionName = stores.getKey();
final String keyIdField = getKeyFieldId(collectionName);
final List<String> deleteIds = new ArrayList<>();
final Collection<SolrInputDocument> changes = new ArrayList<>();
for (final Map.Entry<String, IndexMutation> entry : stores.getValue().entrySet()) {
final String docId = entry.getKey();
final IndexMutation mutation = entry.getValue();
Preconditions.checkArgument(!(mutation.isNew() && mutation.isDeleted()));
Preconditions.checkArgument(!mutation.isNew() || !mutation.hasDeletions());
Preconditions.checkArgument(!mutation.isDeleted() || !mutation.hasAdditions());
//Handle any deletions
if (mutation.hasDeletions()) {
if (mutation.isDeleted()) {
logger.trace("Deleting entire document {}", docId);
deleteIds.add(docId);
} else {
final List<IndexEntry> fieldDeletions = new ArrayList<>(mutation.getDeletions());
if (mutation.hasAdditions()) {
for (final IndexEntry indexEntry : mutation.getAdditions()) {
fieldDeletions.remove(indexEntry);
}
}
handleRemovalsFromIndex(collectionName, keyIdField, docId, fieldDeletions, information);
}
}
if (mutation.hasAdditions()) {
final int ttl = mutation.determineTTL();
final SolrInputDocument doc = new SolrInputDocument();
doc.setField(keyIdField, docId);
final boolean isNewDoc = mutation.isNew();
if (isNewDoc)
logger.trace("Adding new document {}", docId);
final Map<String, Object> adds = collectFieldValues(mutation.getAdditions(), collectionName,
information);
// If cardinality is not single then we should use the "add" operation to update
// the index so we don't overwrite existing values.
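// For illustration (field names are assumptions): a SET-cardinality key "names" produces an atomic
// update like {"id":"4120", "names":{"add":["alice","bob"]}}, while a SINGLE-cardinality key "age"
// on an existing document produces {"id":"4120", "age":{"set":30}}.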
adds.keySet().forEach(v-> {
final KeyInformation keyInformation = information.get(collectionName, v);
final String solrOp = keyInformation.getCardinality() == Cardinality.SINGLE ? "set" : "add";
doc.setField(v, isNewDoc ? adds.get(v) :
new HashMap<String, Object>(1) {{put(solrOp, adds.get(v));}}
);
});
if (ttl>0) {
Preconditions.checkArgument(isNewDoc,
"Solr only supports TTL on new documents [%s]", docId);
doc.setField(ttlField, String.format("+%dSECONDS", ttl));
}
changes.add(doc);
}
}
commitDeletes(collectionName, deleteIds);
commitChanges(collectionName, changes);
}
} catch (final IllegalArgumentException e) {
throw new PermanentBackendException("Unable to complete query on Solr.", e);
} catch (final Exception e) {
throw storageException(e);
}
}
private void handleRemovalsFromIndex(String collectionName, String keyIdField, String docId,
List<IndexEntry> fieldDeletions, KeyInformation.IndexRetriever information)
throws SolrServerException, IOException, BackendException {
final Map<String, String> fieldDeletes = new HashMap<>(1);
fieldDeletes.put("set", null);
final SolrInputDocument doc = new SolrInputDocument();
doc.addField(keyIdField, docId);
for(final IndexEntry v: fieldDeletions) {
final KeyInformation keyInformation = information.get(collectionName, v.field);
// If the cardinality is a Set or List, we just need to remove the individual value
// received in the mutation and not set the field to null, but we still consolidate the values
// in the event of multiple removals in one mutation.
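// For illustration (field names are assumptions): removing one value of a SET-cardinality key
// "names" yields {"id":"4120", "names":{"remove":["alice"]}}, whereas a SINGLE-cardinality key
// is cleared with {"id":"4120", "age":{"set":null}}.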
final Map<String, Object> deletes = collectFieldValues(fieldDeletions, collectionName, information);
deletes.keySet().forEach(vertex -> {
final Map<String, Object> remove;
if (keyInformation.getCardinality() == Cardinality.SINGLE) {
remove = (Map) fieldDeletes;
} else {
remove = new HashMap<>(1);
remove.put("remove", deletes.get(vertex));
}
doc.setField(vertex, remove);
});
}
final UpdateRequest singleDocument = newUpdateRequest();
singleDocument.add(doc);
solrClient.request(singleDocument, collectionName);
}
private Object convertValue(Object value) throws BackendException {
if (value instanceof Geoshape) {
return GeoToWktConverter.convertToWktString((Geoshape) value);
}
if (value instanceof UUID) {
return value.toString();
}
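// Solr date fields have millisecond precision, so Instants with a sub-millisecond component are
// rejected below; e.g. Instant.ofEpochMilli(1546300800123L) is accepted, while the same instant
// plus a few extra nanoseconds would throw.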
if(value instanceof Instant) {
if(Math.floorMod(((Instant) value).getNano(), 1000000) != 0) {
throw new IllegalArgumentException("Solr indexes do not support nanoseconds");
}
return new Date(((Instant) value).toEpochMilli());
}
return value;
}
@Override
public void restore(Map<String, Map<String, List<IndexEntry>>> documents,
KeyInformation.IndexRetriever information, BaseTransaction tx) throws BackendException {
try {
for (final Map.Entry<String, Map<String, List<IndexEntry>>> stores : documents.entrySet()) {
final String collectionName = stores.getKey();
final List<String> deleteIds = new ArrayList<>();
final List<SolrInputDocument> newDocuments = new ArrayList<>();
for (final Map.Entry<String, List<IndexEntry>> entry : stores.getValue().entrySet()) {
final String docID = entry.getKey();
final List<IndexEntry> content = entry.getValue();
if (content == null || content.isEmpty()) {
if (logger.isTraceEnabled())
logger.trace("Deleting document [{}]", docID);
deleteIds.add(docID);
continue;
}
final SolrInputDocument doc = new SolrInputDocument();
doc.setField(getKeyFieldId(collectionName), docID);
final Map<String, Object> adds = collectFieldValues(content, collectionName, information);
adds.forEach(doc::setField);
newDocuments.add(doc);
}
commitDeletes(collectionName, deleteIds);
commitChanges(collectionName, newDocuments);
}
} catch (final Exception e) {
throw new TemporaryBackendException("Could not restore Solr index", e);
}
}
// This method will create a map of field ids to values. In the case of multiValued fields,
// it will consolidate all the values into one List or Set so it can be updated with a single Solr operation
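// For illustration (field name is an assumption): three additions to a SET-cardinality key "names"
// ("alice", "bob", "carol") are consolidated into a single entry {"names": ["alice", "bob", "carol"]}
// so the document can be written with one update operation.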
private Map<String, Object> collectFieldValues(List<IndexEntry> content, String collectionName,
KeyInformation.IndexRetriever information) throws BackendException {
final Map<String, Object> docs = new HashMap<>();
for (final IndexEntry addition: content) {
final KeyInformation keyInformation = information.get(collectionName, addition.field);
switch (keyInformation.getCardinality()) {
case SINGLE:
docs.put(addition.field, convertValue(addition.value));
break;
case SET:
if (!docs.containsKey(addition.field)) {
docs.put(addition.field, new HashSet<>());
}
((Set