/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.janusgraph.diskstorage.solr;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpEntityEnclosingRequest;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.client.HttpClient;
import org.apache.http.entity.BufferedHttpEntity;
import org.apache.http.impl.auth.KerberosScheme;
import org.apache.http.protocol.HttpContext;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.Krb5HttpClientBuilder;
import org.apache.solr.client.solrj.impl.LBHttpSolrClient;
import org.apache.solr.client.solrj.impl.PreemptiveAuth;
import org.apache.solr.client.solrj.impl.SolrHttpClientBuilder;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.zookeeper.KeeperException;
import org.janusgraph.core.Cardinality;
import org.janusgraph.core.JanusGraphElement;
import org.janusgraph.core.attribute.Cmp;
import org.janusgraph.core.attribute.Geo;
import org.janusgraph.core.attribute.Geoshape;
import org.janusgraph.core.attribute.Text;
import org.janusgraph.core.schema.Mapping;
import org.janusgraph.core.schema.Parameter;
import org.janusgraph.diskstorage.BackendException;
import org.janusgraph.diskstorage.BaseTransaction;
import org.janusgraph.diskstorage.BaseTransactionConfig;
import org.janusgraph.diskstorage.BaseTransactionConfigurable;
import org.janusgraph.diskstorage.PermanentBackendException;
import org.janusgraph.diskstorage.TemporaryBackendException;
import org.janusgraph.diskstorage.configuration.ConfigOption;
import org.janusgraph.diskstorage.configuration.Configuration;
import org.janusgraph.diskstorage.indexing.IndexEntry;
import org.janusgraph.diskstorage.indexing.IndexFeatures;
import org.janusgraph.diskstorage.indexing.IndexMutation;
import org.janusgraph.diskstorage.indexing.IndexProvider;
import org.janusgraph.diskstorage.indexing.IndexQuery;
import org.janusgraph.diskstorage.indexing.KeyInformation;
import org.janusgraph.diskstorage.indexing.RawQuery;
import org.janusgraph.diskstorage.solr.transform.GeoToWktConverter;
import org.janusgraph.diskstorage.util.DefaultTransaction;
import org.janusgraph.graphdb.configuration.PreInitializeConfigOptions;
import org.janusgraph.graphdb.database.serialize.AttributeUtil;
import org.janusgraph.graphdb.internal.Order;
import org.janusgraph.graphdb.query.JanusGraphPredicate;
import org.janusgraph.graphdb.query.condition.And;
import org.janusgraph.graphdb.query.condition.Condition;
import org.janusgraph.graphdb.query.condition.Not;
import org.janusgraph.graphdb.query.condition.Or;
import org.janusgraph.graphdb.query.condition.PredicateCondition;
import org.janusgraph.graphdb.types.ParameterType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.lang.reflect.Constructor;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.TimeZone;
import java.util.UUID;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import static org.janusgraph.diskstorage.solr.SolrIndex.DYNAMIC_FIELDS;
import static org.janusgraph.diskstorage.solr.SolrIndex.HTTP_ALLOW_COMPRESSION;
import static org.janusgraph.diskstorage.solr.SolrIndex.HTTP_CONNECTION_TIMEOUT;
import static org.janusgraph.diskstorage.solr.SolrIndex.HTTP_GLOBAL_MAX_CONNECTIONS;
import static org.janusgraph.diskstorage.solr.SolrIndex.HTTP_MAX_CONNECTIONS_PER_HOST;
import static org.janusgraph.diskstorage.solr.SolrIndex.HTTP_URLS;
import static org.janusgraph.diskstorage.solr.SolrIndex.KERBEROS_ENABLED;
import static org.janusgraph.diskstorage.solr.SolrIndex.KEY_FIELD_NAMES;
import static org.janusgraph.diskstorage.solr.SolrIndex.MAX_SHARDS_PER_NODE;
import static org.janusgraph.diskstorage.solr.SolrIndex.NUM_SHARDS;
import static org.janusgraph.diskstorage.solr.SolrIndex.REPLICATION_FACTOR;
import static org.janusgraph.diskstorage.solr.SolrIndex.SOLR_DEFAULT_CONFIG;
import static org.janusgraph.diskstorage.solr.SolrIndex.SOLR_MODE;
import static org.janusgraph.diskstorage.solr.SolrIndex.SOLR_NS;
import static org.janusgraph.diskstorage.solr.SolrIndex.TTL_FIELD;
import static org.janusgraph.diskstorage.solr.SolrIndex.WAIT_SEARCHER;
import static org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration.INDEX_MAX_RESULT_SET_SIZE;
/**
* NOTE: Copied from JanusGraph to support Kerberos and to add support for multiple ZooKeeper clients. Do not change.
* This is a copy of SolrIndex.java from org.janusgraph.diskstorage.solr.
*/
@PreInitializeConfigOptions
public class Solr6Index implements IndexProvider {
private static final Logger logger = LoggerFactory.getLogger(Solr6Index.class);
private static final String DEFAULT_ID_FIELD = "id";
private static final char CHROOT_START_CHAR = '/';
private enum Mode {
HTTP, CLOUD;
public static Mode parse(String mode) {
for (final Mode m : Mode.values()) {
if (m.toString().equalsIgnoreCase(mode)) return m;
}
throw new IllegalArgumentException("Unrecognized mode: "+mode);
}
}
public static final ConfigOption<String[]> ZOOKEEPER_URLS = new ConfigOption<>(SOLR_NS,"zookeeper-urls",
"URL of the Zookeeper instance coordinating the SolrCloud cluster",
ConfigOption.Type.MASKABLE, new String[]{"localhost:2181"});
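// Illustrative sketch only: how these options might appear in a JanusGraph-style properties file.
// The "index.search.solr." prefix and the host names are assumptions (Atlas, for example, puts an
// "atlas.graph." prefix in front of the graph configuration keys):
//   index.search.solr.mode=cloud
//   index.search.solr.zookeeper-urls=zk1:2181,zk2:2181,zk3:2181/solr
//   index.search.solr.http-urls=http://solr1:8983/solr,http://solr2:8983/solr
//   index.search.solr.kerberos-enabled=true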
private static final IndexFeatures SOLR_FEATURES = new IndexFeatures.Builder()
.supportsDocumentTTL()
.setDefaultStringMapping(Mapping.TEXT)
.supportedStringMappings(Mapping.TEXT, Mapping.STRING)
.supportsCardinality(Cardinality.SINGLE)
.supportsCardinality(Cardinality.LIST)
.supportsCardinality(Cardinality.SET)
.supportsCustomAnalyzer()
.supportsGeoContains()
.build();
private static final Map<Geo, String> SPATIAL_PREDICATES = spatialPredicates();
private final SolrClient solrClient;
private final Configuration configuration;
private final Mode mode;
private final boolean dynFields;
private final Map<String, String> keyFieldIds;
private final String ttlField;
private final int batchSize;
private final boolean waitSearcher;
private final boolean kerberosEnabled;
public Solr6Index(final Configuration config) throws BackendException {
Preconditions.checkArgument(config!=null);
configuration = config;
mode = Mode.parse(config.get(SOLR_MODE));
kerberosEnabled = config.get(KERBEROS_ENABLED);
dynFields = config.get(DYNAMIC_FIELDS);
keyFieldIds = parseKeyFieldsForCollections(config);
batchSize = config.get(INDEX_MAX_RESULT_SET_SIZE);
ttlField = config.get(TTL_FIELD);
waitSearcher = config.get(WAIT_SEARCHER);
if (kerberosEnabled) {
logger.debug("Kerberos is enabled. Configuring SOLR for Kerberos.");
configureSolrClientsForKerberos();
} else {
logger.debug("Kerberos is NOT enabled.");
logger.debug("KERBEROS_ENABLED name is " + KERBEROS_ENABLED.getName() + " and it is" + (KERBEROS_ENABLED.isOption() ? " " : " not") + " an option.");
logger.debug("KERBEROS_ENABLED type is " + KERBEROS_ENABLED.getType().name());
}
final ModifiableSolrParams clientParams = new ModifiableSolrParams();
switch (mode) {
case CLOUD:
/* ATLAS-2920: Update JanusGraph Solr clients to use all zookeeper entries – start */
final List<String> zookeeperUrls = getZookeeperURLs(config);
/* ATLAS-2920: end */
final CloudSolrClient cloudServer = new CloudSolrClient.Builder()
.withLBHttpSolrClientBuilder(
new LBHttpSolrClient.Builder()
.withHttpSolrClientBuilder(new HttpSolrClient.Builder().withInvariantParams(clientParams))
.withBaseSolrUrls(config.get(HTTP_URLS))
)
.withZkHost(zookeeperUrls)
.sendUpdatesOnlyToShardLeaders()
.build();
cloudServer.connect();
solrClient = cloudServer;
break;
case HTTP:
clientParams.add(HttpClientUtil.PROP_ALLOW_COMPRESSION, config.get(HTTP_ALLOW_COMPRESSION).toString());
clientParams.add(HttpClientUtil.PROP_CONNECTION_TIMEOUT, config.get(HTTP_CONNECTION_TIMEOUT).toString());
clientParams.add(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, config.get(HTTP_MAX_CONNECTIONS_PER_HOST).toString());
clientParams.add(HttpClientUtil.PROP_MAX_CONNECTIONS, config.get(HTTP_GLOBAL_MAX_CONNECTIONS).toString());
final HttpClient client = HttpClientUtil.createClient(clientParams);
solrClient = new LBHttpSolrClient.Builder()
.withHttpClient(client)
.withBaseSolrUrls(config.get(HTTP_URLS))
.build();
break;
default:
throw new IllegalArgumentException("Unsupported Solr operation mode: " + mode);
}
}
private void configureSolrClientsForKerberos() throws PermanentBackendException {
String kerberosConfig = System.getProperty("java.security.auth.login.config");
if(kerberosConfig == null) {
throw new PermanentBackendException("Unable to configure kerberos for solr client. System property 'java.security.auth.login.config' is not set.");
}
logger.debug("Using kerberos configuration file located at '{}'.", kerberosConfig);
try(Krb5HttpClientBuilder krbBuild = new Krb5HttpClientBuilder()) {
SolrHttpClientBuilder kb = krbBuild.getBuilder();
HttpClientUtil.setHttpClientBuilder(kb);
HttpRequestInterceptor bufferedEntityInterceptor = new HttpRequestInterceptor() {
@Override
public void process(HttpRequest request, HttpContext context) throws HttpException, IOException {
if(request instanceof HttpEntityEnclosingRequest) {
HttpEntityEnclosingRequest enclosingRequest = ((HttpEntityEnclosingRequest) request);
HttpEntity requestEntity = enclosingRequest.getEntity();
enclosingRequest.setEntity(new BufferedHttpEntity(requestEntity));
}
}
};
HttpClientUtil.addRequestInterceptor(bufferedEntityInterceptor);
HttpRequestInterceptor preemptiveAuth = new PreemptiveAuth(new KerberosScheme());
HttpClientUtil.addRequestInterceptor(preemptiveAuth);
}
}
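// Illustrative sketch only: configureSolrClientsForKerberos() above requires the JVM to be started
// with -Djava.security.auth.login.config pointing at a JAAS file. Principal and keytab values below
// are assumptions; SolrJ looks up the "Client" entry by default (overridable via
// solr.kerberos.jaas.appname):
//   Client {
//     com.sun.security.auth.module.Krb5LoginModule required
//     useKeyTab=true
//     keyTab="/etc/security/keytabs/service.keytab"
//     storeKey=true
//     useTicketCache=false
//     principal="service/host.example.com@EXAMPLE.COM";
//   };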
private Map<String, String> parseKeyFieldsForCollections(Configuration config) throws BackendException {
final Map<String, String> keyFieldNames = new HashMap<>();
final String[] collectionFieldStatements = config.has(KEY_FIELD_NAMES) ? config.get(KEY_FIELD_NAMES) : new String[0];
for (final String collectionFieldStatement : collectionFieldStatements) {
final String[] parts = collectionFieldStatement.trim().split("=");
if (parts.length != 2) {
throw new PermanentBackendException(
"Unable to parse the collection name / key field name pair. It should be of the format collection=field");
}
final String collectionName = parts[0];
final String keyFieldName = parts[1];
keyFieldNames.put(collectionName, keyFieldName);
}
return keyFieldNames;
}
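// Illustrative sketch only: parseKeyFieldsForCollections() above expects comma-separated
// collection=field pairs, e.g. (the property prefix, collection and field names are assumptions):
//   index.search.solr.key-field-names=edge_index=document_id,vertex_index=document_id
// Collections without an explicit mapping fall back to DEFAULT_ID_FIELD ("id") in getKeyFieldId().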
private String getKeyFieldId(String collection) {
String field = keyFieldIds.get(collection);
if (field==null) field = DEFAULT_ID_FIELD;
return field;
}
/**
* Unlike the ElasticSearch Index, which is schema free, Solr requires a schema to
* support searching. This means that you will need to modify the Solr schema with the
* appropriate field definitions in order for indexing to work properly (see the illustrative
* schema snippet after this method). If you have a running instance
* of Solr and you modify its schema with new fields, don't forget to re-index!
* @param store Index store
* @param key New key to register
* @param information data type to register for the key
* @param tx enclosing transaction
* @throws BackendException in case an exception is thrown when
* creating a collection.
*/
@SuppressWarnings("unchecked")
@Override
public void register(String store, String key, KeyInformation information, BaseTransaction tx)
throws BackendException {
if (mode== Mode.CLOUD) {
final CloudSolrClient client = (CloudSolrClient) solrClient;
try {
createCollectionIfNotExists(client, configuration, store);
} catch (final IOException | SolrServerException | InterruptedException | KeeperException e) {
throw new PermanentBackendException(e);
}
}
//Since all data types must be defined in the schema.xml, pre-registering a type does not work.
//But we do validate any analyzer configured for the key.
String analyzer = ParameterType.STRING_ANALYZER.findParameter(information.getParameters(), null);
if (analyzer != null) {
//If the key has an analyzer configured, we try to instantiate it by reflection
try {
((Constructor<Tokenizer>) ClassLoader.getSystemClassLoader().loadClass(analyzer)
.getConstructor()).newInstance();
} catch (final ReflectiveOperationException e) {
throw new PermanentBackendException(e.getMessage(),e);
}
}
analyzer = ParameterType.TEXT_ANALYZER.findParameter(information.getParameters(), null);
if (analyzer != null) {
//If the key has an analyzer configured, we try to instantiate it by reflection
try {
((Constructor<Tokenizer>) ClassLoader.getSystemClassLoader().loadClass(analyzer)
.getConstructor()).newInstance();
} catch (final ReflectiveOperationException e) {
throw new PermanentBackendException(e.getMessage(),e);
}
}
}
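// Illustrative sketch only: register() above assumes the target collection's schema already declares
// fields matching the indexed property keys. A hand-maintained managed-schema might contain entries
// such as the following (field names and types are assumptions, not part of this class):
//   <field name="document_id" type="string"       indexed="true" stored="true"/>
//   <field name="age"         type="pint"         indexed="true" stored="true"/>
//   <field name="place"       type="location_rpt" indexed="true" stored="true"/>
// Alternatively, enabling the DYNAMIC_FIELDS option lets Solr dynamic-field rules (e.g. *_s, *_i)
// cover newly registered keys without schema edits.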
@Override
public void mutate(Map<String, Map<String, IndexMutation>> mutations, KeyInformation.IndexRetriever information,
BaseTransaction tx) throws BackendException {
logger.debug("Mutating SOLR");
try {
for (final Map.Entry<String, Map<String, IndexMutation>> stores : mutations.entrySet()) {
final String collectionName = stores.getKey();
final String keyIdField = getKeyFieldId(collectionName);
final List<String> deleteIds = new ArrayList<>();
final Collection<SolrInputDocument> changes = new ArrayList<>();
for (final Map.Entry<String, IndexMutation> entry : stores.getValue().entrySet()) {
final String docId = entry.getKey();
final IndexMutation mutation = entry.getValue();
Preconditions.checkArgument(!(mutation.isNew() && mutation.isDeleted()));
Preconditions.checkArgument(!mutation.isNew() || !mutation.hasDeletions());
Preconditions.checkArgument(!mutation.isDeleted() || !mutation.hasAdditions());
//Handle any deletions
if (mutation.hasDeletions()) {
if (mutation.isDeleted()) {
logger.trace("Deleting entire document {}", docId);
deleteIds.add(docId);
} else {
final List<IndexEntry> fieldDeletions = new ArrayList<>(mutation.getDeletions());
if (mutation.hasAdditions()) {
for (final IndexEntry indexEntry : mutation.getAdditions()) {
fieldDeletions.remove(indexEntry);
}
}
handleRemovalsFromIndex(collectionName, keyIdField, docId, fieldDeletions, information);
}
}
if (mutation.hasAdditions()) {
final int ttl = mutation.determineTTL();
final SolrInputDocument doc = new SolrInputDocument();
doc.setField(keyIdField, docId);
final boolean isNewDoc = mutation.isNew();
if (isNewDoc)
logger.trace("Adding new document {}", docId);
final Map<String, Object> adds = collectFieldValues(mutation.getAdditions(), collectionName,
information);
// If cardinality is not single then we should use the "add" operation to update
// the index so we don't overwrite existing values.
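// For illustration (field names are assumptions): a SET-cardinality key "names" produces an atomic
// update like {"id":"4120", "names":{"add":["alice","bob"]}}, while a SINGLE-cardinality key "age"
// on an existing document produces {"id":"4120", "age":{"set":30}}.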
adds.keySet().forEach(v-> {
final KeyInformation keyInformation = information.get(collectionName, v);
final String solrOp = keyInformation.getCardinality() == Cardinality.SINGLE ? "set" : "add";
doc.setField(v, isNewDoc ? adds.get(v) :
new HashMap<String, Object>(1) {{put(solrOp, adds.get(v));}}
);
});
if (ttl>0) {
Preconditions.checkArgument(isNewDoc,
"Solr only supports TTL on new documents [%s]", docId);
doc.setField(ttlField, String.format("+%dSECONDS", ttl));
}
changes.add(doc);
}
}
commitDeletes(collectionName, deleteIds);
commitChanges(collectionName, changes);
}
} catch (final IllegalArgumentException e) {
throw new PermanentBackendException("Unable to complete query on Solr.", e);
} catch (final Exception e) {
throw storageException(e);
}
}
private void handleRemovalsFromIndex(String collectionName, String keyIdField, String docId,
List<IndexEntry> fieldDeletions, KeyInformation.IndexRetriever information)
throws SolrServerException, IOException, BackendException {
final Map<String, String> fieldDeletes = new HashMap<>(1);
fieldDeletes.put("set", null);
final SolrInputDocument doc = new SolrInputDocument();
doc.addField(keyIdField, docId);
for(final IndexEntry v: fieldDeletions) {
final KeyInformation keyInformation = information.get(collectionName, v.field);
// If the cardinality is a Set or List, we just need to remove the individual value
// received in the mutation and not set the field to null, but we still consolidate the values
// in the event of multiple removals in one mutation.
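// For illustration (field names are assumptions): removing one value of a SET-cardinality key
// "names" yields {"id":"4120", "names":{"remove":["alice"]}}, whereas a SINGLE-cardinality key
// is cleared with {"id":"4120", "age":{"set":null}}.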
final Map<String, Object> deletes = collectFieldValues(fieldDeletions, collectionName, information);
deletes.keySet().forEach(vertex -> {
final Map<String, Object> remove;
if (keyInformation.getCardinality() == Cardinality.SINGLE) {
remove = (Map) fieldDeletes;
} else {
remove = new HashMap<>(1);
remove.put("remove", deletes.get(vertex));
}
doc.setField(vertex, remove);
});
}
final UpdateRequest singleDocument = newUpdateRequest();
singleDocument.add(doc);
solrClient.request(singleDocument, collectionName);
}
private Object convertValue(Object value) throws BackendException {
if (value instanceof Geoshape) {
return GeoToWktConverter.convertToWktString((Geoshape) value);
}
if (value instanceof UUID) {
return value.toString();
}
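// Solr date fields have millisecond precision, so Instants with a sub-millisecond component are
// rejected below; e.g. Instant.ofEpochMilli(1546300800123L) is accepted, while the same instant
// plus a few extra nanoseconds would throw.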
if(value instanceof Instant) {
if(Math.floorMod(((Instant) value).getNano(), 1000000) != 0) {
throw new IllegalArgumentException("Solr indexes do not support nanoseconds");
}
return new Date(((Instant) value).toEpochMilli());
}
return value;
}
@Override
public void restore(Map<String, Map<String, List<IndexEntry>>> documents,
KeyInformation.IndexRetriever information, BaseTransaction tx) throws BackendException {
try {
for (final Map.Entry<String, Map<String, List<IndexEntry>>> stores : documents.entrySet()) {
final String collectionName = stores.getKey();
final List<String> deleteIds = new ArrayList<>();
final List<SolrInputDocument> newDocuments = new ArrayList<>();
for (final Map.Entry<String, List<IndexEntry>> entry : stores.getValue().entrySet()) {
final String docID = entry.getKey();
final List<IndexEntry> content = entry.getValue();
if (content == null || content.isEmpty()) {
if (logger.isTraceEnabled())
logger.trace("Deleting document [{}]", docID);
deleteIds.add(docID);
continue;
}
final SolrInputDocument doc = new SolrInputDocument();
doc.setField(getKeyFieldId(collectionName), docID);
final Map<String, Object> adds = collectFieldValues(content, collectionName, information);
adds.forEach(doc::setField);
newDocuments.add(doc);
}
commitDeletes(collectionName, deleteIds);
commitChanges(collectionName, newDocuments);
}
} catch (final Exception e) {
throw new TemporaryBackendException("Could not restore Solr index", e);
}
}
// This method will create a map of field ids to values. In the case of multiValued fields,
// it will consolidate all the values into one List or Set so it can be updated with a single Solr operation
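// For illustration (field name is an assumption): three additions to a SET-cardinality key "names"
// ("alice", "bob", "carol") are consolidated into a single entry {"names": ["alice", "bob", "carol"]}
// so the document can be written with one update operation.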
private Map<String, Object> collectFieldValues(List<IndexEntry> content, String collectionName,
KeyInformation.IndexRetriever information) throws BackendException {
final Map<String, Object> docs = new HashMap<>();
for (final IndexEntry addition: content) {
final KeyInformation keyInformation = information.get(collectionName, addition.field);
switch (keyInformation.getCardinality()) {
case SINGLE:
docs.put(addition.field, convertValue(addition.value));
break;
case SET:
if (!docs.containsKey(addition.field)) {
docs.put(addition.field, new HashSet<>());
}
((Set