All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.action.mlt.TransportMoreLikeThisAction Maven / Gradle / Ivy

There is a newer version: 8.14.1
Show newest version
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.action.mlt;

import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.get.TransportGetAction;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.TransportSearchAction;
import org.elasticsearch.action.support.TransportAction;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.MutableShardRouting;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.ShardIterator;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.engine.DocumentMissingException;
import org.elasticsearch.index.get.GetField;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MoreLikeThisFieldQueryBuilder;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.*;

import java.util.Collections;
import java.util.Iterator;
import java.util.Set;

import static com.google.common.collect.Sets.newHashSet;
import static org.elasticsearch.client.Requests.getRequest;
import static org.elasticsearch.client.Requests.searchRequest;
import static org.elasticsearch.index.query.QueryBuilders.*;
import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;

/**
 * The more like this action.
 */
public class TransportMoreLikeThisAction extends TransportAction {

    private final TransportSearchAction searchAction;

    private final TransportGetAction getAction;

    private final IndicesService indicesService;

    private final ClusterService clusterService;

    private final TransportService transportService;

    @Inject
    public TransportMoreLikeThisAction(Settings settings, ThreadPool threadPool, TransportSearchAction searchAction, TransportGetAction getAction,
                                       ClusterService clusterService, IndicesService indicesService, TransportService transportService) {
        super(settings, threadPool);
        this.searchAction = searchAction;
        this.getAction = getAction;
        this.indicesService = indicesService;
        this.clusterService = clusterService;
        this.transportService = transportService;

        transportService.registerHandler(MoreLikeThisAction.NAME, new TransportHandler());
    }

    @Override
    protected void doExecute(final MoreLikeThisRequest request, final ActionListener listener) {
        // update to actual index name
        ClusterState clusterState = clusterService.state();
        // update to the concrete index
        final String concreteIndex = clusterState.metaData().concreteIndex(request.index());

        RoutingNode routingNode = clusterState.getRoutingNodes().nodesToShards().get(clusterService.localNode().getId());
        if (routingNode == null) {
            redirect(request, listener, clusterState);
            return;
        }
        boolean hasIndexLocally = false;
        for (MutableShardRouting shardRouting : routingNode.shards()) {
            if (concreteIndex.equals(shardRouting.index())) {
                hasIndexLocally = true;
                break;
            }
        }
        if (!hasIndexLocally) {
            redirect(request, listener, clusterState);
            return;
        }
        Set getFields = newHashSet();
        if (request.fields() != null) {
            Collections.addAll(getFields, request.fields());
        }
        // add the source, in case we need to parse it to get fields
        getFields.add(SourceFieldMapper.NAME);

        GetRequest getRequest = getRequest(concreteIndex)
                .fields(getFields.toArray(new String[getFields.size()]))
                .type(request.type())
                .id(request.id())
                .routing(request.routing())
                .listenerThreaded(true)
                .operationThreaded(true);

        request.beforeLocalFork();
        getAction.execute(getRequest, new ActionListener() {
            @Override
            public void onResponse(GetResponse getResponse) {
                if (!getResponse.isExists()) {
                    listener.onFailure(new DocumentMissingException(null, request.type(), request.id()));
                    return;
                }
                final BoolQueryBuilder boolBuilder = boolQuery();
                try {
                    final DocumentMapper docMapper = indicesService.indexServiceSafe(concreteIndex).mapperService().documentMapper(request.type());
                    if (docMapper == null) {
                        throw new ElasticSearchException("No DocumentMapper found for type [" + request.type() + "]");
                    }
                    final Set fields = newHashSet();
                    if (request.fields() != null) {
                        for (String field : request.fields()) {
                            FieldMappers fieldMappers = docMapper.mappers().smartName(field);
                            if (fieldMappers != null) {
                                fields.add(fieldMappers.mapper().names().indexName());
                            } else {
                                fields.add(field);
                            }
                        }
                    }

                    if (!fields.isEmpty()) {
                        // if fields are not empty, see if we got them in the response
                        for (Iterator it = fields.iterator(); it.hasNext(); ) {
                            String field = it.next();
                            GetField getField = getResponse.getField(field);
                            if (getField != null) {
                                for (Object value : getField.getValues()) {
                                    addMoreLikeThis(request, boolBuilder, getField.getName(), value.toString());
                                }
                                it.remove();
                            }
                        }
                        if (!fields.isEmpty()) {
                            // if we don't get all the fields in the get response, see if we can parse the source
                            parseSource(getResponse, boolBuilder, docMapper, fields, request);
                        }
                    } else {
                        // we did not ask for any fields, try and get it from the source
                        parseSource(getResponse, boolBuilder, docMapper, fields, request);
                    }

                    if (!boolBuilder.hasClauses()) {
                        // no field added, fail
                        listener.onFailure(new ElasticSearchException("No fields found to fetch the 'likeText' from"));
                        return;
                    }

                    // exclude myself
                    Term uidTerm = docMapper.uidMapper().term(request.type(), request.id());
                    boolBuilder.mustNot(termQuery(uidTerm.field(), uidTerm.text()));
                } catch (Throwable e) {
                    listener.onFailure(e);
                    return;
                }

                String[] searchIndices = request.searchIndices();
                if (searchIndices == null) {
                    searchIndices = new String[]{request.index()};
                }
                String[] searchTypes = request.searchTypes();
                if (searchTypes == null) {
                    searchTypes = new String[]{request.type()};
                }
                int size = request.searchSize() != 0 ? request.searchSize() : 10;
                int from = request.searchFrom() != 0 ? request.searchFrom() : 0;
                SearchRequest searchRequest = searchRequest(searchIndices)
                        .types(searchTypes)
                        .searchType(request.searchType())
                        .scroll(request.searchScroll())
                        .extraSource(searchSource()
                                .query(boolBuilder)
                                .from(from)
                                .size(size)
                        )
                        .listenerThreaded(request.listenerThreaded());

                if (request.searchSource() != null) {
                    searchRequest.source(request.searchSource(), request.searchSourceUnsafe());
                }
                searchAction.execute(searchRequest, new ActionListener() {
                    @Override
                    public void onResponse(SearchResponse response) {
                        listener.onResponse(response);
                    }

                    @Override
                    public void onFailure(Throwable e) {
                        listener.onFailure(e);
                    }
                });

            }

            @Override
            public void onFailure(Throwable e) {
                listener.onFailure(e);
            }
        });
    }

    // Redirects the request to a data node, that has the index meta data locally available.
    private void redirect(MoreLikeThisRequest request, final ActionListener listener, ClusterState clusterState) {
        ShardIterator shardIterator = clusterService.operationRouting().getShards(clusterState, request.index(), request.type(), request.id(), request.routing(), null);
        ShardRouting shardRouting = shardIterator.firstOrNull();
        if (shardRouting == null) {
            throw new ElasticSearchException("No shards for index " + request.index());
        }
        String nodeId = shardRouting.currentNodeId();
        DiscoveryNode discoveryNode = clusterState.nodes().get(nodeId);
        transportService.sendRequest(discoveryNode, MoreLikeThisAction.NAME, request, new TransportResponseHandler() {

            @Override
            public SearchResponse newInstance() {
                return new SearchResponse();
            }

            @Override
            public void handleResponse(SearchResponse response) {
                listener.onResponse(response);
            }

            @Override
            public void handleException(TransportException exp) {
                listener.onFailure(exp);
            }

            @Override
            public String executor() {
                return ThreadPool.Names.SAME;
            }
        });
    }

    private void parseSource(GetResponse getResponse, final BoolQueryBuilder boolBuilder, DocumentMapper docMapper, final Set fields, final MoreLikeThisRequest request) {
        if (getResponse.isSourceEmpty()) {
            return;
        }
        docMapper.parse(SourceToParse.source(getResponse.getSourceAsBytesRef()).type(request.type()).id(request.id()), new DocumentMapper.ParseListenerAdapter() {
            @Override
            public boolean beforeFieldAdded(FieldMapper fieldMapper, Field field, Object parseContext) {
                if (fieldMapper instanceof InternalMapper) {
                    return true;
                }
                String value = fieldMapper.value(convertField(field)).toString();
                if (value == null) {
                    return false;
                }

                if (fields.isEmpty() || fields.contains(field.name())) {
                    addMoreLikeThis(request, boolBuilder, fieldMapper, field);
                }

                return false;
            }
        });
    }

    private Object convertField(Field field) {
        if (field.stringValue() != null) {
            return field.stringValue();
        } else if (field.binaryValue() != null) {
            return BytesRef.deepCopyOf(field.binaryValue()).bytes;
        } else if (field.numericValue() != null) {
            return field.numericValue();
        } else {
            throw new ElasticSearchIllegalStateException("Field should have either a string, numeric or binary value");
        }
    }

    private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, FieldMapper fieldMapper, Field field) {
        addMoreLikeThis(request, boolBuilder, field.name(), fieldMapper.value(convertField(field)).toString());
    }

    private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, String fieldName, String likeText) {
        MoreLikeThisFieldQueryBuilder mlt = moreLikeThisFieldQuery(fieldName)
                .likeText(likeText)
                .percentTermsToMatch(request.percentTermsToMatch())
                .boostTerms(request.boostTerms())
                .minDocFreq(request.minDocFreq())
                .maxDocFreq(request.maxDocFreq())
                .minWordLen(request.minWordLen())
                .maxWordLen(request.maxWordLen())
                .minTermFreq(request.minTermFreq())
                .maxQueryTerms(request.maxQueryTerms())
                .stopWords(request.stopWords());
        boolBuilder.should(mlt);
    }

    private class TransportHandler extends BaseTransportRequestHandler {

        @Override
        public MoreLikeThisRequest newInstance() {
            return new MoreLikeThisRequest();
        }

        @Override
        public void messageReceived(MoreLikeThisRequest request, final TransportChannel channel) throws Exception {
            // no need to have a threaded listener since we just send back a response
            request.listenerThreaded(false);
            execute(request, new ActionListener() {
                @Override
                public void onResponse(SearchResponse result) {
                    try {
                        channel.sendResponse(result);
                    } catch (Throwable e) {
                        onFailure(e);
                    }
                }

                @Override
                public void onFailure(Throwable e) {
                    try {
                        channel.sendResponse(e);
                    } catch (Exception e1) {
                        logger.warn("Failed to send response for get", e1);
                    }
                }
            });
        }

        @Override
        public String executor() {
            return ThreadPool.Names.SAME;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy