All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.index.fielddata.plain.AbstractIndexOrdinalsFieldData Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
import org.elasticsearch.index.mapper.MappedFieldType.Names;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.search.MultiValueMode;

import java.io.IOException;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldData implements IndexOrdinalsFieldData {

    protected Settings frequency;
    protected Settings regex;
    protected final CircuitBreakerService breakerService;

    protected AbstractIndexOrdinalsFieldData(Index index, Settings indexSettings, Names fieldNames, FieldDataType fieldDataType,
                                          IndexFieldDataCache cache, CircuitBreakerService breakerService) {
        super(index, indexSettings, fieldNames, fieldDataType, cache);
        final Map groups = fieldDataType.getSettings().getGroups("filter");
        frequency = groups.get("frequency");
        regex = groups.get("regex");
        this.breakerService = breakerService;
    }

    @Override
    public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) {
        return new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
    }

    @Override
    public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
        if (indexReader.leaves().size() <= 1) {
            // ordinals are already global
            return this;
        }
        boolean fieldFound = false;
        for (LeafReaderContext context : indexReader.leaves()) {
            if (context.reader().getFieldInfos().fieldInfo(getFieldNames().indexName()) != null) {
                fieldFound = true;
                break;
            }
        }
        if (fieldFound == false) {
            // Some directory readers may be wrapped and report different set of fields and use the same cache key.
            // If a field can't be found then it doesn't mean it isn't there,
            // so if a field doesn't exist then we don't cache it and just return an empty field data instance.
            // The next time the field is found, we do cache.
            try {
                return GlobalOrdinalsBuilder.buildEmpty(index(), indexSettings(), indexReader, this);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        try {
            return cache.load(indexReader, this);
        } catch (Throwable e) {
            if (e instanceof ElasticsearchException) {
                throw (ElasticsearchException) e;
            } else {
                throw new ElasticsearchException(e.getMessage(), e);
            }
        }
    }

    @Override
    public IndexOrdinalsFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception {
        return GlobalOrdinalsBuilder.build(indexReader, this, indexSettings(), breakerService, logger);
    }

    @Override
    protected AtomicOrdinalsFieldData empty(int maxDoc) {
        return AbstractAtomicOrdinalsFieldData.empty();
    }

    protected TermsEnum filter(Terms terms, LeafReader reader) throws IOException {
        TermsEnum iterator = terms.iterator();
        if (iterator == null) {
            return null;
        }
        if (iterator != null && frequency != null) {
            iterator = FrequencyFilter.filter(iterator, terms, reader, frequency);
        }

        if (iterator != null && regex != null) {
            iterator = RegexFilter.filter(iterator, terms, reader, regex);
        }
        return iterator;
    }

    private static final class FrequencyFilter extends FilteredTermsEnum {

        private int minFreq;
        private int maxFreq;
        public FrequencyFilter(TermsEnum delegate, int minFreq, int maxFreq) {
            super(delegate, false);
            this.minFreq = minFreq;
            this.maxFreq = maxFreq;
        }

        public static TermsEnum filter(TermsEnum toFilter, Terms terms, LeafReader reader, Settings settings) throws IOException {
            int docCount = terms.getDocCount();
            if (docCount == -1) {
                docCount = reader.maxDoc();
            }
            final double minFrequency = settings.getAsDouble("min", 0d);
            final double maxFrequency = settings.getAsDouble("max", docCount+1d);
            final double minSegmentSize = settings.getAsInt("min_segment_size", 0);
            if (minSegmentSize < docCount) {
                final int minFreq = minFrequency > 1.0? (int) minFrequency : (int)(docCount * minFrequency);
                final int maxFreq = maxFrequency > 1.0? (int) maxFrequency : (int)(docCount * maxFrequency);
                assert minFreq < maxFreq;
                return new FrequencyFilter(toFilter, minFreq, maxFreq);
            }

            return toFilter;

        }

        @Override
        protected AcceptStatus accept(BytesRef arg0) throws IOException {
            int docFreq = docFreq();
            if (docFreq >= minFreq && docFreq <= maxFreq) {
                return AcceptStatus.YES;
            }
            return AcceptStatus.NO;
        }
    }

    private static final class RegexFilter extends FilteredTermsEnum {

        private final Matcher matcher;
        private final CharsRefBuilder spare = new CharsRefBuilder();

        public RegexFilter(TermsEnum delegate, Matcher matcher) {
            super(delegate, false);
            this.matcher = matcher;
        }
        public static TermsEnum filter(TermsEnum iterator, Terms terms, LeafReader reader, Settings regex) {
            String pattern = regex.get("pattern");
            if (pattern == null) {
                return iterator;
            }
            Pattern p = Pattern.compile(pattern);
            return new RegexFilter(iterator, p.matcher(""));
        }

        @Override
        protected AcceptStatus accept(BytesRef arg0) throws IOException {
            spare.copyUTF8Bytes(arg0);
            matcher.reset(spare.get());
            if (matcher.matches()) {
                return AcceptStatus.YES;
            }
            return AcceptStatus.NO;
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy