All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.index.codec.postingsformat.PostingFormats Maven / Gradle / Ivy

There is a newer version: 8.14.1
Show newest version
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.codec.postingsformat;

import com.google.common.collect.ImmutableCollection;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
import org.elasticsearch.common.collect.MapBuilder;

/**
 * This class represents the set of Elasticsearch "build-in"
 * {@link PostingsFormatProvider.Factory postings format factories}
 * 
    *
  • direct: a postings format that uses disk-based storage but loads * its terms and postings directly into memory. Note this postings format is * very memory intensive and has certain limitation that don't allow segments to * grow beyond 2.1GB see {@link DirectPostingsFormat} for details.
  • *

    *

  • memory: a postings format that stores its entire terms, postings, * positions and payloads in a finite state transducer. This format should only * be used for primary keys or with fields where each term is contained in a * very low number of documents.
  • *

    *

  • pulsing: a postings format in-lines the posting lists for very low * frequent terms in the term dictionary. This is useful to improve lookup * performance for low-frequent terms.
  • *

    *

  • bloom_default: a postings format that uses a bloom filter to * improve term lookup performance. This is useful for primarily keys or fields * that are used as a delete key
  • *

    *

  • bloom_pulsing: a postings format that combines the advantages of * bloom and pulsing to further improve lookup performance
  • *

    *

  • default: the default Elasticsearch postings format offering best * general purpose performance. This format is used if no postings format is * specified in the field mapping.
  • *
*/ public class PostingFormats { private static final ImmutableMap builtInPostingFormats; static { MapBuilder buildInPostingFormatsX = MapBuilder.newMapBuilder(); // add defaults ones for (String luceneName : PostingsFormat.availablePostingsFormats()) { buildInPostingFormatsX.put(luceneName, new PreBuiltPostingsFormatProvider.Factory(PostingsFormat.forName(luceneName))); } final ElasticSearch090PostingsFormat defaultFormat = new ElasticSearch090PostingsFormat(); buildInPostingFormatsX.put("direct", new PreBuiltPostingsFormatProvider.Factory("direct", PostingsFormat.forName("Direct"))); buildInPostingFormatsX.put("memory", new PreBuiltPostingsFormatProvider.Factory("memory", PostingsFormat.forName("Memory"))); // LUCENE UPGRADE: Need to change this to the relevant ones on a lucene upgrade buildInPostingFormatsX.put("pulsing", new PreBuiltPostingsFormatProvider.Factory("pulsing", PostingsFormat.forName("Pulsing41"))); buildInPostingFormatsX.put("default", new PreBuiltPostingsFormatProvider.Factory("default", defaultFormat)); buildInPostingFormatsX.put("bloom_pulsing", new PreBuiltPostingsFormatProvider.Factory("bloom_pulsing", wrapInBloom(PostingsFormat.forName("Pulsing41")))); buildInPostingFormatsX.put("bloom_default", new PreBuiltPostingsFormatProvider.Factory("bloom_default", wrapInBloom(PostingsFormat.forName("Lucene41")))); builtInPostingFormats = buildInPostingFormatsX.immutableMap(); } public static final boolean luceneBloomFilter = false; static PostingsFormat wrapInBloom(PostingsFormat delegate) { if (luceneBloomFilter) { return new BloomFilteringPostingsFormat(delegate, new BloomFilterLucenePostingsFormatProvider.CustomBloomFilterFactory()); } return new BloomFilterPostingsFormat(delegate, BloomFilter.Factory.DEFAULT); } public static PostingsFormatProvider.Factory getAsFactory(String name) { return builtInPostingFormats.get(name); } public static PostingsFormatProvider getAsProvider(String name) { return builtInPostingFormats.get(name).get(); } public static ImmutableCollection listFactories() { return builtInPostingFormats.values(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy