
org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexDefinition Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.jackrabbit.oak.plugins.index.lucene;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.jackrabbit.guava.common.collect.ImmutableMap;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.CompressingCodec;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.TokenizerChain;
import org.apache.jackrabbit.oak.plugins.index.lucene.writer.CommitMitigatingTieredMergePolicy;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.IndexFormatVersion;
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.path.PathHierarchyTokenizerFactory;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants.ANL_DEFAULT;
import static org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants.INDEX_ORIGINAL_TERM;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION;
import static org.apache.jackrabbit.oak.plugins.index.search.util.ConfigUtil.getOptionalValue;
public class LuceneIndexDefinition extends IndexDefinition {
private static final Logger log = LoggerFactory.getLogger(LuceneIndexDefinition.class);
private final boolean saveDirListing;
private final Map analyzers;
private final Analyzer analyzer;
private final Codec codec;
private final int maxFieldLength;
public LuceneIndexDefinition(NodeState root, NodeState defn, String indexPath) {
this(root, getIndexDefinitionState(defn), determineIndexFormatVersion(defn), determineUniqueId(defn), indexPath);
}
LuceneIndexDefinition(NodeState root, NodeState defn, IndexFormatVersion version, String uid, String indexPath) {
super(root, defn, version, uid, indexPath);
this.saveDirListing = getOptionalValue(defn, LuceneIndexConstants.SAVE_DIR_LISTING, true);
this.maxFieldLength = getOptionalValue(defn, LuceneIndexConstants.MAX_FIELD_LENGTH, DEFAULT_MAX_FIELD_LENGTH);
this.analyzers = collectAnalyzers(defn);
this.analyzer = createAnalyzer();
this.codec = createCodec();
}
public static Builder newBuilder(NodeState root, NodeState defn, String indexPath){
return (Builder)new Builder()
.root(root)
.defn(defn)
.indexPath(indexPath);
}
public static class Builder extends IndexDefinition.Builder {
@Override
public LuceneIndexDefinition build() {
return (LuceneIndexDefinition)super.build();
}
@Override
public LuceneIndexDefinition.Builder reindex() {
super.reindex();
return this;
}
@Override
protected IndexDefinition createInstance(NodeState indexDefnStateToUse) {
return new LuceneIndexDefinition(root, indexDefnStateToUse, version, uid, indexPath);
}
}
@Override
protected String getDefaultFunctionName() {
return "lucene";//TODO should this be LuceneIndexConstants.TYPE_LUCENE?
}
@Override
protected double getDefaultCostPerEntry(IndexFormatVersion version) {
//For older format cost per entry would be higher as it does a runtime
//aggregation
return version == IndexFormatVersion.V1 ? 1.5 : 1.0;
}
public boolean saveDirListing() {
return saveDirListing;
}
@Nullable
public Codec getCodec() {
return codec;
}
@NotNull
public MergePolicy getMergePolicy() {
// MP is not cached to avoid complaining about multiple IWs with multiplexing writers
return createMergePolicy();
}
public Analyzer getAnalyzer(){
return analyzer;
}
//~---------------------------------------------------< Analyzer >
private Analyzer createAnalyzer() {
Analyzer result;
Analyzer defaultAnalyzer = LuceneIndexConstants.ANALYZER;
if (analyzers.containsKey(ANL_DEFAULT)){
defaultAnalyzer = analyzers.get(ANL_DEFAULT);
}
if (!evaluatePathRestrictions()){
result = defaultAnalyzer;
} else {
Map analyzerMap = ImmutableMap.builder()
.put(FieldNames.ANCESTORS,
new TokenizerChain(new PathHierarchyTokenizerFactory(Collections.emptyMap())))
.build();
result = new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzerMap);
}
//In case of negative value no limits would be applied
if (maxFieldLength < 0){
return result;
}
return new LimitTokenCountAnalyzer(result, maxFieldLength);
}
private static Map collectAnalyzers(NodeState defn) {
Map analyzerMap = new HashMap<>();
NodeStateAnalyzerFactory factory = new NodeStateAnalyzerFactory(VERSION);
NodeState analyzersTree = defn.getChildNode(FulltextIndexConstants.ANALYZERS);
for (ChildNodeEntry cne : analyzersTree.getChildNodeEntries()) {
Analyzer a = factory.createInstance(cne.getNodeState());
analyzerMap.put(cne.getName(), a);
}
if (getOptionalValue(analyzersTree, INDEX_ORIGINAL_TERM, false) && !analyzerMap.containsKey(ANL_DEFAULT)) {
analyzerMap.put(ANL_DEFAULT, new OakAnalyzer(VERSION, true));
}
return ImmutableMap.copyOf(analyzerMap);
}
//~---------------------------------------------< utility >
private Codec createCodec() {
String mmp = System.getProperty("oak.lucene.compressing-codec");
if (mmp != null) {
return new CompressingCodec();
}
String codecName = getOptionalValue(definition, LuceneIndexConstants.CODEC_NAME, null);
Codec codec = null;
if (codecName != null) {
// prevent LUCENE-6482
// (also done in LuceneIndexProviderService, just to be save)
OakCodec ensureLucene46CodecLoaded = new OakCodec();
// to ensure the JVM doesn't optimize away object creation
// (probably not really needed; just to be save)
log.debug("Lucene46Codec is loaded: {}", ensureLucene46CodecLoaded);
codec = Codec.forName(codecName);
log.debug("Codec is loaded: {}", codecName);
} else if (fullTextEnabled) {
codec = new OakCodec();
}
return codec;
}
private MergePolicy createMergePolicy() {
String mmp = System.getProperty("oak.lucene.cmmp");
if (mmp != null) {
return new CommitMitigatingTieredMergePolicy();
}
String mergePolicyName = getOptionalValue(definition, LuceneIndexConstants.MERGE_POLICY_NAME, null);
MergePolicy mergePolicy = null;
if (mergePolicyName != null) {
if (mergePolicyName.equalsIgnoreCase("no")) {
mergePolicy = NoMergePolicy.COMPOUND_FILES;
} else if (mergePolicyName.equalsIgnoreCase("mitigated")) {
mergePolicy = new CommitMitigatingTieredMergePolicy();
} else if (mergePolicyName.equalsIgnoreCase("tiered") || mergePolicyName.equalsIgnoreCase("default")) {
mergePolicy = new TieredMergePolicy();
} else if (mergePolicyName.equalsIgnoreCase("logbyte")) {
mergePolicy = new LogByteSizeMergePolicy();
} else if (mergePolicyName.equalsIgnoreCase("logdoc")) {
mergePolicy = new LogDocMergePolicy();
}
}
if (mergePolicy == null) {
mergePolicy = new TieredMergePolicy();
}
return mergePolicy;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy