All downloads are free. The search and download functionality uses the official Maven repository.

org.elasticsearch.index.fielddata.plain.SortedSetDVAtomicFieldData Maven / Gradle / Ivy

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongsRef;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.IntArray;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;

import java.io.IOException;

/**
 * {@link AtomicFieldData} impl based on Lucene's {@link SortedSetDocValues}.
 * 

Implementation note: Lucene's ordinal for unset values is -1 whereas Elasticsearch's is 0, this is why there are all * these +1 to translate from Lucene's ordinals to ES's. */ abstract class SortedSetDVAtomicFieldData { private final AtomicReader reader; private final String field; private volatile IntArray hashes; SortedSetDVAtomicFieldData(AtomicReader reader, String field) { this.reader = reader; this.field = field; } public boolean isMultiValued() { // we could compute it when loading the values for the first time and then cache it but it would defeat the point of // doc values which is to make loading faster return true; } public int getNumDocs() { return reader.maxDoc(); } public long getNumberUniqueValues() { final SortedSetDocValues values = getValuesNoException(reader, field); return values.getValueCount(); } public long getMemorySizeInBytes() { // There is no API to access memory usage per-field and RamUsageEstimator can't help since there are often references // from a per-field instance to all other instances handled by the same format return -1L; } public void close() { // no-op } public org.elasticsearch.index.fielddata.BytesValues.WithOrdinals getBytesValues(boolean needsHashes) { final SortedSetDocValues values = getValuesNoException(reader, field); return new SortedSetValues(reader, field, values); } public org.elasticsearch.index.fielddata.BytesValues.WithOrdinals getHashedBytesValues() { final SortedSetDocValues values = getValuesNoException(reader, field); if (hashes == null) { synchronized (this) { if (hashes == null) { final long valueCount = values.getValueCount(); final IntArray hashes = BigArrays.newIntArray(1L + valueCount); BytesRef scratch = new BytesRef(16); hashes.set(0, scratch.hashCode()); for (long i = 0; i < valueCount; ++i) { values.lookupOrd(i, scratch); hashes.set(1L + i, scratch.hashCode()); } this.hashes = hashes; } } } return new SortedSetHashedValues(reader, field, values, hashes); } private static SortedSetDocValues 
getValuesNoException(AtomicReader reader, String field) { try { SortedSetDocValues values = reader.getSortedSetDocValues(field); if (values == null) { // This field has not been populated assert reader.getFieldInfos().fieldInfo(field) == null; values = SortedSetDocValues.EMPTY; } return values; } catch (IOException e) { throw new ElasticsearchIllegalStateException("Couldn't load doc values", e); } } static class SortedSetValues extends BytesValues.WithOrdinals { protected final SortedSetDocValues values; SortedSetValues(AtomicReader reader, String field, SortedSetDocValues values) { super(new SortedSetDocs(new SortedSetOrdinals(reader, field, values.getValueCount()), values)); this.values = values; } @Override public BytesRef getValueByOrd(long ord) { assert ord != Ordinals.MISSING_ORDINAL; values.lookupOrd(ord - 1, scratch); return scratch; } @Override public BytesRef nextValue() { values.lookupOrd(ordinals.nextOrd()-1, scratch); return scratch; } } static final class SortedSetHashedValues extends SortedSetValues { private final IntArray hashes; SortedSetHashedValues(AtomicReader reader, String field, SortedSetDocValues values, IntArray hashes) { super(reader, field, values); this.hashes = hashes; } @Override public int currentValueHash() { assert ordinals.currentOrd() >= 0; return hashes.get(ordinals.currentOrd()); } } static final class SortedSetOrdinals implements Ordinals { // We don't store SortedSetDocValues as a member because Ordinals must be thread-safe private final AtomicReader reader; private final String field; private final long numOrds; public SortedSetOrdinals(AtomicReader reader, String field, long numOrds) { super(); this.reader = reader; this.field = field; this.numOrds = numOrds; } @Override public long getMemorySizeInBytes() { // Ordinals can't be distinguished from the atomic field data instance return -1; } @Override public boolean isMultiValued() { return true; } @Override public int getNumDocs() { return reader.maxDoc(); } @Override public 
long getNumOrds() { return numOrds; } @Override public long getMaxOrd() { return 1 + numOrds; } @Override public Docs ordinals() { final SortedSetDocValues values = getValuesNoException(reader, field); assert values.getValueCount() == numOrds; return new SortedSetDocs(this, values); } } static class SortedSetDocs implements Ordinals.Docs { private final SortedSetOrdinals ordinals; private final SortedSetDocValues values; private final LongsRef longScratch; private int ordIndex = Integer.MAX_VALUE; private long currentOrdinal = -1; SortedSetDocs(SortedSetOrdinals ordinals, SortedSetDocValues values) { this.ordinals = ordinals; this.values = values; longScratch = new LongsRef(8); } @Override public Ordinals ordinals() { return ordinals; } @Override public int getNumDocs() { return ordinals.getNumDocs(); } @Override public long getNumOrds() { return ordinals.getNumOrds(); } @Override public long getMaxOrd() { return ordinals.getMaxOrd(); } @Override public boolean isMultiValued() { return ordinals.isMultiValued(); } @Override public long getOrd(int docId) { values.setDocument(docId); return currentOrdinal = 1 + values.nextOrd(); } @Override public LongsRef getOrds(int docId) { values.setDocument(docId); longScratch.offset = 0; longScratch.length = 0; for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) { longScratch.longs = ArrayUtil.grow(longScratch.longs, longScratch.length + 1); longScratch.longs[longScratch.length++] = 1 + ord; } return longScratch; } @Override public long nextOrd() { assert ordIndex < longScratch.length; return currentOrdinal = longScratch.longs[ordIndex++]; } @Override public int setDocument(int docId) { // For now, we consume all ords and pass them to the iter instead of doing it in a streaming way because Lucene's // SORTED_SET doc values are cached per thread, you can't have a fully independent instance final LongsRef ords = getOrds(docId); ordIndex = 0; return ords.length; } @Override public long 
currentOrd() { return currentOrdinal; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy