org.opensearch.index.mapper.IdFieldMapper Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearch Show documentation
Show all versions of opensearch Show documentation
OpenSearch subproject :server
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.index.mapper;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.util.BytesRef;
import org.opensearch.common.annotation.PublicApi;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.common.util.BigArrays;
import org.opensearch.core.indices.breaker.CircuitBreakerService;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.opensearch.index.fielddata.IndexFieldDataCache;
import org.opensearch.index.fielddata.LeafFieldData;
import org.opensearch.index.fielddata.ScriptDocValues;
import org.opensearch.index.fielddata.SortedBinaryDocValues;
import org.opensearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
import org.opensearch.index.fielddata.plain.PagedBytesIndexFieldData;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.indices.IndicesService;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.MultiValueMode;
import org.opensearch.search.aggregations.support.CoreValuesSourceType;
import org.opensearch.search.aggregations.support.ValuesSourceType;
import org.opensearch.search.lookup.SearchLookup;
import org.opensearch.search.sort.BucketedSort;
import org.opensearch.search.sort.SortOrder;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.function.Supplier;
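/**
 * A mapper for the _id field. During pre-parsing it adds the encoded document id
 * to the Lucene document as an untokenized field that is indexed (docs only) and
 * stored. Its FieldType is used to generate term queries against _id and, when
 * enabled, to load (deprecated) fielddata for it.
 *
 * @opensearch.api
 */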
@PublicApi(since = "1.0.0")
public class IdFieldMapper extends MetadataFieldMapper {
// Logger used to emit the deprecation warning when fielddata for _id is built.
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(IdFieldMapper.class);
// Text of the deprecation warning logged (under the key "id_field_data") each time
// a fielddata instance for the _id field is built.
static final String ID_FIELD_DATA_DEPRECATION_MESSAGE =
"Loading the fielddata on the _id field is deprecated and will be removed in future versions. "
+ "If you require sorting or aggregating on this field you should also include the id in the "
+ "body of your documents, and map this field as a keyword field that has [doc_values] enabled";
// Name of the metadata field; used as the Lucene field name when the id is added to a document.
public static final String NAME = "_id";
// Value reported by both IdFieldType#typeName() and IdFieldMapper#contentType().
public static final String CONTENT_TYPE = "_id";
/**
 * Default parameters for the {@code _id} field.
 *
 * <p>Both field types are untokenized, indexed with {@link IndexOptions#DOCS},
 * omit norms and are frozen after construction. They differ only in whether the
 * value is stored: {@link #FIELD_TYPE} is stored, {@link #NESTED_FIELD_TYPE} is not.
 *
 * @opensearch.internal
 */
public static class Defaults {
    public static final String NAME = IdFieldMapper.NAME;

    /** Stored variant; this is the type used by {@code IdFieldMapper#preParse}. */
    public static final FieldType FIELD_TYPE = buildFieldType(true);

    /** Non-stored variant (presumably intended for nested documents, as the name suggests). */
    public static final FieldType NESTED_FIELD_TYPE = buildFieldType(false);

    /**
     * Builds a frozen Lucene field type for the id field.
     *
     * @param stored whether the field value is stored
     * @return a frozen, untokenized, docs-only indexed field type without norms
     */
    private static FieldType buildFieldType(boolean stored) {
        final FieldType fieldType = new FieldType();
        fieldType.setTokenized(false);
        fieldType.setIndexOptions(IndexOptions.DOCS);
        fieldType.setStored(stored);
        fieldType.setOmitNorms(true);
        // Freeze so the shared static instance cannot be modified afterwards.
        fieldType.freeze();
        return fieldType;
    }
}
// Type parser for the _id metadata field: builds an IdFieldMapper from the parser context c.
// The supplier handed to the mapper calls c.mapperService().isIdFieldDataEnabled() every time
// it is invoked, so the flag is read lazily rather than captured once at construction.
public static final TypeParser PARSER = new FixedTypeParser(c -> new IdFieldMapper(() -> c.mapperService().isIdFieldDataEnabled()));
/**
* Field type for ID field
*
* @opensearch.internal
*/
static final class IdFieldType extends TermBasedFieldType {
private final Supplier fieldDataEnabled;
IdFieldType(Supplier fieldDataEnabled) {
super(NAME, true, true, false, TextSearchInfo.SIMPLE_MATCH_ONLY, Collections.emptyMap());
this.fieldDataEnabled = fieldDataEnabled;
setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
}
@Override
public String typeName() {
return CONTENT_TYPE;
}
@Override
public boolean isSearchable() {
// The _id field is always searchable.
return true;
}
@Override
public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup lookup, String format) {
throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "].");
}
@Override
public Query termQuery(Object value, QueryShardContext context) {
return termsQuery(Arrays.asList(value), context);
}
@Override
public Query existsQuery(QueryShardContext context) {
return new MatchAllDocsQuery();
}
@Override
public Query termsQuery(List> values, QueryShardContext context) {
failIfNotIndexed();
BytesRef[] bytesRefs = new BytesRef[values.size()];
for (int i = 0; i < bytesRefs.length; i++) {
Object idObject = values.get(i);
if (idObject instanceof BytesRef) {
idObject = ((BytesRef) idObject).utf8ToString();
}
bytesRefs[i] = Uid.encodeId(idObject.toString());
}
return new TermInSetQuery(name(), bytesRefs);
}
@Override
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier searchLookup) {
if (fieldDataEnabled.get() == false) {
throw new IllegalArgumentException(
"Fielddata access on the _id field is disallowed, "
+ "you can re-enable it by updating the dynamic cluster setting: "
+ IndicesService.INDICES_ID_FIELD_DATA_ENABLED_SETTING.getKey()
);
}
final IndexFieldData.Builder fieldDataBuilder = new PagedBytesIndexFieldData.Builder(
name(),
TextFieldMapper.Defaults.FIELDDATA_MIN_FREQUENCY,
TextFieldMapper.Defaults.FIELDDATA_MAX_FREQUENCY,
TextFieldMapper.Defaults.FIELDDATA_MIN_SEGMENT_SIZE,
CoreValuesSourceType.BYTES
);
return new IndexFieldData.Builder() {
@Override
public IndexFieldData> build(IndexFieldDataCache cache, CircuitBreakerService breakerService) {
deprecationLogger.deprecate("id_field_data", ID_FIELD_DATA_DEPRECATION_MESSAGE);
final IndexFieldData> fieldData = fieldDataBuilder.build(cache, breakerService);
return new IndexFieldData() {
@Override
public String getFieldName() {
return fieldData.getFieldName();
}
@Override
public ValuesSourceType getValuesSourceType() {
return fieldData.getValuesSourceType();
}
@Override
public LeafFieldData load(LeafReaderContext context) {
return wrap(fieldData.load(context));
}
@Override
public LeafFieldData loadDirect(LeafReaderContext context) throws Exception {
return wrap(fieldData.loadDirect(context));
}
@Override
public SortField sortField(Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
XFieldComparatorSource source = new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
return new SortField(getFieldName(), source, reverse);
}
@Override
public BucketedSort newBucketedSort(
BigArrays bigArrays,
Object missingValue,
MultiValueMode sortMode,
Nested nested,
SortOrder sortOrder,
DocValueFormat format,
int bucketSize,
BucketedSort.ExtraData extra
) {
throw new UnsupportedOperationException("can't sort on the [" + CONTENT_TYPE + "] field");
}
};
}
};
}
}
private static LeafFieldData wrap(LeafFieldData in) {
return new LeafFieldData() {
@Override
public void close() {
in.close();
}
@Override
public long ramBytesUsed() {
return in.ramBytesUsed();
}
@Override
public ScriptDocValues> getScriptValues() {
return new ScriptDocValues.Strings(getBytesValues());
}
@Override
public SortedBinaryDocValues getBytesValues() {
SortedBinaryDocValues inValues = in.getBytesValues();
return new SortedBinaryDocValues() {
@Override
public BytesRef nextValue() throws IOException {
BytesRef encoded = inValues.nextValue();
return new BytesRef(
Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length))
);
}
@Override
public int docValueCount() {
final int count = inValues.docValueCount();
// If the count is not 1 then the impl is not correct as the binary representation
// does not preserve order. But id fields only have one value per doc so we are good.
assert count == 1;
return inValues.docValueCount();
}
@Override
public boolean advanceExact(int doc) throws IOException {
return inValues.advanceExact(doc);
}
};
}
};
}
/**
 * Creates the mapper with a fresh {@link IdFieldType}.
 *
 * @param fieldDataEnabled supplies whether loading fielddata on {@code _id} is allowed;
 *                         it is handed to the field type unchanged
 */
private IdFieldMapper(Supplier<Boolean> fieldDataEnabled) {
    super(new IdFieldType(fieldDataEnabled));
}
/**
 * Adds the document id to the Lucene document being built: the id taken from
 * the source-to-parse is encoded with {@code Uid.encodeId} and added under
 * {@link #NAME} using {@link Defaults#FIELD_TYPE}.
 */
@Override
public void preParse(ParseContext context) {
    final String rawId = context.sourceToParse().id();
    final BytesRef encodedId = Uid.encodeId(rawId);
    context.doc().add(new Field(NAME, encodedId, Defaults.FIELD_TYPE));
}
/** Returns the content type of this mapper, {@link #CONTENT_TYPE}. */
@Override
protected String contentType() {
    return IdFieldMapper.CONTENT_TYPE;
}
}