com.vmware.xenon.services.common.LuceneIndexDocumentHelper Maven / Gradle / Ivy
/*
* Copyright (c) 2014-2017 VMware, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, without warranties or
* conditions of any kind, EITHER EXPRESS OR IMPLIED. See the License for the
* specific language governing permissions and limitations under the License.
*/
package com.vmware.xenon.services.common;
import java.net.URI;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import com.esotericsoftware.kryo.KryoException;
import com.esotericsoftware.kryo.io.Output;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import com.vmware.xenon.common.ReflectionUtils;
import com.vmware.xenon.common.ServiceDocument;
import com.vmware.xenon.common.ServiceDocumentDescription;
import com.vmware.xenon.common.ServiceDocumentDescription.PropertyDescription;
import com.vmware.xenon.common.ServiceDocumentDescription.PropertyIndexingOption;
import com.vmware.xenon.common.ServiceDocumentDescription.PropertyUsageOption;
import com.vmware.xenon.common.ServiceDocumentDescription.TypeName;
import com.vmware.xenon.common.TaskState;
import com.vmware.xenon.common.Utils;
import com.vmware.xenon.common.serialization.KryoSerializers;
import com.vmware.xenon.services.common.QueryTask.QuerySpecification;
/**
* Internal only helper used by the {@code LuceneDocumentIndexService} to add new documents
* to the index (each document added represents a specific version of a service state)
*
* Each instance of this class is kept in a thread local variable in the index service, to
* avoid allocations of Document and Field instances for every service state update.
*/
class LuceneIndexDocumentHelper {
public static final String GROUP_BY_PROPERTY_NAME_SUFFIX = "_groupBySuffix";
public static final String SORT_PROPERTY_NAME_SUFFIX = "_sort";
private static final String FIELD_NAME_INDEXING_PREFIX = "xenon.indexing";
public static final String FIELD_NAME_INDEXING_ID = FIELD_NAME_INDEXING_PREFIX + ".id";
public static final String FIELD_NAME_INDEXING_METADATA_VALUE_TOMBSTONE_TIME =
FIELD_NAME_INDEXING_PREFIX + ".metadata.tombstone.time";
private static final String DISABLE_SORT_FIELD_NAMING_PROPERTY_NAME =
Utils.PROPERTY_NAME_PREFIX + "LuceneIndexDocumentHelper.DISABLE_SORT_FIELD_NAMING";
private static boolean DISABLE_SORT_FIELD_NAMING = Boolean.getBoolean(
DISABLE_SORT_FIELD_NAMING_PROPERTY_NAME);
public static final long ACTIVE_DOCUMENT_TOMBSTONE_TIME = Long.MAX_VALUE;
private Document doc = new Document();
public Document getDoc() {
return this.doc;
}
abstract static class LongFieldContext {
public StoredField storedField;
public LongPoint longPoint;
public NumericDocValuesField numericDocField;
public abstract void initialize();
}
abstract static class StringFieldContext {
public StringField stringField;
public SortedDocValuesField sortedField;
public abstract void initialize();
}
private final LongFieldContext versionField = new LongFieldContext() {
@Override
public void initialize() {
this.storedField = new StoredField(ServiceDocument.FIELD_NAME_VERSION, 0L);
this.longPoint = new LongPoint(ServiceDocument.FIELD_NAME_VERSION, 0L);
this.numericDocField = new NumericDocValuesField(ServiceDocument.FIELD_NAME_VERSION,
0L);
}
};
private final LongFieldContext updateTimeField = new LongFieldContext() {
@Override
public void initialize() {
this.storedField = new StoredField(ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS, 0L);
this.longPoint = new LongPoint(ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS, 0L);
this.numericDocField = new NumericDocValuesField(
ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS, 0L);
}
};
private final LongFieldContext expirationTimeField = new LongFieldContext() {
@Override
public void initialize() {
this.storedField = new StoredField(ServiceDocument.FIELD_NAME_EXPIRATION_TIME_MICROS,
0L);
this.longPoint = new LongPoint(ServiceDocument.FIELD_NAME_EXPIRATION_TIME_MICROS, 0L);
this.numericDocField = new NumericDocValuesField(
ServiceDocument.FIELD_NAME_EXPIRATION_TIME_MICROS, 0L);
}
};
private final LongFieldContext tombstoneTimeField = new LongFieldContext() {
@Override
public void initialize() {
this.numericDocField = new NumericDocValuesField(
FIELD_NAME_INDEXING_METADATA_VALUE_TOMBSTONE_TIME, ACTIVE_DOCUMENT_TOMBSTONE_TIME);
}
};
private final StringFieldContext selfLinkField = new StringFieldContext() {
@Override
public void initialize() {
this.stringField = new StringField(ServiceDocument.FIELD_NAME_SELF_LINK, "", Store.YES);
this.sortedField = new SortedDocValuesField(
createSortFieldPropertyName(ServiceDocument.FIELD_NAME_SELF_LINK),
new BytesRef(" "));
}
};
private final StringFieldContext kindField = new StringFieldContext() {
@Override
public void initialize() {
this.stringField = new StringField(ServiceDocument.FIELD_NAME_KIND, "", Store.NO);
}
};
private final StringFieldContext authPrincipalLinkField = new StringFieldContext() {
@Override
public void initialize() {
this.stringField = new StringField(ServiceDocument.FIELD_NAME_AUTH_PRINCIPAL_LINK, "",
Store.NO);
}
};
private final StringFieldContext txIdField = new StringFieldContext() {
@Override
public void initialize() {
this.stringField = new StringField(ServiceDocument.FIELD_NAME_TRANSACTION_ID, "",
Store.NO);
}
};
private final StringFieldContext updateActionField = new StringFieldContext() {
@Override
public void initialize() {
this.stringField = new StringField(ServiceDocument.FIELD_NAME_UPDATE_ACTION, "",
Store.YES);
}
};
private final StringFieldContext indexingIdField = new StringFieldContext() {
@Override
public void initialize() {
this.stringField = new StringField(FIELD_NAME_INDEXING_ID, "", Store.YES);
}
};
private final Map storedFields = new HashMap<>();
private final Map stringFields = new HashMap<>();
private final Map storedStringFields = new HashMap<>();
private final Map sortedStringFields = new HashMap<>();
private final Map longPointFields = new HashMap<>();
private final Map doublePointFields = new HashMap<>();
private Map numericFields = new HashMap<>();
public LuceneIndexDocumentHelper() {
this.selfLinkField.initialize();
this.kindField.initialize();
this.authPrincipalLinkField.initialize();
this.expirationTimeField.initialize();
this.txIdField.initialize();
this.updateActionField.initialize();
this.updateTimeField.initialize();
this.versionField.initialize();
this.tombstoneTimeField.initialize();
this.indexingIdField.initialize();
}
void addSelfLinkField(String selfLink) {
StringFieldContext ctx = this.selfLinkField;
ctx.stringField.setStringValue(selfLink);
ctx.sortedField.setBytesValue(new BytesRef(selfLink));
this.doc.add(ctx.stringField);
this.doc.add(ctx.sortedField);
}
void addKindField(String kind) {
this.kindField.stringField.setStringValue(kind);
this.doc.add(this.kindField.stringField);
}
void addUpdateActionField(String action) {
this.updateActionField.stringField.setStringValue(action);
this.doc.add(this.updateActionField.stringField);
}
void addTxIdField(String txId) {
this.txIdField.stringField.setStringValue(txId);
this.doc.add(this.txIdField.stringField);
}
void addAuthPrincipalLinkField(String authLink) {
this.authPrincipalLinkField.stringField.setStringValue(authLink);
this.doc.add(this.authPrincipalLinkField.stringField);
}
void addVersionField(long version) {
updateLongFieldContext(version, this.versionField);
}
void addUpdateTimeField(long updateTimeMicros) {
updateLongFieldContext(updateTimeMicros, this.updateTimeField);
}
void addExpirationTimeField(long exp) {
updateLongFieldContext(exp, this.expirationTimeField);
}
void addTombstoneTimeField() {
this.tombstoneTimeField.numericDocField.setLongValue(ACTIVE_DOCUMENT_TOMBSTONE_TIME);
this.doc.add(this.tombstoneTimeField.numericDocField);
}
void addIndexingIdField(String selfLink, Long epoch, long version) {
StringBuilder sb = new StringBuilder(selfLink);
if (epoch != null) {
sb.append(":").append(epoch);
}
String indexingId = sb.append(":").append(version).toString();
this.indexingIdField.stringField.setStringValue(indexingId);
this.doc.add(this.indexingIdField.stringField);
}
private void updateLongFieldContext(long value, LongFieldContext ctx) {
ctx.storedField.setLongValue(value);
this.doc.add(ctx.storedField);
ctx.longPoint.setLongValue(value);
this.doc.add(ctx.longPoint);
ctx.numericDocField.setLongValue(value);
this.doc.add(ctx.numericDocField);
}
void addNumericField(String propertyName, long propertyValue,
boolean isStored, boolean isCollectionItem, boolean sorted) {
if (isStored) {
Field field = isCollectionItem ? new StoredField(propertyName, propertyValue)
: getAndSetStoredField(propertyName, propertyValue);
this.doc.add(field);
}
// LongPoint adds an index field to the document that allows for efficient search
// and range queries
if (isCollectionItem) {
this.doc.add(new LongPoint(propertyName, propertyValue));
} else {
LongPoint lpField = this.longPointFields.computeIfAbsent(propertyName, (k) -> {
return new LongPoint(propertyName, propertyValue);
});
lpField.setLongValue(propertyValue);
this.doc.add(lpField);
}
// NumericDocValues allow for efficient group operations for a property.
NumericDocValuesField ndField = getAndSetNumericField(propertyName, propertyValue,
isCollectionItem);
this.doc.add(ndField);
if (sorted) {
// special handling for groupBy queries, docValuesField can not be added twice
// We suffix the property name with "_group", add a SortedDocValuesField
Field sdField = getAndSetSortedStoredField(propertyName + GROUP_BY_PROPERTY_NAME_SUFFIX,
Long.toString(propertyValue));
this.doc.add(sdField);
}
}
private void addNumericField(String propertyName, double propertyValue,
boolean stored, boolean isCollectionItem, boolean sorted) {
long longPropertyValue = NumericUtils.doubleToSortableLong(propertyValue);
if (stored) {
Field field = isCollectionItem ? new StoredField(propertyName, propertyValue)
: getAndSetStoredField(propertyName, propertyValue);
this.doc.add(field);
}
// DoublePoint adds an index field to the document that allows for efficient search
// and range queries
if (isCollectionItem) {
this.doc.add(new DoublePoint(propertyName, propertyValue));
} else {
DoublePoint dpField = this.doublePointFields.computeIfAbsent(propertyName,
(k) -> {
return new DoublePoint(propertyName, propertyValue);
});
dpField.setDoubleValue(propertyValue);
this.doc.add(dpField);
}
NumericDocValuesField ndField = getAndSetNumericField(propertyName, longPropertyValue,
isCollectionItem);
this.doc.add(ndField);
if (sorted) {
// special handling for groupBy queries
Field sdField = getAndSetSortedStoredField(propertyName + GROUP_BY_PROPERTY_NAME_SUFFIX,
Double.toString(propertyValue));
this.doc.add(sdField);
}
}
public void addBinaryStateFieldToDocument(ServiceDocument s, byte[] serializedDocument,
ServiceDocumentDescription desc) {
try {
int count = 0;
if (serializedDocument == null) {
Output o = KryoSerializers.serializeDocumentForIndexing(s,
desc.serializedStateSizeLimit);
count = o.position();
serializedDocument = o.getBuffer();
} else {
count = serializedDocument.length;
}
Field bodyField = new StoredField(
LuceneDocumentIndexService.LUCENE_FIELD_NAME_BINARY_SERIALIZED_STATE,
serializedDocument, 0, count);
this.doc.add(bodyField);
} catch (KryoException ke) {
throw new IllegalArgumentException(
"Failure serializing state of service " + s.documentSelfLink
+ ", possibly due to size limit."
+ " Service author should override getDocumentTemplate() and adjust"
+ " ServiceDocumentDescription.serializedStateSizeLimit. Cause: "
+ ke.toString());
}
}
public void addIndexableFieldsToDocument(Object podo,
ServiceDocumentDescription sd) {
for (Entry e : sd.propertyDescriptions.entrySet()) {
String name = e.getKey();
PropertyDescription pd = e.getValue();
if (pd.usageOptions != null
&& pd.usageOptions.contains(PropertyUsageOption.INFRASTRUCTURE)) {
continue;
}
Object v = ReflectionUtils.getPropertyValue(pd, podo);
addIndexableFieldToDocument(v, pd, name, false, true);
}
}
/**
* Add single indexable field to the Lucene {@link Document}.
* This function recurses if the field value is a PODO, map, array, or collection.
*/
private void addIndexableFieldToDocument(Object podo, PropertyDescription pd,
String fieldName, boolean isCollectionItem, boolean allowSortedField) {
Field luceneField = null;
Field luceneDocValuesField = null;
Field.Store fsv = Field.Store.NO;
boolean isSortedField = false;
boolean expandField = false;
Object v = podo;
if (v == null) {
return;
}
EnumSet opts = pd.indexingOptions;
if (opts != null) {
if (opts.contains(PropertyIndexingOption.STORE_ONLY)) {
return;
}
if (opts.contains(PropertyIndexingOption.SORT)) {
isSortedField = true;
}
if (opts.contains(PropertyIndexingOption.EXPAND)) {
expandField = true;
}
}
if (pd.usageOptions != null) {
if (pd.usageOptions.contains(PropertyUsageOption.LINK)) {
fsv = Field.Store.YES;
}
if (pd.usageOptions.contains(PropertyUsageOption.LINKS)) {
expandField = true;
}
}
boolean isStored = fsv == Field.Store.YES;
String stringValue = null;
if (v instanceof String) {
stringValue = v.toString();
if (opts == null) {
luceneField = getAndSetStringField(fieldName, stringValue, fsv, isCollectionItem);
} else {
if (opts.contains(PropertyIndexingOption.CASE_INSENSITIVE)) {
stringValue = stringValue.toLowerCase();
}
if (opts.contains(PropertyIndexingOption.TEXT)) {
luceneField = new TextField(fieldName, stringValue, fsv);
} else {
luceneField = getAndSetStringField(fieldName, stringValue, fsv,
isCollectionItem);
}
}
} else if (v instanceof URI) {
stringValue = QuerySpecification.toMatchValue((URI) v);
luceneField = getAndSetStringField(fieldName, stringValue, fsv, isCollectionItem);
} else if (pd.typeName.equals(TypeName.ENUM)) {
stringValue = QuerySpecification.toMatchValue((Enum>) v);
luceneField = getAndSetStringField(fieldName, stringValue, fsv, isCollectionItem);
} else if (pd.typeName.equals(TypeName.LONG)) {
long value = ((Number) v).longValue();
addNumericField(fieldName, value, isStored, isCollectionItem, isSortedField);
// Set sorted to false; Appropriate SortedDocValues field is added in addNumericField
isSortedField = false;
} else if (pd.typeName.equals(TypeName.DATE)) {
// Index as microseconds since UNIX epoch
long micros = 0;
if (v instanceof Date) {
micros = ((Date) v).getTime() * 1000;
} else if (v instanceof ZonedDateTime) {
Instant inst = ((ZonedDateTime) v).toInstant();
micros = instantToMicros(inst);
} else if (v instanceof Instant) {
Instant inst = (Instant) v;
micros = instantToMicros(inst);
} else if (v instanceof LocalDateTime) {
Instant inst = ((LocalDateTime) v).atZone(ZoneId.systemDefault()).toInstant();
micros = instantToMicros(inst);
}
addNumericField(fieldName, micros, isStored, isCollectionItem, false);
isSortedField = false;
} else if (pd.typeName.equals(TypeName.DOUBLE)) {
double value = ((Number) v).doubleValue();
addNumericField(fieldName, value, isStored, isCollectionItem, isSortedField);
// Set sorted to false; Appropriate SortedDocValues field is added in addNumericField
isSortedField = false;
} else if (pd.typeName.equals(TypeName.BOOLEAN)) {
stringValue = QuerySpecification.toMatchValue((boolean) v);
luceneField = getAndSetStringField(fieldName, stringValue, fsv, isCollectionItem);
} else if (pd.typeName.equals(TypeName.BYTES)) {
// Don't store bytes in the index
isSortedField = false;
} else if (pd.typeName.equals(TypeName.PODO)) {
// Ignore all complex fields if they are not explicitly marked with EXPAND.
// We special case all fields of TaskState to ensure task based services have
// a guaranteed minimum level indexing and query behavior
if (!(v instanceof TaskState) && !expandField) {
return;
}
addObjectIndexableFieldToDocument(v, pd, fieldName, isCollectionItem);
return;
} else if (expandField && pd.typeName.equals(TypeName.MAP)) {
addMapIndexableFieldToDocument(v, pd, fieldName);
return;
} else if (expandField && (pd.typeName.equals(TypeName.COLLECTION))) {
addCollectionIndexableFieldToDocument(v, pd, fieldName);
return;
} else {
stringValue = v.toString();
luceneField = getAndSetStringField(fieldName, stringValue, fsv, isCollectionItem);
}
if (isSortedField && allowSortedField) {
luceneDocValuesField = getAndSetSortedStoredField(
createSortFieldPropertyName(fieldName), stringValue);
}
if (luceneField != null) {
this.doc.add(luceneField);
}
if (luceneDocValuesField != null) {
this.doc.add(luceneDocValuesField);
}
}
private long instantToMicros(Instant inst) {
long micros;
micros = inst.getEpochSecond() * 1000 * 1000;
micros += inst.getNano() / 1000;
return micros;
}
private void addObjectIndexableFieldToDocument(Object v, PropertyDescription pd,
String fieldNamePrefix, boolean isCollectionItem) {
for (Entry e : pd.fieldDescriptions.entrySet()) {
PropertyDescription fieldDescription = e.getValue();
Object fieldValue = ReflectionUtils.getPropertyValue(fieldDescription, v);
if (v == null) {
continue;
}
if (pd.indexingOptions.contains(PropertyIndexingOption.SORT)) {
fieldDescription.indexingOptions.add(PropertyIndexingOption.SORT);
}
String fieldName = QuerySpecification.buildCompositeFieldName(fieldNamePrefix,
e.getKey());
addIndexableFieldToDocument(fieldValue, fieldDescription, fieldName,
isCollectionItem, true);
}
}
@SuppressWarnings({ "rawtypes" })
private void addMapIndexableFieldToDocument(Object v, PropertyDescription pd,
String fieldNamePrefix) {
final String errorMsg = "Field not supported. Map keys must be of type String.";
Map m = (Map) v;
if (pd.indexingOptions.contains(PropertyIndexingOption.SORT)) {
pd.elementDescription.indexingOptions.add(PropertyIndexingOption.SORT);
}
for (Object o : m.entrySet()) {
Entry entry = (Entry) o;
Object mapKey = entry.getKey();
if (!(mapKey instanceof String)) {
throw new IllegalArgumentException(errorMsg);
}
// There is a risk our field thread local maps grow too much from discrete field names
// built from map keys. It should be unlikely however: "keys" need to be a set of well known
// names for queries to be useful. Even 1M discrete field names should be OK on a loaded
// node
addIndexableFieldToDocument(entry.getValue(), pd.elementDescription,
QuerySpecification.buildCompositeFieldName(fieldNamePrefix, (String) mapKey), false, true);
if (pd.indexingOptions.contains(PropertyIndexingOption.FIXED_ITEM_NAME)) {
addIndexableFieldToDocument(entry.getKey(), new PropertyDescription(), fieldNamePrefix, true, true);
addIndexableFieldToDocument(entry.getValue(), pd.elementDescription,
fieldNamePrefix, true, false);
}
}
}
@SuppressWarnings("rawtypes")
private void addCollectionIndexableFieldToDocument(Object v,
PropertyDescription pd, String fieldNamePrefix) {
fieldNamePrefix = QuerySpecification.buildCollectionItemName(fieldNamePrefix);
Collection c;
if (v instanceof Collection) {
c = (Collection) v;
} else {
c = Arrays.asList((Object[]) v);
}
if (pd.indexingOptions.contains(PropertyIndexingOption.SORT)) {
pd.elementDescription.indexingOptions.add(PropertyIndexingOption.SORT);
}
for (Object cv : c) {
if (cv == null) {
continue;
}
addIndexableFieldToDocument(cv, pd.elementDescription, fieldNamePrefix, true, true);
}
}
private Field getAndSetSortedStoredField(String name, String value) {
Field f = this.sortedStringFields.computeIfAbsent(name, (k) -> {
return new SortedDocValuesField(name, new BytesRef(value));
});
f.setBytesValue(new BytesRef(value));
return f;
}
private Field getAndSetStringField(String name, String value, Field.Store fsv,
boolean isCollectionItem) {
if (isCollectionItem) {
return new StringField(name, value, fsv);
}
if (fsv == Field.Store.YES) {
return getAndSetStoredField(name, value);
} else {
return getAndSetStringField(name, value);
}
}
private Field getAndSetStringField(String name, String value) {
Field f = this.stringFields.computeIfAbsent(name, (k) -> {
return new StringField(name, value, Field.Store.NO);
});
f.setStringValue(value);
return f;
}
private Field getAndSetStoredField(String name, String value) {
Field f = this.storedStringFields.computeIfAbsent(name, (k) -> {
return new StringField(name, value, Field.Store.YES);
});
f.setStringValue(value);
return f;
}
private Field getAndSetStoredField(String name, Long value) {
Field f = this.storedFields.computeIfAbsent(name, (k) -> {
return new StoredField(name, value);
});
f.setLongValue(value);
return f;
}
private Field getAndSetStoredField(String name, Double value) {
Field f = this.storedFields.computeIfAbsent(name, (k) -> {
return new StoredField(name, value);
});
f.setDoubleValue(value);
return f;
}
private NumericDocValuesField getAndSetNumericField(String propertyName, long propertyValue,
boolean isCollectionItem) {
if (isCollectionItem) {
return new NumericDocValuesField(propertyName, propertyValue);
}
NumericDocValuesField ndField = this.numericFields.computeIfAbsent(propertyName,
(k) -> {
return new NumericDocValuesField(propertyName, propertyValue);
});
ndField.setLongValue(propertyValue);
return ndField;
}
static String createSortFieldPropertyName(String propertyName) {
return DISABLE_SORT_FIELD_NAMING ? propertyName : propertyName + SORT_PROPERTY_NAME_SUFFIX;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy