// org.neo4j.kernel.api.impl.fulltext.LuceneFulltextDocumentStructure Maven / Gradle / Ivy
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [https://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.kernel.api.impl.fulltext;
import static org.apache.lucene.document.Field.Store.NO;
import static org.apache.lucene.document.Field.Store.YES;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.neo4j.values.AnyValue;
import org.neo4j.values.storable.TextArray;
import org.neo4j.values.storable.TextValue;
import org.neo4j.values.storable.Value;
import org.neo4j.values.storable.ValueGroup;
/**
 * Static helpers describing how an entity and its text properties are laid out in a Lucene document
 * of a fulltext index, plus the terms and queries used to find such documents again.
 *
 * <p>Documents are built on a per-thread, reusable {@link Document} instance: every call to
 * {@link #documentRepresentingProperties(long, String[], Value[])} on the same thread resets and
 * returns the same object, so a returned document must be consumed before the next call made on
 * that thread.
 */
public class LuceneFulltextDocumentStructure {
    /** Name of the Lucene field that holds the id of the entity a document represents. */
    public static final String FIELD_ENTITY_ID = "__neo4j__lucene__fulltext__index__internal__id__";

    // One reusable document holder per thread; it is reset through setId before every use.
    private static final ThreadLocal<DocWithId> perThreadDocument = ThreadLocal.withInitial(DocWithId::new);

    private LuceneFulltextDocumentStructure() {}

    /**
     * Fetches the calling thread's document holder and re-targets it at the given entity id,
     * which also drops all property fields left over from its previous use.
     */
    private static DocWithId reuseDocument(long id) {
        DocWithId doc = perThreadDocument.get();
        doc.setId(id);
        return doc;
    }

    /**
     * Builds the document for the entity with the given id from the supplied properties.
     *
     * @param id id of the entity the document represents.
     * @param propertyNames names of the properties; used as Lucene field names.
     * @param values property values, where {@code values[i]} belongs to {@code propertyNames[i]}.
     * @return A document with the properties set, or null if no properties were relevant
     * (= none of the properties were of type TEXT or TEXT_ARRAY - which are the only types
     * indexed by the fulltext indexes). The returned document is the calling thread's reusable
     * instance and is overwritten by the next call on the same thread.
     */
    public static Document documentRepresentingProperties(long id, String[] propertyNames, Value[] values) {
        DocWithId document = reuseDocument(id);
        int setValues = document.setValues(propertyNames, values);
        return setValues == 0 ? null : document.document;
    }

    /**
     * Creates a non-stored {@link TextField} named after the property key, holding the string
     * value of the given value. The value must be a {@link TextValue}; anything else fails the cast.
     */
    private static Field encodeValueField(String propertyKey, Value value) {
        TextValue textValue = (TextValue) value;
        String stringValue = textValue.stringValue();
        return new TextField(propertyKey, stringValue, NO);
    }

    /**
     * Reads the entity id back from a document by parsing the value of its {@link #FIELD_ENTITY_ID} field.
     *
     * @throws NumberFormatException if the field value cannot be parsed as a {@code long}.
     */
    static long getNodeId(Document from) {
        String entityId = from.get(FIELD_ENTITY_ID);
        return Long.parseLong(entityId);
    }

    /**
     * Creates the term that identifies the document of the entity with the given id. The id is
     * rendered with {@link Long#toString(long)}, the same way it is written into the id field.
     */
    static Term newTermForChangeOrRemove(long id) {
        return new Term(FIELD_ENTITY_ID, Long.toString(id));
    }

    /**
     * Builds a query matching the document of the given entity, provided that document has no
     * terms in any field whose expected value is neither TEXT nor TEXT_ARRAY.
     *
     * @param nodeId id of the entity whose document should match.
     * @param propertyKeys property keys, i.e. the field names to check.
     * @param propertyValues expected values, where {@code propertyValues[i]} belongs to
     * {@code propertyKeys[i]}; at least as many values as keys must be supplied.
     */
    static Query newCountEntityEntriesQuery(long nodeId, String[] propertyKeys, Value... propertyValues) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.add(new TermQuery(newTermForChangeOrRemove(nodeId)), BooleanClause.Occur.MUST);
        for (int i = 0; i < propertyKeys.length; i++) {
            String propertyKey = propertyKeys[i];
            Value value = propertyValues[i];
            // Only match on entries that don't contain fields we don't expect
            if (value.valueGroup() != ValueGroup.TEXT && value.valueGroup() != ValueGroup.TEXT_ARRAY) {
                Query valueQuery = new ConstantScoreQuery(new WildcardQuery(new Term(propertyKey, "*")));
                builder.add(valueQuery, BooleanClause.Occur.MUST_NOT);
            }
            // Why don't we match on the TEXT values that actually should be in the index?
            // 1. The analyzer used for our index can have split the property value into several terms so we cannot
            //    check that the exact property value exist in the index.
            // 2. There are some characters that analyzers will skip completely and if we have a property value with
            //    only such characters there will be no reference to the field at all, so we cannot use a wildcard
            //    query either.
        }
        return builder.build();
    }

    /**
     * A reusable Lucene document together with the two fields that carry the entity id:
     * a stored string field and a numeric doc-values field, both named {@link #FIELD_ENTITY_ID}.
     */
    private static class DocWithId {
        private final Document document;
        private final Field idField;
        private final Field idValueField;

        private DocWithId() {
            idField = new StringField(FIELD_ENTITY_ID, "", YES);
            idValueField = new NumericDocValuesField(FIELD_ENTITY_ID, 0L);
            document = new Document();
            document.add(idField);
            document.add(idValueField);
        }

        /** Resets the document to contain only the id fields and points them at the given id. */
        private void setId(long id) {
            removeAllValueFields();
            idField.setStringValue(Long.toString(id));
            idValueField.setLongValue(id);
        }

        /**
         * Adds a field for every TEXT value, and one field per element for every TEXT_ARRAY value.
         * Null values and values of any other group are skipped.
         *
         * @return the number of properties that were of a supported type. A TEXT_ARRAY property
         * counts once regardless of how many elements it contributes.
         */
        private int setValues(String[] names, Value[] values) {
            int nbrAddedValues = 0;
            for (int i = 0; i < names.length; i++) {
                String name = names[i];
                Value value = values[i];
                if (value == null) {
                    continue;
                }
                ValueGroup group = value.valueGroup();
                if (group == ValueGroup.TEXT) {
                    document.add(encodeValueField(name, value));
                    nbrAddedValues++;
                } else if (group == ValueGroup.TEXT_ARRAY) {
                    var array = (TextArray) value;
                    for (AnyValue val : array) {
                        document.add(encodeValueField(name, (Value) val));
                    }
                    nbrAddedValues++;
                }
            }
            return nbrAddedValues;
        }

        /** Clears the document and re-adds the two id fields, leaving no property fields behind. */
        private void removeAllValueFields() {
            document.clear();
            document.add(idField);
            document.add(idValueField);
        }
    }
}