com.infomaximum.database.utils.PrefixIndexUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of rdao Show documentation
Show all versions of rdao Show documentation
Library for creating a light cluster
The newest version!
package com.infomaximum.database.utils;
import com.infomaximum.database.domainobject.Value;
import com.infomaximum.database.exception.DatabaseException;
import com.infomaximum.database.provider.DBDataCommand;
import com.infomaximum.database.provider.DBIterator;
import com.infomaximum.database.provider.DBTransaction;
import com.infomaximum.database.provider.KeyValue;
import com.infomaximum.database.schema.Field;
import com.infomaximum.database.schema.PrefixIndex;
import com.infomaximum.database.schema.dbstruct.DBField;
import com.infomaximum.database.schema.dbstruct.DBPrefixIndex;
import com.infomaximum.database.schema.dbstruct.DBTable;
import com.infomaximum.database.utils.key.FieldKey;
import com.infomaximum.database.utils.key.Key;
import com.infomaximum.database.utils.key.PrefixIndexKey;
import java.io.Serializable;
import java.util.*;
import java.util.function.Function;
/**
 * Helpers for maintaining and querying a prefix (starts-with) full-text index.
 *
 * <p>Text is split into words, words into "lexemes" (maximal alphanumeric runs plus
 * their suffixes starting at non-alphanumeric boundaries), and each lexeme maps to
 * blocks of sorted object ids. Blocks are kept at most
 * {@link #PREFERRED_MAX_ID_COUNT_PER_BLOCK} ids long (soft limit).
 *
 * <p>NOTE(review): generic type parameters were restored here (the previous revision
 * used raw types and referenced an undeclared type variable {@code T}).
 */
public class PrefixIndexUtils {

    /** Callback invoked for every word found while scanning a text. */
    @FunctionalInterface
    public interface Action {

        /**
         * @param beginIndex inclusive start offset of the word in the scanned text
         * @param endIndex   exclusive end offset of the word
         * @return {@code false} to abort the scan, {@code true} to continue
         */
        boolean apply(int beginIndex, int endIndex);
    }

    /** Soft cap on ids per index block; a full last block triggers a new block. */
    public static final int PREFERRED_MAX_ID_COUNT_PER_BLOCK = 1024;

    // Orders words shortest-first; used so that short search words are matched
    // before long ones in contains(...).
    private static final Comparator<String> searchingWordComparator = Comparator.comparingInt(String::length);

    /**
     * Builds the set used to collect lexemes. Reverse lexicographic order guarantees
     * that a lexeme is iterated before any of its own prefixes, which lets
     * {@link #splitIndexingTextIntoLexemes(String, SortedSet)} drop redundant prefixes
     * in a single pass.
     */
    public static SortedSet<String> buildSortedSet() {
        return new TreeSet<>(Comparator.reverseOrder());
    }

    /**
     * Computes which lexemes must be removed from and added to the index when the
     * indexed {@code fields} change from {@code prevValues} to {@code newValues}.
     * Results are written into the two out-collections (cleared first).
     */
    public static void diffIndexedLexemes(List<Field> fields, Value[] prevValues, Value[] newValues,
                                          Collection<String> outDeletingLexemes, Collection<String> outInsertingLexemes) {
        diffIndexedLexemes(fields, prevValues, newValues, outDeletingLexemes, outInsertingLexemes, Field::getNumber);
    }

    /**
     * Same as {@link #diffIndexedLexemes(List, Value[], Value[], Collection, Collection)}
     * but addressed by raw field ids into plain {@code Object[]} value arrays.
     *
     * <p>If {@code newValues} is shorter than a field id, the previous value is treated
     * as unchanged for that field.
     */
    public static void diffIndexedLexemes(int[] fieldIds, Object[] prevValues, Object[] newValues,
                                          Collection<String> outDeletingLexemes, Collection<String> outInsertingLexemes) {
        outDeletingLexemes.clear();
        outInsertingLexemes.clear();

        SortedSet<String> prevLexemes = buildSortedSet();
        SortedSet<String> newLexemes = buildSortedSet();
        for (int fieldId : fieldIds) {
            Object prevValue = prevValues[fieldId];
            String prevText = prevValue != null ? (String) prevValue : null;
            PrefixIndexUtils.splitIndexingTextIntoLexemes(prevText, prevLexemes);
            // Missing slot in newValues means the field was not updated.
            Object newValue = fieldId < newValues.length ? newValues[fieldId] : prevValue;
            if (newValue != null) {
                String newText = (String) newValue;
                PrefixIndexUtils.splitIndexingTextIntoLexemes(newText, newLexemes);
            }
        }

        // Set differences in both directions.
        for (String newLexeme : newLexemes) {
            if (!prevLexemes.contains(newLexeme)) {
                outInsertingLexemes.add(newLexeme);
            }
        }
        for (String prevLexeme : prevLexemes) {
            if (!newLexemes.contains(prevLexeme)) {
                outDeletingLexemes.add(prevLexeme);
            }
        }
    }

    /**
     * Collects all lexemes of the given indexed fields from {@code newValues}
     * into {@code outInsertingLexemes} (cleared first), deduplicated and
     * prefix-reduced via the sorted set.
     */
    public static void getIndexedLexemes(DBField[] fields, Object[] newValues, Collection<String> outInsertingLexemes) {
        outInsertingLexemes.clear();

        SortedSet<String> newLexemes = buildSortedSet();
        for (DBField field : fields) {
            Object newValue = field.getId() < newValues.length ? newValues[field.getId()] : null;
            String newText = newValue != null ? (String) newValue : null;
            PrefixIndexUtils.splitIndexingTextIntoLexemes(newText, newLexemes);
        }
        outInsertingLexemes.addAll(newLexemes);
    }

    /**
     * Generic diff of indexed lexemes between {@code prevValues} and {@code newValues}.
     *
     * @param numberGetter maps a field descriptor to its slot index in the value arrays
     */
    public static <T> void diffIndexedLexemes(List<T> fields, Value[] prevValues, Value[] newValues,
                                              Collection<String> outDeletingLexemes, Collection<String> outInsertingLexemes,
                                              Function<T, Integer> numberGetter) {
        outDeletingLexemes.clear();
        outInsertingLexemes.clear();

        SortedSet<String> prevLexemes = buildSortedSet();
        SortedSet<String> newLexemes = buildSortedSet();
        for (T field : fields) {
            int number = numberGetter.apply(field);
            Value prevValue = prevValues[number];
            String prevText = prevValue != null ? (String) prevValue.getValue() : null;
            PrefixIndexUtils.splitIndexingTextIntoLexemes(prevText, prevLexemes);
            // Missing slot in newValues means the field was not updated.
            Value newValue = number < newValues.length ? newValues[number] : prevValue;
            String newText = newValue != null ? (String) newValue.getValue() : prevText;
            PrefixIndexUtils.splitIndexingTextIntoLexemes(newText, newLexemes);
        }

        for (String newLexeme : newLexemes) {
            if (!prevLexemes.contains(newLexeme)) {
                outInsertingLexemes.add(newLexeme);
            }
        }
        for (String prevLexeme : prevLexemes) {
            if (!newLexemes.contains(prevLexeme)) {
                outDeletingLexemes.add(prevLexeme);
            }
        }
    }

    /**
     * Scans {@code text} and invokes {@code action} once per whitespace-delimited word.
     *
     * @return {@code true} if the whole text was scanned, {@code false} if the action
     *         aborted the scan; a {@code null} text scans trivially ({@code true})
     */
    public static boolean forEachWord(String text, Action action) {
        if (text == null) {
            return true;
        }

        int beginWordPos = -1;
        for (int i = 0; i < text.length(); ++i) {
            if (Character.isWhitespace(text.charAt(i))) {
                if (beginWordPos != -1) {
                    if (!action.apply(beginWordPos, i)) {
                        return false;
                    }
                    beginWordPos = -1;
                }
            } else if (beginWordPos == -1) {
                beginWordPos = i;
            }
        }

        // Flush the trailing word, if the text does not end in whitespace.
        if (beginWordPos != -1) {
            return action.apply(beginWordPos, text.length());
        }
        return true;
    }

    /**
     * @return lower-cased words of {@code text}, sorted by length ascending
     */
    public static List<String> splitSearchingTextIntoWords(String text) {
        List<String> result = new ArrayList<>();
        forEachWord(text,
                (beginIndex, endIndex) -> result.add(text.substring(beginIndex, endIndex).toLowerCase()));
        result.sort(searchingWordComparator);
        return result;
    }

    /**
     * Splits {@code text} into lexemes and merges them into {@code inOutLexemes},
     * then removes every lexeme that is a prefix of an already-present longer one.
     * Relies on the set being reverse-ordered (see {@link #buildSortedSet()}):
     * each lexeme is iterated immediately before its prefixes.
     */
    public static void splitIndexingTextIntoLexemes(final String text, SortedSet<String> inOutLexemes) {
        splitIndexingTextIntoLexemes(text, (Collection<String>) inOutLexemes);
        if (inOutLexemes.isEmpty()) {
            return;
        }

        Iterator<String> i = inOutLexemes.iterator();
        String target = i.next();
        while (i.hasNext()) {
            String next = i.next();
            if (target.startsWith(next)) {
                i.remove(); // 'next' is a redundant prefix of the kept lexeme
            } else {
                target = next;
            }
        }
    }

    // Splits each lower-cased word of the text into lexemes; no prefix reduction here.
    private static void splitIndexingTextIntoLexemes(final String text, Collection<String> inOutLexemes) {
        if (text == null || text.isEmpty()) {
            return;
        }

        forEachWord(text, (beginIndex, endIndex) -> {
            splitIntoLexeme(text.substring(beginIndex, endIndex).toLowerCase(), inOutLexemes);
            return true;
        });
    }

    // Emits the suffix of 'word' starting at each alphanumeric run (i.e. the word
    // itself and its tails after every non-alphanumeric separator).
    private static void splitIntoLexeme(final String word, Collection<String> destination) {
        int beginLexemePos = 0;
        for (int i = 0; i < word.length(); ++i) {
            char c = word.charAt(i);
            if (!Character.isAlphabetic(c) && !Character.isDigit(c)) {
                if (beginLexemePos != -1) {
                    destination.add(word.substring(beginLexemePos));
                    beginLexemePos = -1;
                }
            } else if (beginLexemePos == -1) {
                beginLexemePos = i;
            }
        }

        if (beginLexemePos != -1) {
            destination.add(word.substring(beginLexemePos));
        }
    }

    /**
     * Inserts {@code id} into the sorted packed-long array {@code ids} (8 bytes each,
     * big-endian per TypeConvert). Returns {@code ids} unchanged if already present.
     * {@code ids} must be non-null.
     */
    public static byte[] appendId(long id, byte[] ids) {
        int pos = binarySearch(id, ids);
        if (pos >= 0) {
            return ids; // already present
        }
        pos = -pos - 1; // decode insertion point
        return TypeConvert.allocateBuffer(ids.length + Key.ID_BYTE_SIZE)
                .put(ids, 0, pos)
                .putLong(id)
                .put(ids, pos, ids.length - pos)
                .array();
    }

    /**
     * Removes {@code id} from the sorted packed-long array {@code ids}.
     *
     * @return the shortened array, or {@code null} if {@code ids} is null or
     *         {@code id} is not present (i.e. nothing changed)
     */
    public static byte[] removeId(long id, byte[] ids) {
        if (ids == null) {
            return null;
        }
        int pos = binarySearch(id, ids);
        if (pos < 0) {
            return null;
        }
        byte[] newIds = new byte[ids.length - Key.ID_BYTE_SIZE];
        System.arraycopy(ids, 0, newIds, 0, pos);
        System.arraycopy(ids, pos + Key.ID_BYTE_SIZE, newIds, pos, ids.length - pos - Key.ID_BYTE_SIZE);
        return newIds;
    }

    /** @return number of ids stored in the packed array */
    public static int getIdCount(byte[] ids) {
        return ids.length / Key.ID_BYTE_SIZE;
    }

    /**
     * Checks that every searching word is a prefix of some distinct lexeme of the
     * indexed texts (each lexeme may satisfy at most one word).
     *
     * @param sortedSearchingWords words sorted by length ascending
     *                             (see {@link #splitSearchingTextIntoWords(String)})
     * @param tempList             scratch buffer, overwritten
     */
    public static boolean contains(final List<String> sortedSearchingWords, final String[] indexingTexts, List<String> tempList) {
        tempList.clear();
        for (String text : indexingTexts) {
            splitIndexingTextIntoLexemes(text, tempList);
        }
        tempList.sort(searchingWordComparator);
        if (sortedSearchingWords.size() > tempList.size()) {
            return false; // cannot possibly match one lexeme per word
        }
        int matchCount = 0;
        for (String word : sortedSearchingWords) {
            for (int j = 0; j < tempList.size(); ++j) {
                if (tempList.get(j).startsWith(word)) {
                    tempList.remove(j); // consume the lexeme so it matches only once
                    ++matchCount;
                    break;
                }
            }
        }
        return matchCount == sortedSearchingWords.size();
    }

    /**
     * Removes {@code id} from every block of every given lexeme. Blocks that become
     * empty are deleted; untouched blocks are left as-is.
     */
    public static void removeIndexedLexemes(PrefixIndex index, long id, Collection<String> lexemes, DBTransaction transaction) throws DatabaseException {
        if (lexemes.isEmpty()) {
            return;
        }

        try (DBIterator iterator = transaction.createIterator(index.columnFamily)) {
            for (String lexeme : lexemes) {
                KeyValue keyValue = iterator.seek(PrefixIndexKey.buildKeyPatternForEdit(lexeme, index));
                while (keyValue != null) {
                    byte[] newIds = removeId(id, keyValue.getValue());
                    if (newIds != null) {
                        if (newIds.length != 0) {
                            transaction.put(index.columnFamily, keyValue.getKey(), newIds);
                        } else {
                            transaction.delete(index.columnFamily, keyValue.getKey());
                        }
                    }
                    keyValue = iterator.next();
                }
            }
        }
    }

    /**
     * Same as {@link #removeIndexedLexemes(PrefixIndex, long, Collection, DBTransaction)}
     * for the DB-struct index addressed through a data command.
     */
    public static void removeIndexedLexemes(DBPrefixIndex index, long id, Collection<String> lexemes, DBTable table, DBDataCommand dataCommand) throws DatabaseException {
        if (lexemes.isEmpty()) {
            return;
        }

        try (DBIterator iterator = dataCommand.createIterator(table.getIndexColumnFamily())) {
            for (String lexeme : lexemes) {
                KeyValue keyValue = iterator.seek(PrefixIndexKey.buildKeyPatternForEdit(lexeme, index));
                while (keyValue != null) {
                    byte[] newIds = removeId(id, keyValue.getValue());
                    if (newIds != null) {
                        if (newIds.length != 0) {
                            dataCommand.put(table.getIndexColumnFamily(), keyValue.getKey(), newIds);
                        } else {
                            dataCommand.delete(table.getIndexColumnFamily(), keyValue.getKey());
                        }
                    }
                    keyValue = iterator.next();
                }
            }
        }
    }

    /** Inserts {@code id} under every given lexeme of a {@link PrefixIndex}. */
    public static void insertIndexedLexemes(PrefixIndex index, long id, Collection<String> lexemes, DBTransaction transaction) throws DatabaseException {
        if (lexemes.isEmpty()) {
            return;
        }

        try (DBIterator iterator = transaction.createIterator(index.columnFamily)) {
            for (String lexeme : lexemes) {
                byte[][] entry = findInsertionPoint(iterator,
                        PrefixIndexKey.buildKeyPatternForEdit(lexeme, index),
                        new PrefixIndexKey(lexeme, index).pack(),
                        id);
                transaction.put(index.columnFamily, entry[0], entry[1]);
            }
        }
    }

    /** Inserts {@code id} under every given lexeme of a {@link DBPrefixIndex} via a data command. */
    public static void insertIndexedLexemes(DBPrefixIndex index, long id, Collection<String> lexemes, DBTable table, DBDataCommand dataCommand) throws DatabaseException {
        if (lexemes.isEmpty()) {
            return;
        }

        try (DBIterator iterator = dataCommand.createIterator(table.getIndexColumnFamily())) {
            for (String lexeme : lexemes) {
                byte[][] entry = findInsertionPoint(iterator,
                        PrefixIndexKey.buildKeyPatternForEdit(lexeme, index),
                        new PrefixIndexKey(lexeme, index).pack(),
                        id);
                dataCommand.put(table.getIndexColumnFamily(), entry[0], entry[1]);
            }
        }
    }

    /** Inserts {@code id} under every given lexeme of a {@link DBPrefixIndex} via a transaction. */
    public static void insertIndexedLexemes(DBPrefixIndex index, long id, Collection<String> lexemes, String indexColumnFamily, DBTransaction transaction) throws DatabaseException {
        if (lexemes.isEmpty()) {
            return;
        }

        try (DBIterator iterator = transaction.createIterator(indexColumnFamily)) {
            for (String lexeme : lexemes) {
                byte[][] entry = findInsertionPoint(iterator,
                        PrefixIndexKey.buildKeyPatternForEdit(lexeme, index),
                        new PrefixIndexKey(lexeme, index).pack(),
                        id);
                transaction.put(indexColumnFamily, entry[0], entry[1]);
            }
        }
    }

    /**
     * Locates the block of a lexeme that should receive {@code id} and builds the
     * resulting value. Shared by all {@code insertIndexedLexemes} overloads.
     *
     * <p>Blocks hold ids in ascending order and blocks are scanned in key order:
     * the id goes into the first block whose last id is greater, otherwise it is
     * appended to the final block — or, if that block is full
     * ({@link #PREFERRED_MAX_ID_COUNT_PER_BLOCK}), a fresh next block is started.
     *
     * @param iterator   iterator over the index column family
     * @param keyPattern seek pattern covering all blocks of the lexeme
     * @param freshKey   packed key for a brand-new first block of the lexeme
     * @return {@code {key, packedIds}} to put into the column family
     */
    private static byte[][] findInsertionPoint(DBIterator iterator, byte[] keyPattern, byte[] freshKey, long id) throws DatabaseException {
        KeyValue keyValue = iterator.seek(keyPattern);
        if (keyValue == null) {
            // No block for this lexeme yet: create the first one.
            return new byte[][]{freshKey, TypeConvert.pack(id)};
        }
        while (true) {
            long lastId = TypeConvert.unpackLong(keyValue.getValue(), keyValue.getValue().length - FieldKey.ID_BYTE_SIZE);
            if (id < lastId) {
                // id belongs inside this block (appendId keeps the block sorted).
                return new byte[][]{keyValue.getKey(), appendId(id, keyValue.getValue())};
            }
            KeyValue prevKeyValue = keyValue;
            keyValue = iterator.next();
            if (keyValue == null) {
                // Reached the last block of the lexeme.
                byte[] key = prevKeyValue.getKey();
                if (getIdCount(prevKeyValue.getValue()) < PREFERRED_MAX_ID_COUNT_PER_BLOCK) {
                    return new byte[][]{key, appendId(id, prevKeyValue.getValue())};
                }
                // Last block is full: open the next block with this id alone.
                PrefixIndexKey.incrementBlockNumber(key);
                return new byte[][]{key, TypeConvert.pack(id)};
            }
        }
    }

    /**
     * Binary search for {@code value} in a packed array of big-endian longs.
     *
     * @return byte offset of the match if found; otherwise
     *         {@code -(insertionByteOffset + 1)} (same convention as
     *         {@link Arrays#binarySearch(long[], long)}, scaled to bytes)
     * @throws IllegalArgumentException if {@code longs.length} is not a multiple of 8
     */
    private static int binarySearch(long value, byte[] longs) {
        if ((longs.length % Long.BYTES) != 0) {
            throw new IllegalArgumentException("Size of longs must be multiple of " + Long.BYTES);
        }
        int low = 0;
        int high = (longs.length / Long.BYTES) - 1;
        while (low <= high) {
            int mid = (low + high) >>> 1; // overflow-safe midpoint
            long midVal = TypeConvert.unpackLong(longs, mid * Long.BYTES);
            if (midVal < value)
                low = mid + 1;
            else if (midVal > value)
                high = mid - 1;
            else
                return mid * Long.BYTES; // key found
        }
        return -(low * Long.BYTES + 1); // key not found
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy