package org.lumongo.server.index;
import com.hazelcast.core.IExecutorService;
import com.hazelcast.core.ILock;
import com.hazelcast.core.Member;
import com.mongodb.BasicDBObject;
import com.mongodb.MongoClient;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.UpdateOptions;
import info.debatty.java.lsh.SuperBit;
import org.apache.commons.pool.BasePoolableObjectFactory;
import org.apache.commons.pool.impl.GenericObjectPool;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.BytesRef;
import org.bson.Document;
import org.lumongo.LumongoConstants;
import org.lumongo.cluster.message.Lumongo;
import org.lumongo.cluster.message.Lumongo.AssociatedDocument;
import org.lumongo.cluster.message.Lumongo.DeleteRequest;
import org.lumongo.cluster.message.Lumongo.FetchType;
import org.lumongo.cluster.message.Lumongo.FieldConfig;
import org.lumongo.cluster.message.Lumongo.FieldSort;
import org.lumongo.cluster.message.Lumongo.GetFieldNamesResponse;
import org.lumongo.cluster.message.Lumongo.GetNumberOfDocsResponse;
import org.lumongo.cluster.message.Lumongo.GetTermsRequest;
import org.lumongo.cluster.message.Lumongo.GetTermsResponse;
import org.lumongo.cluster.message.Lumongo.GetTermsResponseInternal;
import org.lumongo.cluster.message.Lumongo.HighlightRequest;
import org.lumongo.cluster.message.Lumongo.IndexSegmentResponse;
import org.lumongo.cluster.message.Lumongo.IndexSettings;
import org.lumongo.cluster.message.Lumongo.LastIndexResult;
import org.lumongo.cluster.message.Lumongo.LastResult;
import org.lumongo.cluster.message.Lumongo.QueryRequest;
import org.lumongo.cluster.message.Lumongo.ResultDocument;
import org.lumongo.cluster.message.Lumongo.ScoredResult;
import org.lumongo.cluster.message.Lumongo.SegmentCountResponse;
import org.lumongo.cluster.message.Lumongo.SegmentResponse;
import org.lumongo.cluster.message.Lumongo.SortRequest;
import org.lumongo.cluster.message.Lumongo.SortValue;
import org.lumongo.cluster.message.Lumongo.SortValues;
import org.lumongo.cluster.message.Lumongo.StoreRequest;
import org.lumongo.server.config.ClusterConfig;
import org.lumongo.server.config.IndexConfig;
import org.lumongo.server.config.IndexConfigUtil;
import org.lumongo.server.config.MongoConfig;
import org.lumongo.server.exceptions.InvalidIndexConfig;
import org.lumongo.server.exceptions.SegmentDoesNotExist;
import org.lumongo.server.hazelcast.HazelcastManager;
import org.lumongo.server.hazelcast.UpdateSegmentsTask;
import org.lumongo.server.search.LumongoMultiFieldQueryParser;
import org.lumongo.server.search.QueryCacheKey;
import org.lumongo.server.search.QueryWithFilters;
import org.lumongo.storage.constants.MongoConstants;
import org.lumongo.storage.lucene.DistributedDirectory;
import org.lumongo.storage.lucene.MongoDirectory;
import org.lumongo.storage.rawfiles.DocumentStorage;
import org.lumongo.storage.rawfiles.MongoDocumentStorage;
import org.lumongo.util.DeletingFileVisitor;
import org.lumongo.util.LockHandler;
import org.lumongo.util.LumongoThreadFactory;
import org.lumongo.util.LumongoUtil;
import org.lumongo.util.SegmentUtil;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
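/**
 * A single logical Lumongo index. This class owns the segments hosted on this
 * node, routes documents to segments by unique id, rebalances segment ownership
 * across Hazelcast members, and persists index settings and raw documents in
 * MongoDB.
 */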
public class LumongoIndex implements IndexSegmentInterface {
public static final String CONFIG_SUFFIX = "_config";
private static final String STORAGE_DB_SUFFIX = "_rs";
private static final String RESULT_STORAGE_COLLECTION = "resultStorage";
private final static Logger log = Logger.getLogger(LumongoIndex.class);
private static final String SETTINGS_ID = "settings";
private final IndexConfig indexConfig;
private final MongoConfig mongoConfig;
private final ClusterConfig clusterConfig;
private final MongoClient mongo;
private final GenericObjectPool<LumongoMultiFieldQueryParser> parsers;
private final ConcurrentHashMap<Integer, LumongoSegment> segmentMap;
private final ConcurrentHashMap<Integer, ILock> hazelLockMap;
private final ReadWriteLock indexLock;
private final ExecutorService segmentPool;
private final int numberOfSegments;
private final String indexName;
private final HazelcastManager hazelcastManager;
private final DocumentStorage documentStorage;
private Map<Member, Set<Integer>> memberToSegmentMap;
private Map<Integer, Member> segmentToMemberMap;
private Timer commitTimer;
private TimerTask commitTask;
private LumongoAnalyzerFactory lumongoAnalyzerFactory;
private LockHandler documentLockHandler;
private FacetsConfig facetsConfig;
private LumongoIndex(HazelcastManager hazelcastManager, MongoConfig mongoConfig, ClusterConfig clusterConfig, IndexConfig indexConfig) throws Exception {
this.documentLockHandler = new LockHandler();
this.hazelcastManager = hazelcastManager;
this.mongoConfig = mongoConfig;
this.clusterConfig = clusterConfig;
this.indexConfig = indexConfig;
this.indexName = indexConfig.getIndexName();
this.numberOfSegments = indexConfig.getNumberOfSegments();
this.mongo = new MongoClient(mongoConfig.getMongoHost(), mongoConfig.getMongoPort());
MongoClient storageMongoClient = new MongoClient(mongoConfig.getMongoHost(), mongoConfig.getMongoPort());
String rawStorageDb = mongoConfig.getDatabaseName() + "_" + indexName + STORAGE_DB_SUFFIX;
this.documentStorage = new MongoDocumentStorage(storageMongoClient, indexName, rawStorageDb, RESULT_STORAGE_COLLECTION, clusterConfig.isSharded());
this.segmentPool = Executors.newCachedThreadPool(new LumongoThreadFactory(indexName + "-segments"));
this.parsers = new GenericObjectPool<>(new BasePoolableObjectFactory<LumongoMultiFieldQueryParser>() {
@Override
public LumongoMultiFieldQueryParser makeObject() throws Exception {
return new LumongoMultiFieldQueryParser(getPerFieldAnalyzer(), LumongoIndex.this.indexConfig);
}
});
this.indexLock = new ReentrantReadWriteLock(true);
this.segmentMap = new ConcurrentHashMap<>();
this.hazelLockMap = new ConcurrentHashMap<>();
commitTimer = new Timer(indexName + "-CommitTimer", true);
commitTask = new TimerTask() {
@Override
public void run() {
if (LumongoIndex.this.indexConfig.getIndexSettings().getIdleTimeWithoutCommit() != 0) {
doCommit(false);
}
}
};
commitTimer.scheduleAtFixedRate(commitTask, 1000, 1000);
this.lumongoAnalyzerFactory = new LumongoAnalyzerFactory(indexConfig);
}
public static IndexConfig loadIndexSettings(MongoClient mongo, String database, String indexName) throws InvalidIndexConfig {
MongoDatabase db = mongo.getDatabase(database);
MongoCollection<Document> dbCollection = db.getCollection(indexName + CONFIG_SUFFIX);
Document settings = dbCollection.find(new BasicDBObject(MongoConstants.StandardFields._ID, SETTINGS_ID)).first();
if (settings == null) {
throw new InvalidIndexConfig(indexName, "Index settings not found");
}
return IndexConfigUtil.fromDocument(settings);
}
public static LumongoIndex loadIndex(HazelcastManager hazelcastManager, MongoConfig mongoConfig, MongoClient mongo, ClusterConfig clusterConfig,
String indexName) throws Exception {
IndexConfig indexConfig = loadIndexSettings(mongo, mongoConfig.getDatabaseName(), indexName);
log.info("Loading index <" + indexName + ">");
return new LumongoIndex(hazelcastManager, mongoConfig, clusterConfig, indexConfig);
}
public static LumongoIndex createIndex(HazelcastManager hazelcastManager, MongoConfig mongoConfig, ClusterConfig clusterConfig, IndexConfig indexConfig)
throws Exception {
LumongoIndex i = new LumongoIndex(hazelcastManager, mongoConfig, clusterConfig, indexConfig);
i.storeIndexSettings();
return i;
}
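/*
 * Minimal usage sketch (hypothetical wiring; in the real server the managers and
 * configs below are assembled during node startup, which is not shown here):
 *
 *   IndexConfig indexConfig = ...;  // built from an IndexSettings message
 *   LumongoIndex index = LumongoIndex.createIndex(hazelcastManager, mongoConfig, clusterConfig, indexConfig);
 *   index.loadAllSegments();        // single-node case: this member takes every segment
 */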
/** From org.apache.solr.search.QueryUtils **/
public static boolean isNegative(Query q) {
if (!(q instanceof BooleanQuery))
return false;
BooleanQuery bq = (BooleanQuery) q;
Collection<BooleanClause> clauses = bq.clauses();
if (clauses.size() == 0)
return false;
for (BooleanClause clause : clauses) {
if (!clause.isProhibited())
return false;
}
return true;
}
/** Fixes a negative query by adding a MatchAllDocs query clause.
* The query passed in *must* be a negative query.
*/
public static Query fixNegativeQuery(Query q) {
float boost = 1f;
if (q instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) q;
boost = bq.getBoost();
q = bq.getQuery();
}
BooleanQuery bq = (BooleanQuery) q;
BooleanQuery.Builder newBqB = new BooleanQuery.Builder();
newBqB.setDisableCoord(bq.isCoordDisabled());
newBqB.setMinimumNumberShouldMatch(bq.getMinimumNumberShouldMatch());
for (BooleanClause clause : bq) {
newBqB.add(clause);
}
newBqB.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
BooleanQuery newBq = newBqB.build();
return new BoostQuery(newBq, boost);
}
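/*
 * Example: "-title:foo" parses to a BooleanQuery whose only clause is prohibited,
 * which can never match anything on its own. isNegative(...) detects that case and
 * fixNegativeQuery(...) rewrites it to the equivalent of "+*:* -title:foo", i.e.
 * match every document except those containing title:foo, preserving any boost.
 */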
public void updateIndexSettings(IndexSettings request) {
indexLock.writeLock().lock();
try {
indexConfig.configure(request);
storeIndexSettings();
}
finally {
indexLock.writeLock().unlock();
}
}
public FieldConfig.FieldType getSortFieldType(String fieldName) {
return indexConfig.getFieldTypeForSortField(fieldName);
}
private void doCommit(boolean force) {
indexLock.readLock().lock();
try {
Collection<LumongoSegment> segments = segmentMap.values();
for (LumongoSegment segment : segments) {
try {
if (force) {
segment.forceCommit();
}
else {
segment.doCommit();
}
}
catch (Exception e) {
log.error("Failed to flush segment <" + segment.getSegmentNumber() + "> for index <" + indexName + ">: " + e.getClass().getSimpleName()
+ ": ", e);
}
}
}
finally {
indexLock.readLock().unlock();
}
}
public void updateSegmentMap(Map<Member, Set<Integer>> newMemberToSegmentMap) {
indexLock.writeLock().lock();
try {
log.info("Updating segments map");
this.memberToSegmentMap = newMemberToSegmentMap;
this.segmentToMemberMap = new HashMap<>();
for (Member m : memberToSegmentMap.keySet()) {
for (int i : memberToSegmentMap.get(m)) {
segmentToMemberMap.put(i, m);
}
}
Member self = hazelcastManager.getSelf();
Set<Integer> newSegments = memberToSegmentMap.get(self);
log.info("Setting segments for this node <" + self + "> to <" + newSegments + ">");
segmentMap.keySet().stream().filter(segmentNumber -> !newSegments.contains(segmentNumber)).forEach(segmentNumber -> {
try {
unloadSegment(segmentNumber, false);
}
catch (Exception e) {
log.error("Error unloading segment <" + segmentNumber + "> for index <" + indexName + ">");
log.error(e.getClass().getSimpleName() + ": ", e);
}
});
newSegments.stream().filter(segmentNumber -> !segmentMap.containsKey(segmentNumber)).forEach(segmentNumber -> {
try {
loadSegment(segmentNumber);
}
catch (Exception e) {
log.error("Error loading segment <" + segmentNumber + "> for index <" + indexName + ">");
log.error(e.getClass().getSimpleName() + ": ", e);
}
});
}
finally {
indexLock.writeLock().unlock();
}
}
public void loadAllSegments() throws Exception {
indexLock.writeLock().lock();
try {
Member self = hazelcastManager.getSelf();
this.memberToSegmentMap = new HashMap<>();
this.memberToSegmentMap.put(self, new HashSet<>());
for (int segmentNumber = 0; segmentNumber < numberOfSegments; segmentNumber++) {
loadSegment(segmentNumber);
this.memberToSegmentMap.get(self).add(segmentNumber);
}
this.segmentToMemberMap = new HashMap<>();
for (Member m : memberToSegmentMap.keySet()) {
for (int i : memberToSegmentMap.get(m)) {
segmentToMemberMap.put(i, m);
}
}
}
finally {
indexLock.writeLock().unlock();
}
}
public void unload(boolean terminate) throws IOException {
indexLock.writeLock().lock();
try {
log.info("Canceling timers for <" + indexName + ">");
commitTask.cancel();
commitTimer.cancel();
if (!terminate) {
log.info("Committing <" + indexName + ">");
doCommit(true);
}
log.info("Shutting segment pool for <" + indexName + ">");
segmentPool.shutdownNow();
for (Integer segmentNumber : segmentMap.keySet()) {
unloadSegment(segmentNumber, terminate);
}
}
finally {
indexLock.writeLock().unlock();
}
}
private void loadSegment(int segmentNumber) throws Exception {
indexLock.writeLock().lock();
try {
if (!segmentMap.containsKey(segmentNumber)) {
String lockName = indexName + "-" + segmentNumber;
ILock hzLock = hazelcastManager.getLock(lockName);
hazelLockMap.put(segmentNumber, hzLock);
log.info("Waiting for lock for index <" + indexName + "> segment <" + segmentNumber + ">");
hzLock.lock();
log.info("Obtained lock for index <" + indexName + "> segment <" + segmentNumber + ">");
//Just for clarity
IndexSegmentInterface indexSegmentInterface = this;
//Doesn't need to be done each time (it is done in StartNode), but it helps with test cases that take different paths
FacetsConfig.DEFAULT_DIM_CONFIG.multiValued = true;
facetsConfig = new FacetsConfig();
LumongoSegment s = new LumongoSegment(segmentNumber, indexSegmentInterface, indexConfig, facetsConfig, documentStorage);
segmentMap.put(segmentNumber, s);
log.info("Loaded segment <" + segmentNumber + "> for index <" + indexName + ">");
log.info("Current segments <" + (new TreeSet<>(segmentMap.keySet())) + "> for index <" + indexName + ">");
}
}
finally {
indexLock.writeLock().unlock();
}
}
public IndexWriter getIndexWriter(int segmentNumber) throws Exception {
Directory d;
if (indexConfig.getIndexSettings().getStoreIndexOnDisk()) {
d = MMapDirectory.open(getPathForIndex(segmentNumber));
}
else {
String indexSegmentDbName = getIndexSegmentDbName(segmentNumber);
String indexSegmentCollectionName = getIndexSegmentCollectionName(segmentNumber) + "_index";
MongoDirectory mongoDirectory = new MongoDirectory(mongo, indexSegmentDbName, indexSegmentCollectionName, clusterConfig.isSharded(),
clusterConfig.getIndexBlockSize());
d = new DistributedDirectory(mongoDirectory);
}
IndexWriterConfig config = new IndexWriterConfig(getPerFieldAnalyzer());
config.setMaxBufferedDocs(Integer.MAX_VALUE);
config.setRAMBufferSizeMB(100);
config.setIndexDeletionPolicy(new IndexDeletionPolicy() {
public void onInit(List<? extends IndexCommit> commits) {
// Note that commits.size() should normally be 1:
onCommit(commits);
}
/**
* Deletes all commits except the most recent one.
*/
@Override
public void onCommit(List<? extends IndexCommit> commits) {
// Note that commits.size() should normally be 2 (if not
// called by onInit above):
int size = commits.size();
for (int i = 0; i < size - 1; i++) {
//log.info("Deleting old commit for segment <" + segmentNumber + "> on index <" + indexName);
commits.get(i).delete();
}
}
});
//ConcurrentMergeScheduler concurrentMergeScheduler = new ConcurrentMergeScheduler();
//concurrentMergeScheduler.setMaxMergesAndThreads(8,2);
//config.setMergeScheduler(concurrentMergeScheduler);
config.setUseCompoundFile(false);
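// Cache small, newly flushed or merged segments in RAM (merges up to 15 MB, 90 MB
// total cache) so near-real-time readers do not have to hit the backing directory.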
NRTCachingDirectory nrtCachingDirectory = new NRTCachingDirectory(d, 15, 90);
return new IndexWriter(nrtCachingDirectory, config);
}
private Path getPathForIndex(int segmentNumber) {
return Paths.get("indexes", indexName + "_" + segmentNumber + "_idx");
}
private Path getPathForFacetsIndex(int segmentNumber) {
return Paths.get("indexes", indexName + "_" + segmentNumber + "_facets");
}
public DirectoryTaxonomyWriter getTaxoWriter(int segmentNumber) throws IOException {
Directory d;
if (indexConfig.getIndexSettings().getStoreIndexOnDisk()) {
d = MMapDirectory.open(getPathForFacetsIndex(segmentNumber));
}
else {
String indexSegmentDbName = getIndexSegmentDbName(segmentNumber);
String indexSegmentCollectionName = getIndexSegmentCollectionName(segmentNumber) + "_facets";
MongoDirectory mongoDirectory = new MongoDirectory(mongo, indexSegmentDbName, indexSegmentCollectionName, clusterConfig.isSharded(),
clusterConfig.getIndexBlockSize());
d = new DistributedDirectory(mongoDirectory);
}
NRTCachingDirectory nrtCachingDirectory = new NRTCachingDirectory(d, 2, 10);
return new DirectoryTaxonomyWriter(nrtCachingDirectory);
}
public PerFieldAnalyzerWrapper getPerFieldAnalyzer() throws Exception {
return lumongoAnalyzerFactory.getPerFieldAnalyzer();
}
private String getIndexSegmentCollectionName(int segmentNumber) {
return indexName + "_" + segmentNumber;
}
private String getIndexSegmentDbName(int segmentNumber) {
return mongoConfig.getDatabaseName() + "_" + indexName;
}
public void unloadSegment(int segmentNumber, boolean terminate) throws IOException {
indexLock.writeLock().lock();
try {
ILock hzLock = hazelLockMap.get(segmentNumber);
try {
if (segmentMap.containsKey(segmentNumber)) {
LumongoSegment s = segmentMap.remove(segmentNumber);
if (s != null) {
log.info("Closing segment <" + segmentNumber + "> for index <" + indexName + ">");
s.close(terminate);
log.info("Removed segment <" + segmentNumber + "> for index <" + indexName + ">");
log.info("Current segments <" + (new TreeSet<>(segmentMap.keySet())) + "> for index <" + indexName + ">");
}
}
}
finally {
try {
hzLock.forceUnlock();
log.info("Unlocked lock for index <" + indexName + "> segment <" + segmentNumber + ">");
}
catch (Exception e) {
log.error("Failed to unlock <" + segmentNumber + ">: ", e);
}
}
}
finally {
indexLock.writeLock().unlock();
}
}
/**
* Called on an older cluster node when a new member is added.
*
* @param currentMembers
* - current cluster members
* @param memberAdded
* - member that is being added
*/
public void handleServerAdded(Set<Member> currentMembers, Member memberAdded) {
indexLock.writeLock().lock();
try {
forceBalance(currentMembers);
}
finally {
indexLock.writeLock().unlock();
}
}
public void forceBalance(Set currentMembers) {
indexLock.writeLock().lock();
try {
mapSanityCheck(currentMembers);
balance(currentMembers);
IExecutorService executorService = hazelcastManager.getExecutorService();
List<Future<Void>> results = new ArrayList<>();
for (Member m : currentMembers) {
try {
UpdateSegmentsTask ust = new UpdateSegmentsTask(m.getSocketAddress().getPort(), indexName, memberToSegmentMap);
if (!m.localMember()) {
Future<Void> dt = executorService.submitToMember(ust, m);
results.add(dt);
}
}
catch (Exception e) {
log.error(e.getClass().getSimpleName() + ": ", e);
}
}
try {
UpdateSegmentsTask ust = new UpdateSegmentsTask(hazelcastManager.getHazelcastPort(), indexName, memberToSegmentMap);
ust.call();
}
catch (Exception e) {
log.error(e.getClass().getSimpleName() + ": ", e);
}
for (Future<Void> result : results) {
try {
result.get();
}
catch (Exception e) {
log.error(e.getClass().getSimpleName() + ": ", e);
}
}
}
finally {
indexLock.writeLock().unlock();
}
}
/**
* Called on an older cluster node when a member is removed.
*
* @param currentMembers
* - current cluster members
* @param memberRemoved
* - member that is being removed
*/
public void handleServerRemoved(Set<Member> currentMembers, Member memberRemoved) {
indexLock.writeLock().lock();
try {
Set<Integer> segmentsToRedist = memberToSegmentMap.remove(memberRemoved);
if (segmentsToRedist != null) {
Member first = currentMembers.iterator().next();
memberToSegmentMap.get(first).addAll(segmentsToRedist);
}
forceBalance(currentMembers);
}
finally {
indexLock.writeLock().unlock();
}
}
private void balance(Set<Member> currentMembers) {
indexLock.writeLock().lock();
try {
boolean balanced = false;
do {
int minSegmentsForMember = Integer.MAX_VALUE;
int maxSegmentsForMember = Integer.MIN_VALUE;
Member minMember = null;
Member maxMember = null;
for (Member m : currentMembers) {
int segmentsForMemberCount = 0;
Set<Integer> segmentsForMember = memberToSegmentMap.get(m);
if (segmentsForMember != null) {
segmentsForMemberCount = segmentsForMember.size();
}
if (segmentsForMemberCount < minSegmentsForMember) {
minSegmentsForMember = segmentsForMemberCount;
minMember = m;
}
if (segmentsForMemberCount > maxSegmentsForMember) {
maxSegmentsForMember = segmentsForMemberCount;
maxMember = m;
}
}
if ((maxSegmentsForMember - minSegmentsForMember) > 1) {
int valueToMove = memberToSegmentMap.get(maxMember).iterator().next();
log.info("Moving segment <" + valueToMove + "> from <" + maxMember + "> to <" + minMember + "> of Index <" + indexName + ">");
memberToSegmentMap.get(maxMember).remove(valueToMove);
if (!memberToSegmentMap.containsKey(minMember)) {
memberToSegmentMap.put(minMember, new HashSet<>());
}
memberToSegmentMap.get(minMember).add(valueToMove);
}
else {
balanced = true;
}
}
while (!balanced);
}
finally {
indexLock.writeLock().unlock();
}
}
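/*
 * Example: with memberToSegmentMap {A=[0,1,2,3], B=[4]}, the first pass finds
 * max=A (4 segments) and min=B (1 segment); the difference exceeds 1, so one of
 * A's segments moves to B, giving A three segments and B two. The next pass sees
 * a difference of 1 and stops.
 */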
private void mapSanityCheck(Set<Member> currentMembers) {
indexLock.writeLock().lock();
try {
// add all segments to a set
Set<Integer> allSegments = new HashSet<>();
for (int segment = 0; segment < indexConfig.getNumberOfSegments(); segment++) {
allSegments.add(segment);
}
// ensure all members are in the map and contain an empty set
for (Member m : currentMembers) {
if (!memberToSegmentMap.containsKey(m)) {
memberToSegmentMap.put(m, new HashSet<>());
}
if (memberToSegmentMap.get(m) == null) {
memberToSegmentMap.put(m, new HashSet<>());
}
}
// get all members of the map
Set<Member> mapMembers = memberToSegmentMap.keySet();
for (Member m : mapMembers) {
// get current segments
Set<Integer> segments = memberToSegmentMap.get(m);
Set<Integer> invalidSegments = new HashSet<>();
// check if valid segment
segments.stream().filter(segment -> !allSegments.contains(segment)).forEach(segment -> {
if ((segment < 0) || (segment >= indexConfig.getNumberOfSegments())) {
log.error("Segment <" + segment + "> should not exist for cluster");
}
else {
log.error("Segment <" + segment + "> is duplicated in node <" + m + ">");
}
invalidSegments.add(segment);
});
// remove any invalid segments for the cluster
segments.removeAll(invalidSegments);
// remove from all segments to keep track of segments already used
allSegments.removeAll(segments);
}
// adds any segments that are missing back to the first node
if (!allSegments.isEmpty()) {
log.error("Segments <" + allSegments + "> are missing from the cluster. Adding back in.");
memberToSegmentMap.values().iterator().next().addAll(allSegments);
}
}
finally {
indexLock.writeLock().unlock();
}
}
public LumongoSegment findSegmentFromUniqueId(String uniqueId) throws SegmentDoesNotExist {
indexLock.readLock().lock();
try {
int segmentNumber = getSegmentNumberForUniqueId(uniqueId);
LumongoSegment s = segmentMap.get(segmentNumber);
if (s == null) {
throw new SegmentDoesNotExist(indexName, segmentNumber);
}
return s;
}
finally {
indexLock.readLock().unlock();
}
}
public Member findMember(String uniqueId) {
indexLock.readLock().lock();
try {
int segmentNumber = getSegmentNumberForUniqueId(uniqueId);
return segmentToMemberMap.get(segmentNumber);
}
finally {
indexLock.readLock().unlock();
}
}
public Map<Integer, Member> getSegmentToMemberMap() {
return new HashMap<>(segmentToMemberMap);
}
private int getSegmentNumberForUniqueId(String uniqueId) {
int numSegments = indexConfig.getNumberOfSegments();
return SegmentUtil.findSegmentForUniqueId(uniqueId, numSegments);
}
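// Routing note: SegmentUtil.findSegmentForUniqueId derives the segment from the
// unique id deterministically (in essence, a stable hash of the id modulo
// numSegments), so a given id always maps to the same segment and owning member.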
public void deleteIndex() throws Exception {
{
MongoDatabase db = mongo.getDatabase(mongoConfig.getDatabaseName());
MongoCollection<Document> dbCollection = db.getCollection(indexName + CONFIG_SUFFIX);
dbCollection.drop();
}
if (indexConfig.getIndexSettings().getStoreIndexOnDisk()) {
for (int i = 0; i < numberOfSegments; i++) {
{
Path p = getPathForIndex(i);
Files.walkFileTree(p, new DeletingFileVisitor());
}
{
Path p = getPathForFacetsIndex(i);
Files.walkFileTree(p, new DeletingFileVisitor());
}
}
}
else {
for (int i = 0; i < numberOfSegments; i++) {
String dbName = getIndexSegmentDbName(i);
String collectionName = getIndexSegmentCollectionName(i);
MongoDirectory.dropIndex(mongo, dbName, collectionName);
}
}
documentStorage.drop();
for (int i = 0; i < numberOfSegments; i++) {
String dbName = getIndexSegmentDbName(i);
MongoDatabase db = mongo.getDatabase(dbName);
db.drop();
}
}
public void storeInternal(StoreRequest storeRequest) throws Exception {
indexLock.readLock().lock();
try {
long timestamp = hazelcastManager.getClusterTime();
String uniqueId = storeRequest.getUniqueId();
ReadWriteLock documentLock = documentLockHandler.getLock(uniqueId);
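// Per-document write lock: serializes concurrent stores/deletes of the same
// uniqueId on this node while letting unrelated documents proceed in parallel.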
try {
documentLock.writeLock().lock();
if (storeRequest.hasResultDocument()) {
ResultDocument resultDocument = storeRequest.getResultDocument();
Document document;
if (resultDocument.hasDocument()) {
document = LumongoUtil.byteArrayToMongoDocument(resultDocument.getDocument().toByteArray());
}
else {
document = new Document();
}
LumongoSegment s = findSegmentFromUniqueId(uniqueId);
s.index(uniqueId, timestamp, document, resultDocument.getMetadataList());
if (indexConfig.getIndexSettings().getStoreDocumentInMongo()) {
documentStorage.storeSourceDocument(storeRequest.getUniqueId(), timestamp, document, resultDocument.getMetadataList());
}
}
if (storeRequest.getClearExistingAssociated()) {
documentStorage.deleteAssociatedDocuments(uniqueId);
}
for (AssociatedDocument ad : storeRequest.getAssociatedDocumentList()) {
ad = AssociatedDocument.newBuilder(ad).setTimestamp(timestamp).build();
documentStorage.storeAssociatedDocument(ad);
}
}
finally {
documentLock.writeLock().unlock();
}
}
finally {
indexLock.readLock().unlock();
}
}
public void deleteDocument(DeleteRequest deleteRequest) throws Exception {
indexLock.readLock().lock();
try {
String uniqueId = deleteRequest.getUniqueId();
ReadWriteLock documentLock = documentLockHandler.getLock(uniqueId);
try {
documentLock.writeLock().lock();
if (deleteRequest.getDeleteDocument()) {
LumongoSegment s = findSegmentFromUniqueId(deleteRequest.getUniqueId());
s.deleteDocument(uniqueId);
documentStorage.deleteSourceDocument(uniqueId);
}
if (deleteRequest.getDeleteAllAssociated()) {
documentStorage.deleteAssociatedDocuments(uniqueId);
}
else if (deleteRequest.hasFilename()) {
String fileName = deleteRequest.getFilename();
documentStorage.deleteAssociatedDocument(uniqueId, fileName);
}
}
finally {
documentLock.writeLock().unlock();
}
}
finally {
indexLock.readLock().unlock();
}
}
public void handleCosineSimQuery(QueryWithFilters queryWithFilters, Lumongo.CosineSimRequest cosineSimRequest) {
indexLock.readLock().lock();
try {
double[] vector = new double[cosineSimRequest.getVectorCount()];
for (int i = 0; i < cosineSimRequest.getVectorCount(); i++) {
vector[i] = cosineSimRequest.getVector(i);
}
SuperBit superBit = indexConfig.getSuperBitForField(cosineSimRequest.getField());
boolean[] signature = superBit.signature(vector);
int mm = (int) ((1 - (Math.acos(cosineSimRequest.getSimilarity()) / Math.PI)) * signature.length);
BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder();
booleanQueryBuilder.setMinimumNumberShouldMatch(mm);
for (int i = 0; i < signature.length; i++) {
String fieldName = LumongoConstants.SUPERBIT_PREFIX + "." + cosineSimRequest.getField() + "." + i;
booleanQueryBuilder.add(new BooleanClause(new TermQuery(new org.apache.lucene.index.Term(fieldName, signature[i] ? "1" : "0")),
BooleanClause.Occur.SHOULD));
queryWithFilters.addSimilarityOverride(
Lumongo.FieldSimilarity.newBuilder().setField(fieldName).setSimilarity(Lumongo.AnalyzerSettings.Similarity.CONSTANT).build());
}
BooleanQuery query = booleanQueryBuilder.build();
queryWithFilters.addScoredFilterQuery(query);
}
finally {
indexLock.readLock().unlock();
}
}
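/*
 * Why mm = (1 - acos(similarity) / PI) * signature.length: SuperBit is a
 * locality-sensitive hash for cosine similarity, where the probability that two
 * vectors agree on a single signature bit is 1 - theta/PI (theta = the angle
 * between them). For a target cosine similarity s, theta = acos(s), so the
 * expected number of matching bits is (1 - acos(s)/PI) * signatureLength;
 * requiring at least that many SHOULD clauses approximates "cosine >= s".
 * E.g. s = 0.7 with a 64-bit signature gives (int) ((1 - acos(0.7)/PI) * 64) = 47.
 */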
public Query getQuery(Lumongo.Query lumongoQuery) throws Exception {
indexLock.readLock().lock();
Lumongo.Query.Operator defaultOperator = lumongoQuery.getDefaultOp();
String queryText = lumongoQuery.getQ();
Integer minimumShouldMatchNumber = lumongoQuery.getMm();
List<String> queryFields = lumongoQuery.getQfList();
try {
Operator operator = null;
if (defaultOperator.equals(Lumongo.Query.Operator.OR)) {
operator = Operator.OR;
}
else if (defaultOperator.equals(Lumongo.Query.Operator.AND)) {
operator = Operator.AND;
}
else {
//this should never happen
log.error("Unknown operator type: <" + defaultOperator + ">");
}
LumongoMultiFieldQueryParser qp = null;
if (queryText == null || queryText.isEmpty()) {
if (queryFields.isEmpty()) {
return new MatchAllDocsQuery();
}
else {
queryText = "*";
}
}
try {
qp = parsers.borrowObject();
qp.setMinimumNumberShouldMatch(minimumShouldMatchNumber);
qp.setDefaultOperator(operator);
if (lumongoQuery.getDismax()) {
qp.enableDismax(lumongoQuery.getDismaxTie());
}
else {
qp.disableDismax();
}
if (queryFields.isEmpty()) {
qp.setDefaultField(indexConfig.getIndexSettings().getDefaultSearchField());
}
else {
Set<String> fields = new LinkedHashSet<>();
HashMap<String, Float> boostMap = new HashMap<>();
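// Query fields may carry a per-field boost using Lucene's "field^boost" syntax,
// e.g. "title^2.5" searches the title field with its scores scaled by 2.5.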
for (String queryField : queryFields) {
if (queryField.contains("^")) {
try {
float boost = Float.parseFloat(queryField.substring(queryField.indexOf("^") + 1));
queryField = queryField.substring(0, queryField.indexOf("^"));
boostMap.put(queryField, boost);
}
catch (Exception e) {
throw new IllegalArgumentException("Invalid queryText field boost <" + queryField + ">");
}
}
fields.add(queryField);
}
qp.setDefaultFields(fields, boostMap);
}
Query query = qp.parse(queryText);
boolean negative = isNegative(query);
if (negative) {
query = fixNegativeQuery(query);
}
return query;
}
finally {
parsers.returnObject(qp);
}
}
finally {
indexLock.readLock().unlock();
}
}
public IndexSegmentResponse queryInternal(final QueryWithFilters queryWithFilters, final QueryRequest queryRequest) throws Exception {
indexLock.readLock().lock();
try {
int amount = queryRequest.getAmount() + queryRequest.getStart();
if (indexConfig.getNumberOfSegments() != 1) {
if (!queryRequest.getFetchFull() && (amount > 0)) {
amount = (int) (((amount / numberOfSegments) + indexConfig.getIndexSettings().getMinSegmentRequest()) * indexConfig.getIndexSettings()
.getRequestFactor());
}
}
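// Over-fetch heuristic: with multiple segments, each segment is asked for more
// than its proportional share so the merged top-N is very likely complete. For
// example, amount=100 over 4 segments with minSegmentRequest=10 and
// requestFactor=2.0 asks each segment for ((100 / 4) + 10) * 2.0 = 70 results.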
final int requestedAmount = amount;
final HashMap<Integer, FieldDoc> lastScoreDocMap = new HashMap<>();
FieldDoc after;
LastResult lr = queryRequest.getLastResult();
if (lr != null) {
for (LastIndexResult lir : lr.getLastIndexResultList()) {
if (indexName.equals(lir.getIndexName())) {
for (ScoredResult sr : lir.getLastForSegmentList()) {
int docId = sr.getDocId();
float score = sr.getScore();
SortRequest sortRequest = queryRequest.getSortRequest();
Object[] sortTerms = new Object[sortRequest.getFieldSortCount()];
int sortTermsIndex = 0;
SortValues sortValues = sr.getSortValues();
for (FieldSort fs : sortRequest.getFieldSortList()) {
String sortField = fs.getSortField();
FieldConfig.FieldType sortType = indexConfig.getFieldTypeForSortField(sortField);
if (sortType == null) {
throw new Exception(sortField + " is not defined as a sortable field");
}
SortValue sortValue = sortValues.getSortValue(sortTermsIndex);
if (sortValue.getExists()) {
if (IndexConfigUtil.isNumericOrDateFieldType(sortType)) {
if (IndexConfigUtil.isNumericIntFieldType(sortType)) {
sortTerms[sortTermsIndex] = sortValue.getIntegerValue();
}
else if (IndexConfigUtil.isNumericLongFieldType(sortType)) {
sortTerms[sortTermsIndex] = sortValue.getLongValue();
}
else if (IndexConfigUtil.isNumericFloatFieldType(sortType)) {
sortTerms[sortTermsIndex] = sortValue.getFloatValue();
}
else if (IndexConfigUtil.isNumericDoubleFieldType(sortType)) {
sortTerms[sortTermsIndex] = sortValue.getDoubleValue();
}
else if (IndexConfigUtil.isDateFieldType(sortType)) {
sortTerms[sortTermsIndex] = sortValue.getDateValue();
}
else {
throw new Exception("Invalid numeric sort type <" + sortType + "> for sort field <" + sortField + ">");
}
}
else { //string
sortTerms[sortTermsIndex] = new BytesRef(sortValue.getStringValue());
}
}
else {
sortTerms[sortTermsIndex] = null;
}
sortTermsIndex++;
}
after = new FieldDoc(docId, score, sortTerms, sr.getSegment());
lastScoreDocMap.put(sr.getSegment(), after);
}
}
}
}
IndexSegmentResponse.Builder builder = IndexSegmentResponse.newBuilder();
List<Future<SegmentResponse>> responses = new ArrayList<>();
for (final LumongoSegment segment : segmentMap.values()) {
Future<SegmentResponse> response = segmentPool.submit(() -> {
QueryCacheKey queryCacheKey = null;
if (!queryRequest.getDontCache()) {
queryCacheKey = new QueryCacheKey(queryRequest);
}
return segment
.querySegment(queryWithFilters, requestedAmount, lastScoreDocMap.get(segment.getSegmentNumber()), queryRequest.getFacetRequest(),
queryRequest.getSortRequest(), queryCacheKey, queryRequest.getResultFetchType(), queryRequest.getDocumentFieldsList(),
queryRequest.getDocumentMaskedFieldsList(), queryRequest.getHighlightRequestList(), queryRequest.getAnalysisRequestList(),
queryRequest.getDebug());
});
responses.add(response);
}
for (Future<SegmentResponse> response : responses) {
try {
SegmentResponse rs = response.get();
builder.addSegmentReponse(rs);
}
catch (ExecutionException e) {
Throwable t = e.getCause();
if (t instanceof OutOfMemoryError) {
throw (OutOfMemoryError) t;
}
throw ((Exception) e.getCause());
}
}
builder.setIndexName(indexName);
return builder.build();
}
finally {
indexLock.readLock().unlock();
}
}
public Integer getNumberOfSegments() {
return numberOfSegments;
}
public double getSegmentTolerance() {
return indexConfig.getIndexSettings().getSegmentTolerance();
}
private void storeIndexSettings() {
indexLock.writeLock().lock();
try {
MongoDatabase db = mongo.getDatabase(mongoConfig.getDatabaseName());
MongoCollection<Document> dbCollection = db.getCollection(indexConfig.getIndexName() + CONFIG_SUFFIX);
Document settings = IndexConfigUtil.toDocument(indexConfig);
settings.put(MongoConstants.StandardFields._ID, SETTINGS_ID);
Document query = new Document();
query.put(MongoConstants.StandardFields._ID, SETTINGS_ID);
dbCollection.replaceOne(query, settings, new UpdateOptions().upsert(true));
}
finally {
indexLock.writeLock().unlock();
}
}
public void reloadIndexSettings() throws Exception {
indexLock.writeLock().lock();
try {
IndexConfig newIndexConfig = loadIndexSettings(mongo, mongoConfig.getDatabaseName(), indexName);
IndexSettings indexSettings = newIndexConfig.getIndexSettings();
indexConfig.configure(indexSettings);
parsers.clear();
//force analyzer to be fetched first so it doesn't fail only on one segment below
getPerFieldAnalyzer();
for (LumongoSegment s : segmentMap.values()) {
try {
s.updateIndexSettings(indexSettings);
}
catch (Exception ignored) {
}
}
}
finally {
indexLock.writeLock().unlock();
}
}
public void optimize() throws Exception {
indexLock.readLock().lock();
try {
for (final LumongoSegment segment : segmentMap.values()) {
segment.optimize();
}
}
finally {
indexLock.readLock().unlock();
}
}
public GetNumberOfDocsResponse getNumberOfDocs() throws Exception {
indexLock.readLock().lock();
try {
List<Future<SegmentCountResponse>> responses = new ArrayList<>();
for (final LumongoSegment segment : segmentMap.values()) {
Future<SegmentCountResponse> response = segmentPool.submit(segment::getNumberOfDocs);
responses.add(response);
}
GetNumberOfDocsResponse.Builder responseBuilder = GetNumberOfDocsResponse.newBuilder();
responseBuilder.setNumberOfDocs(0);
for (Future<SegmentCountResponse> response : responses) {
try {
SegmentCountResponse scr = response.get();
responseBuilder.addSegmentCountResponse(scr);
responseBuilder.setNumberOfDocs(responseBuilder.getNumberOfDocs() + scr.getNumberOfDocs());
}
catch (InterruptedException e) {
throw new Exception("Interrupted while waiting for segment results");
}
catch (ExecutionException e) {
Throwable cause = e.getCause();
if (cause instanceof Exception) {
throw e;
}
throw new Exception(cause);
}
}
return responseBuilder.build();
}
finally {
indexLock.readLock().unlock();
}
}
public GetFieldNamesResponse getFieldNames() throws Exception {
indexLock.readLock().lock();
try {
List<Future<GetFieldNamesResponse>> responses = new ArrayList<>();
for (final LumongoSegment segment : segmentMap.values()) {
Future<GetFieldNamesResponse> response = segmentPool.submit(segment::getFieldNames);
responses.add(response);
}
GetFieldNamesResponse.Builder responseBuilder = GetFieldNamesResponse.newBuilder();
Set<String> fields = new HashSet<>();
for (Future<GetFieldNamesResponse> response : responses) {
try {
GetFieldNamesResponse gfnr = response.get();
fields.addAll(gfnr.getFieldNameList());
}
catch (ExecutionException e) {
Throwable cause = e.getCause();
if (cause instanceof Exception) {
throw e;
}
else {
throw new Exception(cause);
}
}
}
fields.remove(LumongoConstants.TIMESTAMP_FIELD);
fields.remove(LumongoConstants.STORED_DOC_FIELD);
fields.remove(LumongoConstants.STORED_META_FIELD);
fields.remove(LumongoConstants.ID_FIELD);
fields.remove(LumongoConstants.FIELDS_LIST_FIELD);
List<String> toRemove = new ArrayList<>();
for (String field : fields) {
if (field.startsWith(FacetsConfig.DEFAULT_INDEX_FIELD_NAME)) {
toRemove.add(field);
}
if (field.startsWith(LumongoConstants.SUPERBIT_PREFIX)) {
toRemove.add(field);
}
}
fields.removeAll(toRemove);
responseBuilder.addAllFieldName(fields);
return responseBuilder.build();
}
finally {
indexLock.readLock().unlock();
}
}
public void clear() throws Exception {
indexLock.writeLock().lock();
try {
List<Future<Void>> responses = new ArrayList<>();
for (final LumongoSegment segment : segmentMap.values()) {
Future<Void> response = segmentPool.submit(() -> {
segment.clear();
return null;
});
responses.add(response);
}
for (Future<Void> response : responses) {
try {
response.get();
}
catch (ExecutionException e) {
Throwable cause = e.getCause();
if (cause instanceof Exception) {
throw e;
}
else {
throw new Exception(cause);
}
}
}
documentStorage.deleteAllDocuments();
}
finally {
indexLock.writeLock().unlock();
}
}
public GetTermsResponseInternal getTerms(final GetTermsRequest request) throws Exception {
indexLock.readLock().lock();
try {
List<Future<GetTermsResponse>> responses = new ArrayList<>();
for (final LumongoSegment segment : segmentMap.values()) {
Future<GetTermsResponse> response = segmentPool.submit(() -> segment.getTerms(request));
responses.add(response);
}
GetTermsResponseInternal.Builder getTermsResponseInternalBuilder = GetTermsResponseInternal.newBuilder();
for (Future<GetTermsResponse> response : responses) {
try {
GetTermsResponse gtr = response.get();
getTermsResponseInternalBuilder.addGetTermsResponse(gtr);
}
catch (ExecutionException e) {
Throwable cause = e.getCause();
if (cause instanceof Exception) {
throw e;
}
else {
throw new Exception(cause);
}
}
}
return getTermsResponseInternalBuilder.build();
}
finally {
indexLock.readLock().unlock();
}
}
// Handle forwarding and locking here like other store requests
public void storeAssociatedDocument(String uniqueId, String fileName, InputStream is, boolean compress, long clusterTime,
HashMap<String, String> metadataMap) throws Exception {
indexLock.readLock().lock();
try {
documentStorage.storeAssociatedDocument(uniqueId, fileName, is, compress, clusterTime, metadataMap);
}
finally {
indexLock.readLock().unlock();
}
}
public InputStream getAssociatedDocumentStream(String uniqueId, String fileName) throws IOException {
indexLock.readLock().lock();
try {
return documentStorage.getAssociatedDocumentStream(uniqueId, fileName);
}
finally {
indexLock.readLock().unlock();
}
}
public void getAssociatedDocuments(OutputStream outputStream, Document filter) throws IOException {
indexLock.readLock().lock();
try {
documentStorage.getAssociatedDocuments(outputStream, filter);
}
finally {
indexLock.readLock().unlock();
}
}
public ResultDocument getSourceDocument(String uniqueId, Long timestamp, FetchType resultFetchType, List<String> fieldsToReturn, List<String> fieldsToMask,
List<HighlightRequest> highlightRequests) throws Exception {
indexLock.readLock().lock();
try {
LumongoSegment s = findSegmentFromUniqueId(uniqueId);
return s.getSourceDocument(uniqueId, timestamp, resultFetchType, fieldsToReturn, fieldsToMask);
}
finally {
indexLock.readLock().unlock();
}
}
public AssociatedDocument getAssociatedDocument(String uniqueId, String fileName, FetchType associatedFetchType) throws Exception {
indexLock.readLock().lock();
try {
return documentStorage.getAssociatedDocument(uniqueId, fileName, associatedFetchType);
}
finally {
indexLock.readLock().unlock();
}
}
public List<AssociatedDocument> getAssociatedDocuments(String uniqueId, FetchType associatedFetchType) throws Exception {
indexLock.readLock().lock();
try {
return documentStorage.getAssociatedDocuments(uniqueId, associatedFetchType);
}
finally {
indexLock.readLock().unlock();
}
}
}