
org.neo4j.index.impl.lucene.legacy.LuceneDataSource (neo4j-lucene-index)
Integration layer between Neo4j and Lucene, providing one possible implementation of the Index API.
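For orientation, here is a minimal sketch of how the legacy (manual) index API that this module backs is typically used from embedded Neo4j 3.x. The store path, index name and key/value below are illustrative placeholders, not taken from this source file.

import java.io.File;

import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
import org.neo4j.graphdb.index.Index;
import org.neo4j.graphdb.index.IndexHits;

public class LegacyIndexExample
{
    public static void main( String[] args )
    {
        // Illustrative store location
        GraphDatabaseService db = new GraphDatabaseFactory().newEmbeddedDatabase( new File( "target/example-db" ) );
        try ( Transaction tx = db.beginTx() )
        {
            // Manual ("legacy") index, served underneath by the Lucene integration in this module
            Index<Node> people = db.index().forNodes( "people" );
            Node alice = db.createNode();
            people.add( alice, "name", "Alice" );

            // Exact lookup; IndexHits is a ResourceIterator and must be closed
            try ( IndexHits<Node> hits = people.get( "name", "Alice" ) )
            {
                System.out.println( "hits: " + hits.size() );
            }
            tx.success();
        }
        db.shutdown();
    }
}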
/*
* Copyright (c) 2002-2016 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.index.impl.lucene.legacy;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.graphdb.config.Setting;
import org.neo4j.graphdb.factory.GraphDatabaseSettings;
import org.neo4j.graphdb.index.IndexManager;
import org.neo4j.helpers.collection.Pair;
import org.neo4j.helpers.collection.PrefetchingResourceIterator;
import org.neo4j.io.fs.FileSystemAbstraction;
import org.neo4j.kernel.configuration.Config;
import org.neo4j.kernel.impl.factory.GraphDatabaseFacadeFactory;
import org.neo4j.kernel.impl.index.IndexConfigStore;
import org.neo4j.kernel.impl.index.IndexEntityType;
import org.neo4j.kernel.lifecycle.LifecycleAdapter;
/**
* A DataSource optimized for the {@link LuceneIndexImplementation}.
*/
public class LuceneDataSource extends LifecycleAdapter
{
private final File storeDir;
private final Config config;
private final FileSystemAbstraction fileSystemAbstraction;
public abstract static class Configuration
{
public static final Setting<Integer> lucene_searcher_cache_size = GraphDatabaseSettings.lucene_searcher_cache_size;
public static final Setting<Boolean> ephemeral = GraphDatabaseFacadeFactory.Configuration.ephemeral;
}
/**
* Default {@link Analyzer} for fulltext parsing.
*/
public static final Analyzer LOWER_CASE_WHITESPACE_ANALYZER = new Analyzer()
{
@Override
protected TokenStreamComponents createComponents( String fieldName )
{
Tokenizer source = new WhitespaceTokenizer();
TokenStream filter = new LowerCaseFilter( source );
return new TokenStreamComponents( source, filter );
}
@Override
public String toString()
{
return "LOWER_CASE_WHITESPACE_ANALYZER";
}
};
public static final Analyzer WHITESPACE_ANALYZER = new WhitespaceAnalyzer();
public static final Analyzer KEYWORD_ANALYZER = new KeywordAnalyzer();
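// Cache of open searcher/writer references, one per index identifier; its size is bounded by
// Configuration.lucene_searcher_cache_size (see init()).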
private IndexClockCache indexSearchers;
private File baseStorePath;
private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
final IndexConfigStore indexStore;
private IndexTypeCache typeCache;
private boolean closed;
private LuceneFilesystemFacade filesystemFacade;
/**
* Constructs this data source.
*/
public LuceneDataSource( File storeDir, Config config, IndexConfigStore indexStore, FileSystemAbstraction fileSystemAbstraction )
{
this.storeDir = storeDir;
this.config = config;
this.indexStore = indexStore;
this.typeCache = new IndexTypeCache( indexStore );
this.fileSystemAbstraction = fileSystemAbstraction;
}
@Override
public void init()
{
this.filesystemFacade = config.get( Configuration.ephemeral ) ? LuceneFilesystemFacade.MEMORY
: LuceneFilesystemFacade.FS;
indexSearchers = new IndexClockCache( config.get( Configuration.lucene_searcher_cache_size ) );
this.baseStorePath = this.filesystemFacade.ensureDirectoryExists( fileSystemAbstraction,
getLuceneIndexStoreDirectory( storeDir ) );
this.filesystemFacade.cleanWriteLocks( baseStorePath );
this.typeCache = new IndexTypeCache( indexStore );
closed = false;
}
public static File getLuceneIndexStoreDirectory(File storeDir)
{
return new File( storeDir, "index" );
}
IndexType getType( IndexIdentifier identifier, boolean recovery )
{
return typeCache.getIndexType( identifier, recovery );
}
@Override
public void shutdown() throws IOException
{
synchronized ( this )
{
if ( closed )
{
return;
}
closed = true;
for ( IndexReference searcher : indexSearchers.values() )
{
searcher.dispose( true );
}
indexSearchers.clear();
}
}
private synchronized IndexReference[] getAllIndexes()
{
Collection<IndexReference> indexReferences = indexSearchers.values();
return indexReferences.toArray( new IndexReference[indexReferences.size()] );
}
void force()
{
for ( IndexReference index : getAllIndexes() )
{
try
{
index.getWriter().commit();
}
catch ( IOException e )
{
throw new RuntimeException( "Unable to commit changes to " + index.getIdentifier(), e );
}
}
}
void getReadLock()
{
lock.readLock().lock();
}
void releaseReadLock()
{
lock.readLock().unlock();
}
void getWriteLock()
{
lock.writeLock().lock();
}
void releaseWriteLock()
{
lock.writeLock().unlock();
}
/**
* If nothing has changed underneath (since the searcher was last created
* or refreshed) {@code searcher} is returned. But if something has changed a
* refreshed searcher is returned. It makes use of
* {@link DirectoryReader#openIfChanged(DirectoryReader, IndexWriter, boolean)}, which is faster than opening an
* index from scratch.
*
* @param searcher the {@link IndexSearcher} to refresh.
* @return a refreshed version of the searcher or, if nothing has changed,
* the given {@code searcher} itself.
* @throws RuntimeException if there's a problem with the index.
*/
private IndexReference refreshSearcher( IndexReference searcher )
{
try
{
// TODO: this cast should always succeed, maybe check nonetheless?
DirectoryReader reader = (DirectoryReader) searcher.getSearcher().getIndexReader();
IndexWriter writer = searcher.getWriter();
IndexReader reopened = DirectoryReader.openIfChanged( reader, writer );
if ( reopened != null )
{
IndexSearcher newSearcher = newIndexSearcher( searcher.getIdentifier(), reopened );
searcher.detachOrClose();
return new IndexReference( searcher.getIdentifier(), newSearcher, writer );
}
return searcher;
}
catch ( IOException e )
{
throw new RuntimeException( e );
}
}
static File getFileDirectory( File storeDir, IndexEntityType type )
{
File path = new File( storeDir, "lucene" );
String extra = type.nameToLowerCase();
return new File( path, extra );
}
static File getFileDirectory( File storeDir, IndexIdentifier identifier )
{
return new File( getFileDirectory( storeDir, identifier.entityType ), identifier.indexName );
}
static Directory getDirectory( File storeDir, IndexIdentifier identifier ) throws IOException
{
return FSDirectory.open( getFileDirectory( storeDir, identifier ).toPath() );
}
static TopFieldCollector scoringCollector( Sort sorting, int n ) throws IOException
{
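// Collect the top n hits according to the given sort. Assuming the standard Lucene 5.x create()
// signature, the three booleans are fillFields, trackDocScores and trackMaxScore, so document scores
// are tracked but stored field values and the maximum score are not.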
return TopFieldCollector.create( sorting, n, false, true, false );
}
IndexReference getIndexSearcher( IndexIdentifier identifier )
{
assertNotClosed();
IndexReference searcher = indexSearchers.get( identifier );
if ( searcher == null )
{
return syncGetIndexSearcher( identifier );
}
synchronized ( searcher )
{
/*
* We need to get again a reference to the searcher because it might be so that
* it was refreshed while we waited. Once in here though no one will mess with
* our searcher
*/
searcher = indexSearchers.get( identifier );
if ( searcher == null || searcher.isClosed() )
{
return syncGetIndexSearcher( identifier );
}
searcher = refreshSearcherIfNeeded( searcher );
searcher.incRef();
return searcher;
}
}
private void assertNotClosed()
{
if ( closed )
{
throw new IllegalStateException( "Lucene index provider has been shut down" );
}
}
synchronized IndexReference syncGetIndexSearcher( IndexIdentifier identifier )
{
try
{
IndexReference searcher = indexSearchers.get( identifier );
if ( searcher == null )
{
IndexWriter writer = newIndexWriter( identifier );
IndexReader reader = DirectoryReader.open( writer );
IndexSearcher indexSearcher = newIndexSearcher( identifier, reader );
searcher = new IndexReference( identifier, indexSearcher, writer );
indexSearchers.put( identifier, searcher );
}
else
{
synchronized ( searcher )
{
searcher = refreshSearcherIfNeeded( searcher );
}
}
searcher.incRef();
return searcher;
}
catch ( IOException e )
{
throw new RuntimeException( e );
}
}
private IndexSearcher newIndexSearcher( IndexIdentifier identifier, IndexReader reader )
{
IndexSearcher searcher = new IndexSearcher( reader );
IndexType type = getType( identifier, false );
if ( type.getSimilarity() != null )
{
searcher.setSimilarity( type.getSimilarity() );
}
return searcher;
}
private IndexReference refreshSearcherIfNeeded( IndexReference searcher )
{
if ( searcher.checkAndClearStale() )
{
searcher = refreshSearcher( searcher );
if ( searcher != null )
{
indexSearchers.put( searcher.getIdentifier(), searcher );
}
}
return searcher;
}
void invalidateIndexSearcher( IndexIdentifier identifier )
{
IndexReference searcher = indexSearchers.get( identifier );
if ( searcher != null )
{
searcher.setStale();
}
}
void deleteIndex( IndexIdentifier identifier, boolean recovery )
{
closeIndex( identifier );
deleteFileOrDirectory( getFileDirectory( baseStorePath, identifier ) );
boolean removeFromIndexStore =
!recovery || (indexStore.has( identifier.entityType.entityClass(), identifier.indexName ));
if ( removeFromIndexStore )
{
indexStore.remove( identifier.entityType.entityClass(), identifier.indexName );
}
typeCache.invalidate( identifier );
}
private static void deleteFileOrDirectory( File file )
{
if ( file.exists() )
{
if ( file.isDirectory() )
{
for ( File child : file.listFiles() )
{
deleteFileOrDirectory( child );
}
}
file.delete();
}
}
private/*synchronized elsewhere*/IndexWriter newIndexWriter( IndexIdentifier identifier )
{
assertNotClosed();
try
{
Directory dir = filesystemFacade.getDirectory( baseStorePath, identifier ); //getDirectory(
// baseStorePath, identifier );
directoryExists( dir );
IndexType type = getType( identifier, false );
IndexWriterConfig writerConfig = new IndexWriterConfig( type.analyzer );
writerConfig.setIndexDeletionPolicy( new MultipleBackupDeletionPolicy() );
Similarity similarity = type.getSimilarity();
if ( similarity != null )
{
writerConfig.setSimilarity( similarity );
}
IndexWriter indexWriter = new IndexWriter( dir, writerConfig );
// TODO We should tamper with this value and see how it affects the
// general performance. Lucene docs says rather <10 for mixed
// reads/writes
// writer.setMergeFactor( 8 );
return indexWriter;
}
catch ( IOException e )
{
throw new RuntimeException( e );
}
}
private boolean directoryExists( Directory dir )
{
try
{
String[] files = dir.listAll();
return files != null && files.length > 0;
}
catch ( IOException e )
{
return false;
}
}
static Document findDocument( IndexType type, IndexSearcher searcher, long entityId )
{
try
{
TopDocs docs = searcher.search( type.idTermQuery( entityId ), 1 );
if ( docs.scoreDocs.length > 0 )
{
return searcher.doc( docs.scoreDocs[0].doc );
}
return null;
}
catch ( IOException e )
{
throw new RuntimeException( e );
}
}
static boolean documentIsEmpty( Document document )
{
List<IndexableField> fields = document.getFields();
for ( IndexableField field : fields )
{
if ( !(LuceneLegacyIndex.KEY_DOC_ID.equals( field.name() ) || LuceneLegacyIndex.KEY_END_NODE_ID.equals( field.name() ) || LuceneLegacyIndex.KEY_START_NODE_ID
.equals( field.name() )) )
{
return false;
}
}
return true;
}
private synchronized void closeIndex( IndexIdentifier identifier )
{
try
{
IndexReference searcher = indexSearchers.remove( identifier );
if ( searcher != null )
{
searcher.dispose( true );
}
}
catch ( IOException e )
{
throw new RuntimeException( "Unable to close lucene writer " + identifier, e );
}
}
public ResourceIterator<File> listStoreFiles( boolean includeLogicalLogs ) throws IOException
{ // Never include logical logs since they are of little importance
final Collection<File> files = new ArrayList<>();
final Collection<Pair<SnapshotDeletionPolicy,IndexCommit>> snapshots = new ArrayList<>();
makeSureAllIndexesAreInstantiated();
for ( IndexReference writer : getAllIndexes() )
{
SnapshotDeletionPolicy deletionPolicy = (SnapshotDeletionPolicy) writer.getWriter().getConfig()
.getIndexDeletionPolicy();
File indexDirectory = getFileDirectory( baseStorePath, writer.getIdentifier() );
IndexCommit commit;
try
{
// Throws IllegalStateException if no commits yet
commit = deletionPolicy.snapshot();
}
catch ( IllegalStateException e )
{
/*
* This is insane but happens if we try to snapshot an existing index
* that has no commits. This is a bad API design - it should return null
* or something. This is not exceptional.
*
* For the time being we just do a commit and try again.
*/
writer.getWriter().commit();
commit = deletionPolicy.snapshot();
}
for ( String fileName : commit.getFileNames() )
{
files.add( new File( indexDirectory, fileName ) );
}
snapshots.add( Pair.of( deletionPolicy, commit ) );
}
return new PrefetchingResourceIterator<File>()
{
private final Iterator<File> filesIterator = files.iterator();
@Override
protected File fetchNextOrNull()
{
return filesIterator.hasNext() ? filesIterator.next() : null;
}
@Override
public void close()
{
for ( Pair<SnapshotDeletionPolicy,IndexCommit> policyAndCommit : snapshots )
{
try
{
policyAndCommit.first().release( policyAndCommit.other() );
}
catch ( IOException e )
{
// TODO What to do?
e.printStackTrace();
}
}
}
};
}
public ResourceIterator<File> listStoreFiles() throws IOException
{
return listStoreFiles( false );
}
private void makeSureAllIndexesAreInstantiated()
{
for ( String name : indexStore.getNames( Node.class ) )
{
Map<String, String> config = indexStore.get( Node.class, name );
if ( config.get( IndexManager.PROVIDER ).equals( LuceneIndexImplementation.SERVICE_NAME ) )
{
IndexIdentifier identifier = new IndexIdentifier( IndexEntityType.Node, name );
getIndexSearcher( identifier ).close();
}
}
for ( String name : indexStore.getNames( Relationship.class ) )
{
Map<String, String> config = indexStore.get( Relationship.class, name );
if ( config.get( IndexManager.PROVIDER ).equals( LuceneIndexImplementation.SERVICE_NAME ) )
{
IndexIdentifier identifier = new IndexIdentifier( IndexEntityType.Relationship, name );
getIndexSearcher( identifier ).close();
}
}
}
private enum LuceneFilesystemFacade
{
FS
{
@Override
Directory getDirectory( File baseStorePath, IndexIdentifier identifier ) throws IOException
{
return FSDirectory.open( getFileDirectory( baseStorePath, identifier ).toPath() );
}
@Override
void cleanWriteLocks( File dir )
{
if ( !dir.isDirectory() )
{
return;
}
for ( File file : dir.listFiles() )
{
if ( file.isDirectory() )
{
cleanWriteLocks( file );
}
else if ( file.getName().equals( "write.lock" ) )
{
boolean success = file.delete();
assert success;
}
}
}
@Override
File ensureDirectoryExists( FileSystemAbstraction fileSystem, File dir )
{
if ( !dir.exists() && !dir.mkdirs() )
{
String message = String.format( "Unable to create directory path[%s] for Neo4j store" + ".",
dir.getAbsolutePath() );
throw new RuntimeException( message );
}
return dir;
}
},
MEMORY
{
@Override
Directory getDirectory( File baseStorePath, IndexIdentifier identifier )
{
return new RAMDirectory();
}
@Override
void cleanWriteLocks( File path )
{
}
@Override
File ensureDirectoryExists( FileSystemAbstraction fileSystem, File path )
{
try
{
fileSystem.mkdirs( path );
}
catch ( IOException e )
{
throw new RuntimeException( e );
}
return path;
}
};
abstract Directory getDirectory( File baseStorePath, IndexIdentifier identifier ) throws IOException;
abstract File ensureDirectoryExists( FileSystemAbstraction fileSystem, File path );
abstract void cleanWriteLocks( File path );
}
}
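listStoreFiles above relies on Lucene's SnapshotDeletionPolicy to pin a consistent commit while index files are copied. The following is a minimal standalone sketch of that pattern under stated assumptions; it is not part of this class, and the directory path and analyzer are illustrative placeholders.

import java.nio.file.Paths;

import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SnapshotBackupSketch
{
    public static void main( String[] args ) throws Exception
    {
        Directory dir = FSDirectory.open( Paths.get( "target/example-index" ) ); // illustrative path
        IndexWriterConfig config = new IndexWriterConfig( new KeywordAnalyzer() );
        config.setIndexDeletionPolicy( new SnapshotDeletionPolicy( new KeepOnlyLastCommitDeletionPolicy() ) );
        try ( IndexWriter writer = new IndexWriter( dir, config ) )
        {
            // Ensure at least one commit exists; snapshot() throws IllegalStateException otherwise
            writer.commit();
            SnapshotDeletionPolicy policy = (SnapshotDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
            IndexCommit commit = policy.snapshot();
            try
            {
                // While the snapshot is held, these files will not be deleted by merges or further commits
                for ( String fileName : commit.getFileNames() )
                {
                    System.out.println( "would copy: " + fileName );
                }
            }
            finally
            {
                policy.release( commit ); // allow Lucene to delete the pinned files again
            }
        }
    }
}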