
org.neo4j.index.impl.lucene.legacy.LuceneDataSource (neo4j-lucene-index)
Integration layer between Neo4j and Lucene, providing one possible implementation of the Index API.
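For orientation, here is a minimal sketch of how the legacy (manual) index API that this module backs is typically used from embedded Neo4j 3.x. The store path, index name and key/value below are illustrative placeholders, not taken from this source file.

import java.io.File;

import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
import org.neo4j.graphdb.index.Index;
import org.neo4j.graphdb.index.IndexHits;

public class LegacyIndexExample
{
    public static void main( String[] args )
    {
        // Illustrative store location
        GraphDatabaseService db = new GraphDatabaseFactory().newEmbeddedDatabase( new File( "target/example-db" ) );
        try ( Transaction tx = db.beginTx() )
        {
            // Manual ("legacy") index, served underneath by the Lucene integration in this module
            Index<Node> people = db.index().forNodes( "people" );
            Node alice = db.createNode();
            people.add( alice, "name", "Alice" );

            // Exact lookup; IndexHits is a ResourceIterator and must be closed
            try ( IndexHits<Node> hits = people.get( "name", "Alice" ) )
            {
                System.out.println( "hits: " + hits.size() );
            }
            tx.success();
        }
        db.shutdown();
    }
}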
/*
* Copyright (c) 2002-2016 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.index.impl.lucene.legacy;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.graphdb.config.Setting;
import org.neo4j.graphdb.factory.GraphDatabaseSettings;
import org.neo4j.graphdb.index.IndexManager;
import org.neo4j.helpers.collection.Pair;
import org.neo4j.helpers.collection.PrefetchingResourceIterator;
import org.neo4j.io.fs.FileSystemAbstraction;
import org.neo4j.kernel.configuration.Config;
import org.neo4j.kernel.impl.factory.GraphDatabaseFacadeFactory;
import org.neo4j.kernel.impl.index.IndexConfigStore;
import org.neo4j.kernel.impl.index.IndexEntityType;
import org.neo4j.kernel.lifecycle.LifecycleAdapter;
/**
* A DataSource optimized for the {@link LuceneIndexImplementation}.
*/
public class LuceneDataSource extends LifecycleAdapter
{
private final File storeDir;
private final Config config;
private final FileSystemAbstraction fileSystemAbstraction;
public abstract static class Configuration
{
public static final Setting<Integer> lucene_searcher_cache_size = GraphDatabaseSettings.lucene_searcher_cache_size;
public static final Setting<Boolean> ephemeral = GraphDatabaseFacadeFactory.Configuration.ephemeral;
}
/**
* Default {@link Analyzer} for fulltext parsing.
*/
public static final Analyzer LOWER_CASE_WHITESPACE_ANALYZER = new Analyzer()
{
@Override
protected TokenStreamComponents createComponents( String fieldName )
{
Tokenizer source = new WhitespaceTokenizer();
TokenStream filter = new LowerCaseFilter( source );
return new TokenStreamComponents( source, filter );
}
@Override
public String toString()
{
return "LOWER_CASE_WHITESPACE_ANALYZER";
}
};
public static final Analyzer WHITESPACE_ANALYZER = new WhitespaceAnalyzer();
public static final Analyzer KEYWORD_ANALYZER = new KeywordAnalyzer();
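// Cache of open searcher/writer references, one per index identifier; its size is bounded by
// Configuration.lucene_searcher_cache_size (see init()).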
private IndexClockCache indexSearchers;
private File baseStorePath;
private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
final IndexConfigStore indexStore;
private IndexTypeCache typeCache;
private boolean closed;
private LuceneFilesystemFacade filesystemFacade;
/**
* Constructs this data source.
*/
public LuceneDataSource( File storeDir, Config config, IndexConfigStore indexStore, FileSystemAbstraction fileSystemAbstraction )
{
this.storeDir = storeDir;
this.config = config;
this.indexStore = indexStore;
this.typeCache = new IndexTypeCache( indexStore );
this.fileSystemAbstraction = fileSystemAbstraction;
}
@Override
public void init()
{
this.filesystemFacade = config.get( Configuration.ephemeral ) ? LuceneFilesystemFacade.MEMORY
: LuceneFilesystemFacade.FS;
indexSearchers = new IndexClockCache( config.get( Configuration.lucene_searcher_cache_size ) );
this.baseStorePath = this.filesystemFacade.ensureDirectoryExists( fileSystemAbstraction,
getLuceneIndexStoreDirectory( storeDir ) );
this.filesystemFacade.cleanWriteLocks( baseStorePath );
this.typeCache = new IndexTypeCache( indexStore );
closed = false;
}
public static File getLuceneIndexStoreDirectory(File storeDir)
{
return new File( storeDir, "index" );
}
IndexType getType( IndexIdentifier identifier, boolean recovery )
{
return typeCache.getIndexType( identifier, recovery );
}
@Override
public void shutdown() throws IOException
{
synchronized ( this )
{
if ( closed )
{
return;
}
closed = true;
for ( IndexReference searcher : indexSearchers.values() )
{
searcher.dispose( true );
}
indexSearchers.clear();
}
}
private synchronized IndexReference[] getAllIndexes()
{
Collection<IndexReference> indexReferences = indexSearchers.values();
return indexReferences.toArray( new IndexReference[indexReferences.size()] );
}
void force()
{
for ( IndexReference index : getAllIndexes() )
{
try
{
index.getWriter().commit();
}
catch ( IOException e )
{
throw new RuntimeException( "Unable to commit changes to " + index.getIdentifier(), e );
}
}
}
void getReadLock()
{
lock.readLock().lock();
}
void releaseReadLock()
{
lock.readLock().unlock();
}
void getWriteLock()
{
lock.writeLock().lock();
}
void releaseWriteLock()
{
lock.writeLock().unlock();
}
/**
* If nothing has changed underneath (since the searcher was last created
* or refreshed) {@code searcher} is returned. But if something has changed a
* refreshed searcher is returned. It makes use of
* {@link DirectoryReader#openIfChanged(DirectoryReader, IndexWriter, boolean)}, which is faster than opening an
* index from scratch.
*
* @param searcher the {@link IndexSearcher} to refresh.
* @return a refreshed version of the searcher or, if nothing has changed,
* the given {@code searcher} itself.
* @throws RuntimeException if there's a problem with the index.
*/
private IndexReference refreshSearcher( IndexReference searcher )
{
try
{
// TODO: this cast should always succeed, maybe check nonetheless?
DirectoryReader reader = (DirectoryReader) searcher.getSearcher().getIndexReader();
IndexWriter writer = searcher.getWriter();
IndexReader reopened = DirectoryReader.openIfChanged( reader, writer );
if ( reopened != null )
{
IndexSearcher newSearcher = newIndexSearcher( searcher.getIdentifier(), reopened );
searcher.detachOrClose();
return new IndexReference( searcher.getIdentifier(), newSearcher, writer );
}
return searcher;
}
catch ( IOException e )
{
throw new RuntimeException( e );
}
}
static File getFileDirectory( File storeDir, IndexEntityType type )
{
File path = new File( storeDir, "lucene" );
String extra = type.nameToLowerCase();
return new File( path, extra );
}
static File getFileDirectory( File storeDir, IndexIdentifier identifier )
{
return new File( getFileDirectory( storeDir, identifier.entityType ), identifier.indexName );
}
static Directory getDirectory( File storeDir, IndexIdentifier identifier ) throws IOException
{
return FSDirectory.open( getFileDirectory( storeDir, identifier ).toPath() );
}
static TopFieldCollector scoringCollector( Sort sorting, int n ) throws IOException
{
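// Collect the top n hits according to the given sort. Assuming the standard Lucene 5.x create()
// signature, the three booleans are fillFields, trackDocScores and trackMaxScore, so document scores
// are tracked but stored field values and the maximum score are not.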
return TopFieldCollector.create( sorting, n, false, true, false );
}
IndexReference getIndexSearcher( IndexIdentifier identifier )
{
assertNotClosed();
IndexReference searcher = indexSearchers.get( identifier );
if ( searcher == null )
{
return syncGetIndexSearcher( identifier );
}
synchronized ( searcher )
{
/*
* We need to get again a reference to the searcher because it might be so that
* it was refreshed while we waited. Once in here though no one will mess with
* our searcher
*/
searcher = indexSearchers.get( identifier );
if ( searcher == null || searcher.isClosed() )
{
return syncGetIndexSearcher( identifier );
}
searcher = refreshSearcherIfNeeded( searcher );
searcher.incRef();
return searcher;
}
}
private void assertNotClosed()
{
if ( closed )
{
throw new IllegalStateException( "Lucene index provider has been shut down" );
}
}
synchronized IndexReference syncGetIndexSearcher( IndexIdentifier identifier )
{
try
{
IndexReference searcher = indexSearchers.get( identifier );
if ( searcher == null )
{
IndexWriter writer = newIndexWriter( identifier );
IndexReader reader = DirectoryReader.open( writer );
IndexSearcher indexSearcher = newIndexSearcher( identifier, reader );
searcher = new IndexReference( identifier, indexSearcher, writer );
indexSearchers.put( identifier, searcher );
}
else
{
synchronized ( searcher )
{
searcher = refreshSearcherIfNeeded( searcher );
}
}
searcher.incRef();
return searcher;
}
catch ( IOException e )
{
throw new RuntimeException( e );
}
}
private IndexSearcher newIndexSearcher( IndexIdentifier identifier, IndexReader reader )
{
IndexSearcher searcher = new IndexSearcher( reader );
IndexType type = getType( identifier, false );
if ( type.getSimilarity() != null )
{
searcher.setSimilarity( type.getSimilarity() );
}
return searcher;
}
private IndexReference refreshSearcherIfNeeded( IndexReference searcher )
{
if ( searcher.checkAndClearStale() )
{
searcher = refreshSearcher( searcher );
if ( searcher != null )
{
indexSearchers.put( searcher.getIdentifier(), searcher );
}
}
return searcher;
}
void invalidateIndexSearcher( IndexIdentifier identifier )
{
IndexReference searcher = indexSearchers.get( identifier );
if ( searcher != null )
{
searcher.setStale();
}
}
void deleteIndex( IndexIdentifier identifier, boolean recovery )
{
closeIndex( identifier );
deleteFileOrDirectory( getFileDirectory( baseStorePath, identifier ) );
boolean removeFromIndexStore =
!recovery || (indexStore.has( identifier.entityType.entityClass(), identifier.indexName ));
if ( removeFromIndexStore )
{
indexStore.remove( identifier.entityType.entityClass(), identifier.indexName );
}
typeCache.invalidate( identifier );
}
private static void deleteFileOrDirectory( File file )
{
if ( file.exists() )
{
if ( file.isDirectory() )
{
for ( File child : file.listFiles() )
{
deleteFileOrDirectory( child );
}
}
file.delete();
}
}
private/*synchronized elsewhere*/IndexWriter newIndexWriter( IndexIdentifier identifier )
{
assertNotClosed();
try
{
Directory dir = filesystemFacade.getDirectory( baseStorePath, identifier ); //getDirectory(
// baseStorePath, identifier );
directoryExists( dir );
IndexType type = getType( identifier, false );
IndexWriterConfig writerConfig = new IndexWriterConfig( type.analyzer );
writerConfig.setIndexDeletionPolicy( new MultipleBackupDeletionPolicy() );
Similarity similarity = type.getSimilarity();
if ( similarity != null )
{
writerConfig.setSimilarity( similarity );
}
IndexWriter indexWriter = new IndexWriter( dir, writerConfig );
// TODO We should tamper with this value and see how it affects the
// general performance. Lucene docs says rather <10 for mixed
// reads/writes
// writer.setMergeFactor( 8 );
return indexWriter;
}
catch ( IOException e )
{
throw new RuntimeException( e );
}
}
private boolean directoryExists( Directory dir )
{
try
{
String[] files = dir.listAll();
return files != null && files.length > 0;
}
catch ( IOException e )
{
return false;
}
}
static Document findDocument( IndexType type, IndexSearcher searcher, long entityId )
{
try
{
TopDocs docs = searcher.search( type.idTermQuery( entityId ), 1 );
if ( docs.scoreDocs.length > 0 )
{
return searcher.doc( docs.scoreDocs[0].doc );
}
return null;
}
catch ( IOException e )
{
throw new RuntimeException( e );
}
}
static boolean documentIsEmpty( Document document )
{
List<IndexableField> fields = document.getFields();
for ( IndexableField field : fields )
{
if ( !(LuceneLegacyIndex.KEY_DOC_ID.equals( field.name() ) || LuceneLegacyIndex.KEY_END_NODE_ID.equals( field.name() ) || LuceneLegacyIndex.KEY_START_NODE_ID
.equals( field.name() )) )
{
return false;
}
}
return true;
}
private synchronized void closeIndex( IndexIdentifier identifier )
{
try
{
IndexReference searcher = indexSearchers.remove( identifier );
if ( searcher != null )
{
searcher.dispose( true );
}
}
catch ( IOException e )
{
throw new RuntimeException( "Unable to close lucene writer " + identifier, e );
}
}
public ResourceIterator<File> listStoreFiles( boolean includeLogicalLogs ) throws IOException
{ // Never include logical logs since they are of little importance
final Collection<File> files = new ArrayList<>();
final Collection<Pair<SnapshotDeletionPolicy,IndexCommit>> snapshots = new ArrayList<>();
makeSureAllIndexesAreInstantiated();
for ( IndexReference writer : getAllIndexes() )
{
SnapshotDeletionPolicy deletionPolicy = (SnapshotDeletionPolicy) writer.getWriter().getConfig()
.getIndexDeletionPolicy();
File indexDirectory = getFileDirectory( baseStorePath, writer.getIdentifier() );
IndexCommit commit;
try
{
// Throws IllegalStateException if no commits yet
commit = deletionPolicy.snapshot();
}
catch ( IllegalStateException e )
{
/*
* This is insane but happens if we try to snapshot an existing index
* that has no commits. This is a bad API design - it should return null
* or something. This is not exceptional.
*
* For the time being we just do a commit and try again.
*/
writer.getWriter().commit();
commit = deletionPolicy.snapshot();
}
for ( String fileName : commit.getFileNames() )
{
files.add( new File( indexDirectory, fileName ) );
}
snapshots.add( Pair.of( deletionPolicy, commit ) );
}
return new PrefetchingResourceIterator<File>()
{
private final Iterator<File> filesIterator = files.iterator();
@Override
protected File fetchNextOrNull()
{
return filesIterator.hasNext() ? filesIterator.next() : null;
}
@Override
public void close()
{
for ( Pair<SnapshotDeletionPolicy,IndexCommit> policyAndCommit : snapshots )
{
try
{
policyAndCommit.first().release( policyAndCommit.other() );
}
catch ( IOException e )
{
// TODO What to do?
e.printStackTrace();
}
}
}
};
}
public ResourceIterator<File> listStoreFiles() throws IOException
{
return listStoreFiles( false );
}
private void makeSureAllIndexesAreInstantiated()
{
for ( String name : indexStore.getNames( Node.class ) )
{
Map<String, String> config = indexStore.get( Node.class, name );
if ( config.get( IndexManager.PROVIDER ).equals( LuceneIndexImplementation.SERVICE_NAME ) )
{
IndexIdentifier identifier = new IndexIdentifier( IndexEntityType.Node, name );
getIndexSearcher( identifier ).close();
}
}
for ( String name : indexStore.getNames( Relationship.class ) )
{
Map<String, String> config = indexStore.get( Relationship.class, name );
if ( config.get( IndexManager.PROVIDER ).equals( LuceneIndexImplementation.SERVICE_NAME ) )
{
IndexIdentifier identifier = new IndexIdentifier( IndexEntityType.Relationship, name );
getIndexSearcher( identifier ).close();
}
}
}
private enum LuceneFilesystemFacade
{
FS
{
@Override
Directory getDirectory( File baseStorePath, IndexIdentifier identifier ) throws IOException
{
return FSDirectory.open( getFileDirectory( baseStorePath, identifier ).toPath() );
}
@Override
void cleanWriteLocks( File dir )
{
if ( !dir.isDirectory() )
{
return;
}
for ( File file : dir.listFiles() )
{
if ( file.isDirectory() )
{
cleanWriteLocks( file );
}
else if ( file.getName().equals( "write.lock" ) )
{
boolean success = file.delete();
assert success;
}
}
}
@Override
File ensureDirectoryExists( FileSystemAbstraction fileSystem, File dir )
{
if ( !dir.exists() && !dir.mkdirs() )
{
String message = String.format( "Unable to create directory path[%s] for Neo4j store" + ".",
dir.getAbsolutePath() );
throw new RuntimeException( message );
}
return dir;
}
},
MEMORY
{
@Override
Directory getDirectory( File baseStorePath, IndexIdentifier identifier )
{
return new RAMDirectory();
}
@Override
void cleanWriteLocks( File path )
{
}
@Override
File ensureDirectoryExists( FileSystemAbstraction fileSystem, File path )
{
try
{
fileSystem.mkdirs( path );
}
catch ( IOException e )
{
throw new RuntimeException( e );
}
return path;
}
};
abstract Directory getDirectory( File baseStorePath, IndexIdentifier identifier ) throws IOException;
abstract File ensureDirectoryExists( FileSystemAbstraction fileSystem, File path );
abstract void cleanWriteLocks( File path );
}
}
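listStoreFiles above relies on Lucene's SnapshotDeletionPolicy to pin a consistent commit while index files are copied. The following is a minimal standalone sketch of that pattern under stated assumptions; it is not part of this class, and the directory path and analyzer are illustrative placeholders.

import java.nio.file.Paths;

import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SnapshotBackupSketch
{
    public static void main( String[] args ) throws Exception
    {
        Directory dir = FSDirectory.open( Paths.get( "target/example-index" ) ); // illustrative path
        IndexWriterConfig config = new IndexWriterConfig( new KeywordAnalyzer() );
        config.setIndexDeletionPolicy( new SnapshotDeletionPolicy( new KeepOnlyLastCommitDeletionPolicy() ) );
        try ( IndexWriter writer = new IndexWriter( dir, config ) )
        {
            // Ensure at least one commit exists; snapshot() throws IllegalStateException otherwise
            writer.commit();
            SnapshotDeletionPolicy policy = (SnapshotDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
            IndexCommit commit = policy.snapshot();
            try
            {
                // While the snapshot is held, these files will not be deleted by merges or further commits
                for ( String fileName : commit.getFileNames() )
                {
                    System.out.println( "would copy: " + fileName );
                }
            }
            finally
            {
                policy.release( commit ); // allow Lucene to delete the pinned files again
            }
        }
    }
}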