All downloads are free. Search and download functionality uses the official Maven repository.

org.pageseeder.flint.lucene.LuceneIndexIO Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2015 Allette Systems (Australia)
 * http://www.allette.com.au
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.pageseeder.flint.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.pageseeder.flint.IndexException;
import org.pageseeder.flint.IndexIO;
import org.pageseeder.flint.IndexOpenException;
import org.pageseeder.flint.OpenIndexManager;
import org.pageseeder.flint.content.DeleteRule;
import org.pageseeder.flint.indexing.FlintDocument;
import org.pageseeder.flint.indexing.IndexJob;
import org.pageseeder.flint.indexing.IndexListener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.nio.file.AccessDeniedException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Provides a set of utility methods to deal with IO operations on an Index.
 *
 * 

This class is useful to centralise all operations on an index because it will * create one writer and share it with other classes if needed. * *

This is a lower level API. * * @author Jean-Baptiste Reure * @author Christophe Lauret * * @version 27 February 2013 */ public final class LuceneIndexIO implements IndexIO { /** * private logger */ private final static Logger LOGGER = LoggerFactory.getLogger(LuceneIndexIO.class); public final static String LAST_COMMIT_DATE = "lastCommitDate"; /** * Describes the state of an index. */ private enum State { /** The index is in a clean state, ready to use. */ CLEAN, /** The index needs to be opened again. */ DIRTY, /** The index is closing. */ CLOSING, /** The index is closed. */ CLOSED } /** * State of this index. */ private volatile LuceneIndexIO.State state = State.CLEAN; /** * The last time this reader was used */ private final AtomicLong lastTimeUsed = new AtomicLong(0); /** * The underlying index writer used by Flint for this index (there should only be one). */ private IndexWriter _writer; /** * The underlying index writer used by Flint for this index (there should only be one). */ private ReaderManager _reader; /** * The index directory */ private final Directory _directory; /** * The analyzer used for the writer */ private final Analyzer _analyzer; /** * A search manager using this writer. */ private SearcherManager _searcher; private Integer writing = 0; private Integer committing = 0; private final Object lock = new Object(); // simple searcherfactory for now private final static SearcherFactory FACTORY = new SearcherFactory(); /** * Sole constructor. 
* * @param dir The index's directory * @param analyzer The analyzer * * @throws IndexException if opening the index failed */ public LuceneIndexIO(Directory dir, Analyzer analyzer) throws IndexException { this._analyzer = analyzer; this._directory = dir; open(); // get last commit data as last time used try { List commits = DirectoryReader.listCommits(dir); if (!commits.isEmpty()) { String lastCommitDate = commits.get(commits.size()-1).getUserData().get(LuceneIndexIO.LAST_COMMIT_DATE); if (lastCommitDate != null) { this.lastTimeUsed.set(Long.parseLong(lastCommitDate)); } } } catch (IOException ex) { LOGGER.error("Failed to load last index commit date", ex); } } public long getLastTimeUsed() { return this.lastTimeUsed.get(); } /** * @return true if closed. */ public boolean isClosed() { return isState(State.CLOSED); } /** * Closes the writer on this index. * * @throws IndexException Wrapping an {@link CorruptIndexException} or an {@link IOException}. */ public synchronized void stop() throws IndexException { if (this._writer == null || isClosed() || isState(State.CLOSING)) return; LOGGER.debug("Stopping IO"); // try to commit if needed maybeCommit(); if (this._writer == null || isClosed() || isState(State.CLOSING)) return; startClosing(); try { try { this._writer.close(); } catch (AlreadyClosedException ex) { // all good then! } try { this._searcher.close(); } catch (AlreadyClosedException ex) { // all good then! } try { this._reader.close(); } catch (AlreadyClosedException ex) { // all good then! } state(State.CLOSED); OpenIndexManager.remove(this); } catch (final CorruptIndexException ex) { throw new IndexException("Failed to close Index because it is corrupted", ex); } catch (final IOException ex) { throw new IndexException("Failed to close Index because of an I/O error", ex); } } /** * Commit any changes if the state of the index requires it. 
*/ public synchronized void maybeRefresh() { if (this._writer == null || !this._writer.isOpen() || !isState(State.DIRTY)) return; try { LOGGER.debug("Reopen reader and searcher"); this._reader.maybeRefresh(); this._searcher.maybeRefresh(); state(State.CLEAN); } catch (AlreadyClosedException ex) { // must be closing, ignore then } catch (Exception ex) { LOGGER.error("Failed to reopen Index Searcher because of an I/O error", ex); } } /** * Commit any changes if the state of the index requires it. */ public synchronized void maybeCommit() { if (this._writer == null|| isState(State.CLOSING) || isClosed() || this.committing > 0 || (!this._writer.hasDeletions() && !this._writer.hasUncommittedChanges() && !this._writer.hasPendingMerges())) return; // force refresh state(State.DIRTY); maybeRefresh(); // closed? if (this._writer == null || !this._writer.isOpen()|| this.committing > 0 || isState(State.CLOSING) || isClosed()) return; startCommitting(); try { LOGGER.debug("Committing index changes"); long now = System.currentTimeMillis(); Map commitUserData = new HashMap<>(); commitUserData.put(LAST_COMMIT_DATE, String.valueOf(now)); this._writer.setLiveCommitData(commitUserData.entrySet()); this._writer.commit(); this.lastTimeUsed.set(now); } catch (final CorruptIndexException ex) { LOGGER.error("Failed to commit Index because it is corrupted", ex); } catch (final IOException ex) { LOGGER.error("Failed to commit Index because of an I/O error", ex); } finally { endCommitting(); } } /** * Clears the index as soon as possible (asynchronously). * * @return true if the indexed could be scheduled for clearing; * false otherwise. * @throws IndexException should any error be thrown by Lucene. 
*/ public synchronized boolean clearIndex() throws IndexException { if (this._writer == null|| isState(State.CLOSING)) return false; try { if (isClosed()) open(); startWriting(); this._writer.deleteAll(); this.lastTimeUsed.set(System.currentTimeMillis()); state(State.DIRTY); } catch (Exception ex) { // try to delete all files then if possible if (this._directory != null) try { for (String n : this._directory.listAll()) { try { this._directory.deleteFile(n); } catch (AccessDeniedException ex2) { // file must be used by lucene, try other files } } } catch (IOException ex2) { throw new IndexException("Failed to clear Index", ex); } } finally { this.endWriting(); } return true; } /** * Delete the documents defined in the delete rule as soon as possible * (asynchronously). * * @param rule the rule to identify the items to delete * @return true if the item could be scheduled for deletion; * false * @throws IndexException should any error be thrown by Lucene. */ public synchronized boolean deleteDocuments(DeleteRule rule) throws IndexException { if (this._writer == null|| isState(State.CLOSING)) return false; if (!(rule instanceof LuceneDeleteRule)) return false; LuceneDeleteRule drule = (LuceneDeleteRule) rule; try { if (isClosed()) open(); startWriting(); if (drule.useTerm()) { this._writer.deleteDocuments(drule.toTerm()); } else { this._writer.deleteDocuments(drule.toQuery()); } this.lastTimeUsed.set(System.currentTimeMillis()); state(State.DIRTY); } catch (IOException ex) { throw new IndexException("Failed to clear Index", ex); } finally { endWriting(); } return true; } /** * Update the documents defined in the delete rule as soon as possible * (asynchronously). * *

* It is not possible to update an item in Lucene, instead it is first deleted * then inserted again. * * @param rule the rule to identify the items to delete before update. * @param documents the list of documents to replace with. * @return true if the item could be scheduled for update; * false * @throws IndexException should any error be thrown by Lucene */ public synchronized boolean updateDocuments(DeleteRule rule, List documents, IndexListener listener, IndexJob job) throws IndexException { if (this._writer == null || isState(State.CLOSING)) return false; LuceneDeleteRule drule; if (rule == null) drule = null; else { if (!(rule instanceof LuceneDeleteRule)) return false; drule = (LuceneDeleteRule) rule; } try { if (isClosed()) open(); startWriting(); FlintDocumentConverter converter = new FlintDocumentConverter(); List docs = converter.convert(documents); if (converter.hasWarnings()) { for (String fieldname : converter.fieldsWithWarnings()) { listener.warn(job, "Warning for field '"+fieldname+"': "+converter.getWarning(fieldname)); } } // delete? if (rule != null) { if (drule.useTerm()) { // use update this._writer.updateDocuments(drule.toTerm(), docs); } else { // delete then add this._writer.deleteDocuments(drule.toQuery()); this._writer.addDocuments(docs); } } else { // add this._writer.addDocuments(docs); } this.lastTimeUsed.set(System.currentTimeMillis()); state(State.DIRTY); } catch (final IOException e) { throw new IndexException("Failed to update document in Index because of an I/O error", e); } finally { endWriting(); } return true; } /** * Updates documents' DocValues fields to the given values. * Each field update is applied to the set of documents that are associated with the Term to the same value. * All updates are atomically applied and flushed together. 
* * @param term the term defining the document(s) to update * @param newFields the new fields * * @return true if the update was done successfully * * @throws IndexException if updating the doc values failed */ public synchronized boolean updateDocValues(Term term, Field... newFields) throws IndexException { // check state if (this._writer == null || isState(State.CLOSING)) return false; try { if (isClosed()) open(); startWriting(); this._writer.updateDocValues(term, newFields); // set state this.lastTimeUsed.set(System.currentTimeMillis()); state(State.DIRTY); } catch (IOException ex) { throw new IndexException("Failed to update docvalues in Index because of an I/O error", ex); } finally { endWriting(); } return true; } public synchronized IndexSearcher bookSearcher() { while (this.isState(State.CLOSING)) { try { Thread.sleep(100); } catch (InterruptedException ex) { LOGGER.error("Interrupted while waiting for closing to finish", ex); Thread.currentThread().interrupt(); } } try { if (isClosed()) open(); return this._searcher.acquire(); } catch (IndexException | IOException ex) { LOGGER.error("Failed to book searcher", ex); return null; } } public void releaseSearcher(IndexSearcher searcher) { if (isClosed()|| isState(State.CLOSING)) return; try { this._searcher.release(searcher); } catch (IOException ex) { LOGGER.error("Failed to release searcher", ex); } } public synchronized IndexReader bookReader() { while (this.isState(State.CLOSING)) { try { Thread.sleep(100); } catch (InterruptedException ex) { LOGGER.error("Interrupted while waiting for closing to finish", ex); Thread.currentThread().interrupt(); } } try { if (isClosed()) open(); return this._reader.acquire(); } catch (IndexException | IOException ex) { LOGGER.error("Failed to book reader", ex); return null; } } public void releaseReader(IndexReader reader) { if (isClosed() || isState(State.CLOSING)) return; if (!(reader instanceof DirectoryReader)) throw new IllegalArgumentException("Reader must be a 
DirectoryReader"); try { this._reader.release((DirectoryReader) reader); } catch (IOException ex) { LOGGER.error("Failed to release reader", ex); } } // private helpers // ---------------------------------------------------------------------------------------------- private void state(State s) { synchronized (this.lock) { this.state = s; } } private boolean isState(State s) { synchronized (this.lock) { return this.state == s; } } private void startClosing() { state(State.CLOSING); while (this.writing > 0 || this.committing > 0) { try { Thread.sleep(100); } catch (InterruptedException ex) { LOGGER.error("Interrupted while waiting for writing to finish", ex); Thread.currentThread().interrupt(); } } } private void startCommitting() { while (this.writing > 0) { try { Thread.sleep(100); } catch (InterruptedException ex) { LOGGER.error("Interrupted while waiting for writing to finish", ex); Thread.currentThread().interrupt(); } } synchronized(this.lock) { this.committing++; } } private void endCommitting() { synchronized(this.lock) { this.committing--; } } private void startWriting() { while (this.committing > 0) { try { Thread.sleep(100); } catch (InterruptedException ex) { LOGGER.error("Interrupted while waiting for commit to finish", ex); Thread.currentThread().interrupt(); } } synchronized(this.lock) { this.writing++; } } private void endWriting() { synchronized(this.lock) { this.writing--; } } private void open() throws IndexException { open(true); } private void open(boolean firsttime) throws IndexException { try { // create it? boolean createIt = !DirectoryReader.indexExists(this._directory); // read only? 
boolean readonly = isReadOnly(this._directory); if (readonly) { this._writer = null; this._reader = new ReaderManager(this._directory); this._searcher = new SearcherManager(this._directory, FACTORY); } else { // create writer IndexWriterConfig config = new IndexWriterConfig(this._analyzer); ConcurrentMergeScheduler merger = new ConcurrentMergeScheduler(); // merger.setMaxMergesAndThreads(maxMergeCount, maxThreadCount); config.setMergeScheduler(merger); if (createIt) config.setOpenMode(OpenMode.CREATE); this._writer = new IndexWriter(this._directory, config); if (createIt) this._writer.commit(); boolean applyAllDeletes = true; boolean writeAllDeletes = false; // create searcher this._searcher = new SearcherManager(this._writer, applyAllDeletes, writeAllDeletes, FACTORY); // create reader this._reader = new ReaderManager(this._writer, applyAllDeletes, writeAllDeletes); } // add it to list of opened indexes OpenIndexManager.add(this); // set state to clean state(State.CLEAN); } catch (IndexFormatTooOldException ex) { if (firsttime) { // try to delete all files and retry try { for (String n : this._directory.listAll()) { this._directory.deleteFile(n); } } catch (IOException ex2) { throw new IndexException("Failed to delete index files from old index", ex); } // retry open(false); } else { throw new IndexOpenException("Failed to create index: format is too old!", ex); } } catch (LockObtainFailedException ex) { throw new IndexOpenException("Failed to create index: there's already a writer on this index", ex); } catch (IOException ex) { throw new IndexException("Failed to create writer on index", ex); } } // static helpers // ---------------------------------------------------------------------------------------------- /** * Generate the appropriate IndexIO implementation to use based on the underlying {@link Directory} * used. * *

This method will try to return a read/write IndexIO instance if possible; * otherwise it will return a read only instance. * * @param directory The index location. * * @return The most appropriate IndexIO implementation to use. */ private static boolean isReadOnly(Directory directory) { // not using file system? not read only if (!(directory instanceof FSDirectory)) return false; // Detect if we can write on the files. try { File f = ((FSDirectory) directory).getDirectory().toFile(); // ensure all files can write. if (!f.canWrite()) return true; File[] files = f.listFiles(); if (files == null) return true; for (File tf : files) { if (!tf.canWrite()) return true; } } catch (Exception ex) { // any error means readonly LOGGER.error("Index is readonly", ex); return true; } // not read only then return false; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy