org.metaeffekt.artifact.resolver.generic.index.lucene.SimpleLuceneIndex Maven / Gradle / Ivy
The newest version!
package org.metaeffekt.artifact.resolver.generic.index.lucene;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
// FIXME: split into a read index and a write index.
// FIXME: may be prone to changes; prefer a dedicated index implementation to control index changes.
/**
* A simple index made with lucene.
*
* Persists to the given path. Note that this means you manually need to delete or clear the index to fully rebuild it.
*
* Due to its abstract interface, it may be useful for a variety of scenarios, akin to an "external memory hashmap".
*/
@Slf4j
public class SimpleLuceneIndex extends AbstractLuceneIndex {
private static final String DEDUPLICATE = "DEDUPLICATE";
private static final int DEFAULT_ANALYZER_MAX_WORD_LENGTH = 1024;
private static final int WRITER_MAX_RAM_BUFFER_SIZE_MB = 128;
private final IndexWriter writer;
public SimpleLuceneIndex(@NonNull File indexDir, @NonNull Analyzer analyzer) throws IOException {
super(indexDir, analyzer);
final IndexWriterConfig conf = new IndexWriterConfig(analyzer);
conf.setCommitOnClose(true);
conf.setRAMBufferSizeMB(WRITER_MAX_RAM_BUFFER_SIZE_MB);
conf.setUseCompoundFile(false);
this.writer = new IndexWriter(getDirectory(), conf);
}
public SimpleLuceneIndex(@NonNull File indexDir) throws IOException {
this(indexDir, new WhitespaceAnalyzer(DEFAULT_ANALYZER_MAX_WORD_LENGTH));
}
public void addEntry(@NonNull Map> keyValueMap) throws IOException {
final Document document = new Document();
if (keyValueMap.containsKey(DEDUPLICATE)) {
log.error("Can't support key named [DEDUPLICATE]: key reserved by this class.");
throw new IllegalArgumentException("Can't support key named [DEDUPLICATE]: key reserved by this class.");
}
for (Map.Entry> entry : keyValueMap.entrySet()) {
for (String value : entry.getValue()) {
document.add(new TextField(entry.getKey(), value, Field.Store.YES));
}
}
this.writer.addDocument(document);
}
// TODO: reevaluate the feasibility and requorement of a deduplicating interface given lucene's lack of version
// interoparability: might be better to use another data store for long-term storage.
// TODO: whip up interface with support for deduplication via writer.updateDocument and user-constructed "Term"s.
// trying to do deduplication inside this class isn't viable; it would limit its use case as accounting for all
// deduplication strategies is impossible. User-constructed and passed-in Term objects allow users to steer
// dedup behaviour via construction rules for said Term objects while also keeping this class truly "Simple".
// This would then allow for persistent indices that can be updated whenever.
public void clear() throws IOException {
this.writer.deleteAll();
this.writer.forceMergeDeletes(false);
this.writer.commit();
}
/**
* Simple passthrough method to commit changes to storage.
* @throws IOException throws on write failure
*/
public void commit() throws IOException {
this.writer.commit();
}
/**
* Lookup value (being whatever substring shall be included) in key.
* @param key field name to search
* @param value value to search for
* @param n maximum number of documents to output
* @return found documents
* @throws IOException on failure to open / search index
*/
@NonNull
public List lookupContains(@NonNull String key, @NonNull String value, int n) throws IOException {
final Query query = new TermQuery(new Term(key, value));
try (final IndexReader reader = DirectoryReader.open(this.writer)) {
final IndexSearcher searcher = new IndexSearcher(reader);
return runQuery(query, n, searcher);
}
}
public long size() {
try (final IndexReader reader = DirectoryReader.open(this.writer)) {
return reader.numDocs();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
/**
* Closes underlying index interfaces.
*/
@Override
public void close() throws Exception {
writer.close();
super.close();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy