All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.metaeffekt.artifact.resolver.generic.index.lucene.SimpleLuceneIndex Maven / Gradle / Ivy

The newest version!
package org.metaeffekt.artifact.resolver.generic.index.lucene;

import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;

// FIXME: split in read and write index
// FIXME: may be prone to changes; prefer a dedicated index implementation to control index changes.

/**
 * A simple index made with lucene.
 * 
* Persists to the given path. Note that this means you manually need to delete or clear the index to fully rebuild it. *
* Due to its abstract interface, it may be useful for a variety of scenarios, akin to an "external memory hashmap". */ @Slf4j public class SimpleLuceneIndex extends AbstractLuceneIndex { private static final String DEDUPLICATE = "DEDUPLICATE"; private static final int DEFAULT_ANALYZER_MAX_WORD_LENGTH = 1024; private static final int WRITER_MAX_RAM_BUFFER_SIZE_MB = 128; private final IndexWriter writer; public SimpleLuceneIndex(@NonNull File indexDir, @NonNull Analyzer analyzer) throws IOException { super(indexDir, analyzer); final IndexWriterConfig conf = new IndexWriterConfig(analyzer); conf.setCommitOnClose(true); conf.setRAMBufferSizeMB(WRITER_MAX_RAM_BUFFER_SIZE_MB); conf.setUseCompoundFile(false); this.writer = new IndexWriter(getDirectory(), conf); } public SimpleLuceneIndex(@NonNull File indexDir) throws IOException { this(indexDir, new WhitespaceAnalyzer(DEFAULT_ANALYZER_MAX_WORD_LENGTH)); } public void addEntry(@NonNull Map> keyValueMap) throws IOException { final Document document = new Document(); if (keyValueMap.containsKey(DEDUPLICATE)) { log.error("Can't support key named [DEDUPLICATE]: key reserved by this class."); throw new IllegalArgumentException("Can't support key named [DEDUPLICATE]: key reserved by this class."); } for (Map.Entry> entry : keyValueMap.entrySet()) { for (String value : entry.getValue()) { document.add(new TextField(entry.getKey(), value, Field.Store.YES)); } } this.writer.addDocument(document); } // TODO: reevaluate the feasibility and requorement of a deduplicating interface given lucene's lack of version // interoparability: might be better to use another data store for long-term storage. // TODO: whip up interface with support for deduplication via writer.updateDocument and user-constructed "Term"s. // trying to do deduplication inside this class isn't viable; it would limit its use case as accounting for all // deduplication strategies is impossible. 
User-constructed and passed-in Term objects allow users to steer // dedup behaviour via construction rules for said Term objects while also keeping this class truly "Simple". // This would then allow for persistent indices that can be updated whenever. public void clear() throws IOException { this.writer.deleteAll(); this.writer.forceMergeDeletes(false); this.writer.commit(); } /** * Simple passthrough method to commit changes to storage. * @throws IOException throws on write failure */ public void commit() throws IOException { this.writer.commit(); } /** * Lookup value (being whatever substring shall be included) in key. * @param key field name to search * @param value value to search for * @param n maximum number of documents to output * @return found documents * @throws IOException on failure to open / search index */ @NonNull public List lookupContains(@NonNull String key, @NonNull String value, int n) throws IOException { final Query query = new TermQuery(new Term(key, value)); try (final IndexReader reader = DirectoryReader.open(this.writer)) { final IndexSearcher searcher = new IndexSearcher(reader); return runQuery(query, n, searcher); } } public long size() { try (final IndexReader reader = DirectoryReader.open(this.writer)) { return reader.numDocs(); } catch (IOException e) { throw new RuntimeException(e); } } /** * Closes underlying index interfaces. */ @Override public void close() throws Exception { writer.close(); super.close(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy