de.julielab.genemapper.resources.SpellCheckerIndexGenerator Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of gene-mapper-resources Show documentation

This project assembles code and files required to build the dictionaries and indexes used by the JCoRe Gene Mapper.

The newest version!

package de.julielab.genemapper.resources;

import de.julielab.gene.candidateretrieval.SynonymIndexFieldNames;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.FSDirectory;

import java.io.File;
import java.io.IOException;

/**
 * Command line tool that derives a Lucene spell checker index from the terms stored in the
 * {@link SynonymIndexFieldNames#LOOKUP_SYN_FIELD} field of an existing gene synonym index.
 * <p>
 * Expected layout below the index base directory given on the command line:
 * <ul>
 * <li>{@code geneSynonymIndex} - input, must already exist</li>
 * <li>{@code geneSpellingIndex} - output, written by this tool</li>
 * </ul>
 */
public class SpellCheckerIndexGenerator {

	/**
	 * Entry point. Validates the command line and builds the gene spelling index.
	 *
	 * @param args
	 *            {@code args[0]}: the resources directory (only checked for existence here);
	 *            {@code args[1]}: the base directory that contains {@code geneSynonymIndex} and
	 *            will receive {@code geneSpellingIndex}
	 * @throws Exception
	 *             if reading the synonym index or writing the spelling index fails
	 */
	public static void main(String[] args) throws Exception {

		if (args.length != 2) {
			System.err.println(
					"Usage: SpellCheckerIndexGenerator <resourcesDirectory> <indexBaseDirectory>");
			System.exit(1);
		}

		// The resources directory is not read by this tool; it is only validated so that a
		// wrong invocation fails early.
		File resDir = new File(args[0]);
		if (!resDir.isDirectory()) {
			System.err.println("Could not find resources directory: " + resDir.getAbsolutePath());
			System.exit(1);
		}

		// Let File join parent and child so that no manual, platform-specific separator
		// handling is required.
		String indexPath = args[1];
		File geneIndexDir = new File(indexPath, "geneSynonymIndex");
		File geneSpellingIndexDir = new File(indexPath, "geneSpellingIndex");

		System.out.println("Writing gene spelling index to " + geneSpellingIndexDir.getAbsolutePath());
		createSpellingIndex(geneIndexDir, geneSpellingIndexDir);
		// NOTE: building a protein spelling index (proteinSynonymIndex -> proteinSpellingIndex)
		// is currently disabled.
		System.out.println("Done.");
	}

	/**
	 * Builds a spell checker index from the terms of the
	 * {@link SynonymIndexFieldNames#LOOKUP_SYN_FIELD} field of the given mention index.
	 * <p>
	 * The index writer is configured with {@link OpenMode#CREATE} and a full merge is requested
	 * after indexing. All Lucene resources opened by this method (directories, reader, spell
	 * checker, analyzer) are closed before it returns, also in case of an error.
	 *
	 * @param mentionIndexDir
	 *            directory of the existing Lucene index whose terms serve as the dictionary
	 * @param spellingIndexDir
	 *            directory the spell checker index is written to
	 * @throws IOException
	 *             if the mention index cannot be opened or the spelling index cannot be written
	 */
	public static void createSpellingIndex(File mentionIndexDir, File spellingIndexDir) throws IOException {
		// Closing a reader or a SpellChecker does not close the Directory it was created on,
		// so the directories are managed as separate resources. Resources are closed in
		// reverse order of declaration.
		try (FSDirectory mentionDirectory = FSDirectory.open(mentionIndexDir.toPath());
				IndexReader reader = DirectoryReader.open(mentionDirectory);
				WhitespaceAnalyzer wsAnalyzer = new WhitespaceAnalyzer();
				FSDirectory spellingDirectory = FSDirectory.open(spellingIndexDir.toPath());
				SpellChecker sc = new SpellChecker(spellingDirectory)) {
			LuceneDictionary dictionary = new LuceneDictionary(reader,
					SynonymIndexFieldNames.LOOKUP_SYN_FIELD);
			IndexWriterConfig iwc = new IndexWriterConfig(wsAnalyzer);
			iwc.setOpenMode(OpenMode.CREATE);
			sc.indexDictionary(dictionary, iwc, true);
		}
	}

}