All Downloads are FREE. Search and download functionalities are using the official Maven repository.

prerna.wikidata.WikiLogicalNameExtractor Maven / Gradle / Ivy

The newest version!
package prerna.wikidata;

import java.util.List;
import java.util.Vector;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.regex.Pattern;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.wikidata.wdtk.wikibaseapi.WbSearchEntitiesResult;
import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher;

import prerna.util.Utility;

public class WikiLogicalNameExtractor {

	public static final Logger LOGGER = LogManager.getLogger(WikiLogicalNameExtractor.class);
	private Logger logger;

	public WikiLogicalNameExtractor() {
		
	}
	
	public List getLogicalNames(String searchTerm) throws Exception {
		Logger logger = getLogger();
		List logicalNames = new Vector<>();

		WikibaseDataFetcher wbdf = WikibaseDataFetcher.getWikidataDataFetcher();
		searchTerm = splitCamelCase(searchTerm.trim().replace("_", " ")).replaceAll("\\s+", " ");
		List searchResults = wbdf.searchEntities(searchTerm, new Long(10));
		int numReturns = searchResults.size();
		if(numReturns == 0) {
			logger.info("Found no results searching for " + Utility.cleanLogString(searchTerm));
			return logicalNames;
		}
		logger.info("Querying wikidata returned " + numReturns + " results for " + Utility.cleanLogString(searchTerm));
		List>> logicalNamesExtractors = new Vector<>();
		for(int i = 0; i < searchResults.size(); i++) {
			WbSearchEntitiesResult res = searchResults.get(i);
			WikiLogicalNameCallable callable = new WikiLogicalNameCallable(wbdf, res);
			callable.setLogger(this.logger);
			logicalNamesExtractors.add(callable);
		}
		
		ExecutorService executorService = Executors.newFixedThreadPool(searchResults.size());
		CompletionService> completionService = new ExecutorCompletionService<>(executorService);
		for (Callable> logicalNameExtractor : logicalNamesExtractors) {
			completionService.submit(logicalNameExtractor);
		}
		
		// Continue until all have completed
		while (numReturns > 0) {
			try {
				List foundLogicalNames = completionService.take().get();
				logicalNames.addAll(foundLogicalNames);
				numReturns--;
			} catch (Exception e) {
				logger.error("StackTrace: ", e);
				throw e;
			} finally {
				executorService.shutdownNow();
			}
		}
		
		return logicalNames;
	}
	
	public void setLogger(Logger logger) {
		this.logger = logger;
	}
	
	/**
	 * Get the correct logger
	 * @return
	 */
	private Logger getLogger() {
		if(this.logger == null) {
			return LOGGER;
		}
		return this.logger;
	}
	
	static String splitCamelCase(String s) {
		   return s.replaceAll(
		      String.format("%s|%s|%s",
		         "(?<=[A-Z])(?=[A-Z][a-z])",
		         "(?<=[^A-Z])(?=[A-Z])",
		         "(?<=[A-Za-z])(?=[^A-Za-z])"
		      ),
		      " "
		   );
		}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy