All Downloads are FREE. Search and download functionalities are using the official Maven repository.

prerna.reactor.frame.gaas.qa.CreateFAISSIndexReactor Maven / Gradle / Ivy

The newest version!
//package prerna.reactor.frame.gaas.qa;
//
//import java.io.File;
//import java.io.IOException;
//import java.net.URLConnection;
//import java.nio.file.Files;
//import java.nio.file.Path;
//import java.nio.file.Paths;
//import java.nio.file.attribute.BasicFileAttributes;
//import java.util.ArrayList;
//import java.util.HashMap;
//import java.util.List;
//import java.util.Map;
//
//import org.apache.commons.lang.SystemUtils;
//import org.apache.logging.log4j.Logger;
//
//import com.google.gson.Gson;
//
//import net.snowflake.client.jdbc.internal.apache.commons.io.FileUtils;
//import prerna.cluster.util.ClusterUtil;
//import prerna.ds.py.PyTranslator;
//import prerna.project.api.IProject;
//import prerna.reactor.frame.gaas.AbstractGaasBaseReactor;
//import prerna.reactor.frame.gaas.processors.CSVWriter;
//import prerna.reactor.frame.gaas.processors.DocProcessor;
//import prerna.reactor.frame.gaas.processors.PDFProcessor;
//import prerna.reactor.frame.gaas.processors.PPTProcessor;
//import prerna.sablecc2.om.PixelDataType;
//import prerna.sablecc2.om.ReactorKeysEnum;
//import prerna.sablecc2.om.nounmeta.NounMetadata;
//import prerna.util.AssetUtility;
//import prerna.util.Utility;
//
//import prerna.util.Constants;
//
//import org.apache.logging.log4j.LogManager;
//import org.apache.logging.log4j.Logger;
//
//public class CreateFAISSIndexReactor extends AbstractGaasBaseReactor {
//
//	private static final Logger classLogger = LogManager.getLogger(CreateFAISSIndexReactor.class);
//
//	public CreateFAISSIndexReactor() {
//		// fileName - List of files
//		// Name - the name for this config.json
//		// baseFolder relative to the project
//		this.keysToGet = new String [] {ReactorKeysEnum.FILE_NAME.getKey(), 
//										ReactorKeysEnum.NAME.getKey(),
//										ReactorKeysEnum.PROJECT.getKey(),
//										ReactorKeysEnum.BASE_URL.getKey(),
//										ReactorKeysEnum.DESCRIPTION.getKey(),
//										ReactorKeysEnum.CONTENT_LENGTH.getKey(),
//										ReactorKeysEnum.CONTENT_OVERLAP.getKey()
//										};
//		this.keyRequired = new int[] {1,1,1,1, 0};
//	}
//	
//	@Override
//	public NounMetadata execute() {
//		Logger logger = getLogger(this.getClass().getName());
//		String projectId = getProjectId();
//		
//		Map configMap = new HashMap();
//		
//		List  processedList = new ArrayList();
//		List  unProcessedList = new ArrayList();
//		
//		if(projectId == null)
//		{
//			return NounMetadata.getErrorNounMessage("Project is required for creating index ");
//		}
//		
//		configMap.put("Project", projectId);
//		
//		String basePath = null;
//		if(projectId.equalsIgnoreCase("temp"))
//			basePath = "c:/temp";
//		else
//			basePath = AssetUtility.getProjectAssetFolder(projectId);
//		
//		basePath = basePath.replace("\\", "/");
//
//		
//		List  fileNames = this.store.getNoun(keysToGet[0]).getAllStrValues();
//		String configName = this.store.getNoun(keysToGet[1]).get(0) +"";
//		
//		String baseURL = "";
//		if(this.store.getNoun(keysToGet[3]) != null)
//		{
//			baseURL = this.store.getNoun(keysToGet[3]).get(0) + "";
//		}
//		if(this.store.getNoun(keysToGet[4]) != null)
//		{
//			String description = this.store.getNoun(keysToGet[4]).get(0) +"";
//			configMap.put("Description", description);
//		}
//		if(baseURL == null)
//			baseURL = "";
//		
//		String indexingFolder = basePath + baseURL;
//
//		configMap.put("Name", configName);
//		String csvFileName = indexingFolder + "/" + configName + ".csv";
//		String configFile = indexingFolder + "/" + configName + ".json";
//		String faiss_index = indexingFolder + "/" + configName + "_faiss.index";
//		
//		int contentLength = 512;
//		if(this.store.getNoun(keysToGet[5]) != null)
//			contentLength = Integer.parseInt(this.store.getNoun(keysToGet[5]).get(0) + "");
//		
//		int contentOverlap = 0;
//		if(this.store.getNoun(keysToGet[6]) != null)
//			contentOverlap = Integer.parseInt(this.store.getNoun(keysToGet[6]).get(0) + "");
//		
//		CSVWriter writer = new CSVWriter(csvFileName);
//		try {
//			writer.setContentLength(contentLength);
//			writer.overlapLength(contentOverlap);
//	
//			logger.info("Starting file conversions ");
//			
//			// pick up the files and convert them to CSV
//			for(int fileIndex = 0;fileIndex < fileNames.size();fileIndex++)
//			{
//				String thisFile = fileNames.get(fileIndex);
//				logger.info("Processing file : " + thisFile);
//	
//				String fileLocation = indexingFolder + "/" + thisFile;		
//				// process this file
//				Map fileMap = addFileProperties(fileLocation);
//				if(fileMap != null)
//				{
//					String mimeType = fileMap.get("mime_type") + "";
//					if(mimeType.equalsIgnoreCase("application/vnd.openxmlformats-officedocument.wordprocessingml.document"))
//					{
//						// document
//						DocProcessor dp = new DocProcessor(fileLocation, writer);
//						dp.process();
//						configMap.put(thisFile, fileMap);
//						processedList.add(thisFile);
//					}
//					else if(mimeType.equalsIgnoreCase("application/vnd.openxmlformats-officedocument.presentationml.presentation")) {
//						// powerpoint
//						PPTProcessor pp = new PPTProcessor(fileLocation, writer);
//						pp.process();
//						configMap.put(thisFile, fileMap);
//						processedList.add(thisFile);
//					}
//					else if(mimeType.equalsIgnoreCase("application/pdf")) {
//						PDFProcessor pdf = new PDFProcessor(fileLocation, writer);
//						pdf.process();
//						configMap.put(thisFile, fileMap);
//						processedList.add(thisFile);
//					}
//					else {
//						String message = "We Currently do not support mime-type" + mimeType;
//						configMap.put(thisFile, message);
//						unProcessedList.add(thisFile);
//					}
//					logger.info("Completed Processing file : " + thisFile);
//				}
//			}
//			
//			configMap.put("Processed", processedList);
//			configMap.put("Unprocessed", unProcessedList);
//			
//			logger.info("Creating index ");
//			indexData(projectId, baseURL, configName, csvFileName, faiss_index);
//			
//			// write this to a json
//			Gson gson = new Gson();
//			String json = gson.toJson(configMap);
//			try {
//				FileUtils.writeStringToFile(new File(configFile), json);
//			} catch (IOException e) {
//				classLogger.error(Constants.STACKTRACE, e);
//			}
//			
//			//push to cloud
//			if (ClusterUtil.IS_CLUSTER) {
//				IProject project = Utility.getProject(projectId);
//				String projectFolderPath = AssetUtility.getProjectBaseFolder(project.getProjectName(), projectId).replace("\\", "/");
//				ClusterUtil.pushProjectFolder(project, projectFolderPath);
//			}
//		} finally {
//			writer.close();
//		}
//		
//		return new NounMetadata(configMap, PixelDataType.MAP);
//	}
//	
//	// base folder is relative to the project
//	
//	private void indexData(String projectId, String baseFolder, String indexName, String inputFile, String indexFile)
//	{
//		// create the index on CSV
//		// import gaas_simple_faiss as fa
//		// from datasets import DataSet
//		// ds = Dataset.from_csv(csvfileName, encoding="iso-8859-1")
//		// f1 = fa.FAISSSearcher(ds=ds)
//		// f1.custom_faiss_index()
//		// f1.save_index(faiss_index)
//		// delete everything
//		projectId = projectId.replace("-", "_");
//		baseFolder = baseFolder.replace("-", "_");
//		baseFolder = baseFolder.replace("/", "_");
//		baseFolder = baseFolder.replace(" ", "_");
//		indexName=indexName.replace(" ","_");
//
//		PyTranslator pyt = insight.getPyTranslator();
//		String baseVarName = "a_" + projectId + "_" + baseFolder + "_" + indexName; 
//		String [] commands = new String[] {
//				"import gaas_simple_faiss as fa",
//				"from datasets import Dataset",
//				baseVarName + "_ds = Dataset.from_csv('" + inputFile + "', encoding='iso-8859-1')",
//				baseVarName + "_faiss = fa.FAISSSearcher(ds=" + baseVarName + "_ds)", 
//				baseVarName + "_faiss.custom_faiss_index()",
//				baseVarName + "_faiss.save_index('" + indexFile + "')",
//				//"del " + baseVarName + "_faiss",
//				//"del " + baseVarName + "_ds",
//				//"del " + baseVarName + " fa",
//				//"del " + baseVarName + " Dataset"
//		};
//		
//		pyt.runPyAndReturnOutput(commands);
//	}
//	
//	/**
//	 * 
//	 * @param fileLocation
//	 * @return
//	 */
//	private Map addFileProperties(String fileLocation) {
//		Map fileMap = new HashMap<>();
//		Path path = Paths.get(fileLocation);
//		try {
//			BasicFileAttributes attr = Files.readAttributes(path, BasicFileAttributes.class);
//			fileMap.put("create_time", attr.creationTime());
//			fileMap.put("last_modified_time", attr.lastModifiedTime());
//			fileMap.put("last_access_time", attr.lastAccessTime());
//			fileMap.put("size", attr.size());
//			String mimeType = null;
//			if (SystemUtils.IS_OS_MAC) {
//			     mimeType = URLConnection.guessContentTypeFromName(path.toFile().getName());
//			} else {
//				 mimeType = Files.probeContentType(path);
//			}
//	
//			fileMap.put("mime_type", mimeType);
//			fileMap.put("name", path.getFileName() +"");
//		} catch(Exception ex) {
//			return null;
//		}
//		System.err.println(fileMap);
//		return fileMap;
//	}
//	
//}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy