All Downloads are FREE. Search and download functionalities are using the official Maven repository.

prerna.poi.specific.D3CSVLoader Maven / Gradle / Ivy

There is a newer version: 4.2.2
Show newest version
/*******************************************************************************
 * Copyright 2015 Defense Health Agency (DHA)
 *
 * If your use of this software does not include any GPLv2 components:
 * 	Licensed under the Apache License, Version 2.0 (the "License");
 * 	you may not use this file except in compliance with the License.
 * 	You may obtain a copy of the License at
 *
 * 	  http://www.apache.org/licenses/LICENSE-2.0
 *
 * 	Unless required by applicable law or agreed to in writing, software
 * 	distributed under the License is distributed on an "AS IS" BASIS,
 * 	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * 	See the License for the specific language governing permissions and
 * 	limitations under the License.
 * ----------------------------------------------------------------------------
 * If your use of this software includes any GPLv2 components:
 * 	This program is free software; you can redistribute it and/or
 * 	modify it under the terms of the GNU General Public License
 * 	as published by the Free Software Foundation; either version 2
 * 	of the License, or (at your option) any later version.
 *
 * 	This program is distributed in the hope that it will be useful,
 * 	but WITHOUT ANY WARRANTY; without even the implied warranty of
 * 	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * 	GNU General Public License for more details.
 *******************************************************************************/
package prerna.poi.specific;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;

import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.openrdf.model.Literal;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.LiteralImpl;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.RepositoryResult;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.repository.sail.SailRepositoryConnection;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.rdfxml.util.RDFXMLPrettyWriter;
import org.openrdf.sail.Sail;
import org.openrdf.sail.SailConnection;
import org.openrdf.sail.SailException;
import org.openrdf.sail.memory.MemoryStore;
import org.supercsv.cellprocessor.Optional;
import org.supercsv.cellprocessor.ParseBool;
import org.supercsv.cellprocessor.ParseDate;
import org.supercsv.cellprocessor.ParseDouble;
import org.supercsv.cellprocessor.ParseInt;
import org.supercsv.cellprocessor.constraint.NotNull;
import org.supercsv.cellprocessor.ift.CellProcessor;
import org.supercsv.io.CsvMapReader;
import org.supercsv.io.ICsvMapReader;
import org.supercsv.prefs.CsvPreference;

import com.bigdata.rdf.sail.BigdataSail;
import com.bigdata.rdf.sail.BigdataSailRepository;

import prerna.engine.api.IEngine;
import prerna.engine.impl.AbstractEngine;
import prerna.engine.impl.rdf.BigDataEngine;
import prerna.engine.impl.rdf.RDFFileSesameEngine;
import prerna.util.Constants;
import prerna.util.DIHelper;
import prerna.util.Utility;

/**
 * Loading data into SEMOSS using comma separated value (CSV) files.
 * Reads a CSV plus a companion prop file describing column types and
 * subject/predicate/object mappings, and writes the resulting triples into a
 * Bigdata (Blazegraph) sail plus an OWL metadata file.
 */
public class D3CSVLoader {

	// predicate name used when building node/relationship property URIs
	public final static String CONTAINS = "Contains";
	// prop-file key holding the number of columns in the CSV
	public final static String NUMCOL = "NUM_COLUMNS";
	// prop-file key listing column indexes that must contain a value
	public final static String NOT_OPTIONAL = "NOT_OPTIONAL";
	// NOTE(review): never assigned anywhere in the visible portion of this file;
	// closeDB() will NPE unless it is set elsewhere -- confirm
	BigDataEngine bigEngine;
	
	private static final Logger logger = LogManager.getLogger(D3CSVLoader.class.getName());
	
	// prop file describing how CSV columns map to types and URIs
	private Properties rdfMap;
	// reader over the CSV file currently being loaded
	private ICsvMapReader mapReader;
	// CSV header row, and the same header as a List for contains() checks
	private String [] header;
	private List headerList;
	// per-column SuperCSV cell processors built by createProcessors()
	private CellProcessor[] processors;
	private Properties bdProp = new Properties(); // properties for big data
	private Sail bdSail;
	private ValueFactory vf;
	// maps upper-cased type names from the prop file to SuperCSV processors
	// (raw Hashtable; generics appear to have been lost in extraction)
	private Hashtable  typeHash = new Hashtable();
	private String customBaseURI = "";
	private ArrayList relationArrayList, nodePropArrayList, relPropArrayList;
	// current CSV row number while reading
	private int count = 0;
	
	public String semossURI;
	public Hashtable baseConceptURIHash = new Hashtable(); 
	public Hashtable conceptURIHash = new Hashtable();
	public Hashtable baseRelationURIHash = new Hashtable(); 
	public Hashtable relationURIHash = new Hashtable();
	public Hashtable basePropURIHash = new Hashtable();
	public String basePropURI= "";

	// OWL variables
	private RepositoryConnection rcOWL;
	private SailConnection scOWL;
	private String owlFile;
	// NOTE(review): presumably maps type names to type URIs; its population is not
	// visible in this (corrupted) extract -- verify against the original source
	private Hashtable uriHash = new Hashtable();

	/**
	 * The main method is never called within SEMOSS
	 * Used to load data without having to start SEMOSS
	 * User must specify location of all files manually inside the method
	 * @param args String[]
	 */
	public static void main(String[] args) throws Exception
	{
		D3CSVLoader reader = new D3CSVLoader();
		String workingDir = DIHelper.getInstance().getProperty(Constants.BASE_FOLDER);
		reader.customBaseURI = "http://health.mil/ontologies";
		reader.semossURI = "http://semoss.org/ontologies";
		reader.createTypes();
		String bdPropFile = workingDir + "/db/D3.smss";
		reader.owlFile = workingDir + "/db/D3/D3.OWL";

		String propFile = workingDir + "/RDF_Map.prop";
		DIHelper.getInstance().loadCoreProp(propFile);

		PropertyConfigurator.configure(workingDir + "/log4j.prop");

		reader.loadBDProperties(bdPropFile);
		reader.openDB();

		reader.openOWLWithOutConnection();
		ArrayList files = new ArrayList();
		files.add(workingDir+System.getProperty("file.separator")+"db"+System.getProperty("file.separator")+"D3"+System.getProperty("file.separator")+"Ships2.csv");
		// NOTE(review): the line below is garbled -- text between "i" and "csvMap;" was
		// lost during HTML extraction. The end of main() and the header of the method
		// that owns the while-loop below are missing; recover them from the original
		// source before attempting to compile this file.
		for(int i = 0; i csvMap;
			while( (csvMap = mapReader.read(header, processors)) != null)
			{
				// get the column called object
				String object = (String)csvMap.get("Object");
				String objectType = (String)csvMap.get("Type");
				// NOTE(review): secondaryObject reads the same "Type" column as objectType -- confirm intended
				String secondaryObject = (String)csvMap.get("Type");
				String predicate = (String)csvMap.get("Predicate");
				String subject = (String) csvMap.get("Subject");
				String subjectTypeURI = uriHash.get("Organization");
				String subjectURI = subjectTypeURI + "/" + Utility.cleanString(subject, true);
				if(predicate != null && uriHash.containsKey(predicate))
				{
					String predicateTypeURI = uriHash.get(predicate);
					String predicateURI = predicateTypeURI + "/" + Utility.cleanString(subject, false) + "_" + Utility.cleanString(object, true);
					printTriple(predicateURI, RDFS.SUBPROPERTYOF +"" , predicateTypeURI);
					// address predicates are re-typed as Location and also linked to a Geo node
					if(predicate.contains("address"))
					{
						objectType = "Location";
						printTriple(subjectURI, uriHash.get("RGeo") +"/" + Utility.cleanString(subject, true)+ "_" + Utility.cleanString(secondaryObject, true), uriHash.get("Geo") + "/" + Utility.cleanString(secondaryObject, true));
						printTriple(uriHash.get("RGeo") +"/" + Utility.cleanString(subject, true)+ "_" + Utility.cleanString(secondaryObject, true), RDFS.SUBPROPERTYOF+"", uriHash.get("RGeo"));
						printTriple(uriHash.get("Geo") + "/" + Utility.cleanString(secondaryObject, true), RDF.TYPE+"", uriHash.get("Geo"));
					}
					if(objectType != null && uriHash.containsKey(objectType))
					{
						String objectTypeURI = uriHash.get(objectType);
						String objectURI = objectTypeURI + "/" + Utility.cleanString(object, true);
						// object type
						printTriple(subjectURI, RDF.TYPE + "", subjectTypeURI);
						printTriple(objectURI, RDF.TYPE+"", objectTypeURI);
						printTriple(objectURI, RDFS.LABEL+"", new LiteralImpl(object));
						// the core triple
						//printTriple(objectURI, predicateURI, subjectURI);
						printTriple(subjectURI, predicateURI, objectURI);
						//printTriple(object, predicate, subject);
					}
					else
						// unknown object type: store the object as a plain literal property
						printTriple(subjectURI, predicateURI, new LiteralImpl(object));
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Adds a single triple to the store, first tracing it to the log.
	 * @param subject	subject URI string
	 * @param predicate	predicate URI string
	 * @param object	object of the triple; a {@link Literal} is stored as-is,
	 * 					anything else is converted to a URI via its string form
	 */
	public void printTriple(String subject, String predicate, Object object)
	{
		// this fires once per triple loaded -- trace at DEBUG, not ERROR,
		// so routine loads do not flood the error log
		logger.debug(subject + "<>" + predicate + "<>" + object);

		if(object instanceof Literal)
			this.addStatement(vf.createURI(subject), vf.createURI(predicate), (Literal)object);
		else
			this.addStatement(vf.createURI(subject), vf.createURI(predicate), vf.createURI(object+""));
	}

	/**
	 * Loading data into SEMOSS to create a new database
	 * @param dbName 		String grabbed from the user interface that would be used as the name for the database
	 * @param fileNames		Absolute paths of files the user wants to load into SEMOSS, paths are separated by ";"
	 * @param customBase	String grabbed from the user interface that is used as the URI base for all instances 
	 * @param owlFile		String automatically generated within SEMOSS to determine the location of the OWL file that is produced
	 * @throws IOException 
	 * @throws FileNotFoundException 
	 * @throws RepositoryException 
	 * @throws RDFHandlerException 
	 * @throws SailException 
	 */
	public void importFileWithOutConnection(String dbName, String fileNames, String customBase, String owlFile) throws FileNotFoundException, IOException, RepositoryException, SailException, RDFHandlerException  
	{
		String[] files = fileNames.split(";");
		this.semossURI = (String) DIHelper.getInstance().getLocalProp(Constants.SEMOSS_URI);
		//make location of the owl file in the dbname folder
		this.owlFile = owlFile; 
		String bdPropFile = dbName;
		// empty customBase means "keep the default base URI"
		if(!customBase.equals(""))
		{
			customBaseURI = customBase;
		}
		semossURI = DIHelper.getInstance().getProperty(Constants.SEMOSS_URI);
		createTypes();
		loadBDProperties(bdPropFile);
		openDB();
		openOWLWithOutConnection();
		// NOTE(review): the line below is garbled -- the per-file loading loop and the
		// start of the base-relation export method were lost during HTML extraction
		// (baseHashIt's declaration, presumably Iterator over baseConceptURIHash keys,
		// is part of the missing text). Recover from the original source.
		for(int i = 0; i baseHashIt = baseConceptURIHash.keySet().iterator();
		//now add all of the base relations that have been stored in the hash.
		while(baseHashIt.hasNext()){
			String subjectInstance = baseHashIt.next() +"";
			String predicate = Constants.SUBCLASS_URI;
			//convert instances to URIs
			String subject = baseConceptURIHash.get(subjectInstance) +"";
			String object = semossURI + "/Concept";
			// create the statement now
			createStatement(vf.createURI(subject), vf.createURI(predicate), vf.createURI(object));
			// add base relations URIs to OWL
			scOWL.addStatement(vf.createURI(subject), vf.createURI(predicate), vf.createURI(object));
			scOWL.commit();
		}
		// repeat for base relationships: each is a subproperty of semoss:Relation
		baseHashIt = baseRelationURIHash.keySet().iterator();
		while(baseHashIt.hasNext()){
			String subjectInstance = baseHashIt.next() +"";
			String predicate = Constants.SUBPROPERTY_URI;
			//convert instances to URIs
			String subject = baseRelationURIHash.get(subjectInstance) +"";
			String object = semossURI + "/Relation";
			// create the statement now
			createStatement(vf.createURI(subject), vf.createURI(predicate), vf.createURI(object));
			// add base relationship URIs to OWL
			scOWL.addStatement(vf.createURI(subject), vf.createURI(predicate), vf.createURI(object));
			scOWL.commit();
		}

		// create the OWL File
		FileWriter fWrite = new FileWriter(owlFile);
		RDFXMLPrettyWriter owlWriter  = new RDFXMLPrettyWriter(fWrite); 
		rcOWL.export(owlWriter);
		fWrite.close();
		owlWriter.close();

		closeOWL();
	}

	/**
	 * Stores all possible variable types that the user can input from the CSV file into hashtable
	 * Hashtable is then used to match each column in CSV to a specific type based on user input in prop file
	 */
	public void createTypes()
	{
		typeHash.put("DECIMAL", new ParseDouble());
		typeHash.put("STRING", new NotNull());
		// BUGFIX: patterns previously used lowercase "mm" (minutes) for the month and
		// "hh" (12-hour clock) for the hour; the DATE_OPTIONAL entry below already used
		// the correct "yyyy-MM-dd HH:mm:ss", so the two variants parsed differently
		typeHash.put("DATE", new ParseDate("yyyy-MM-dd HH:mm:ss"));
		typeHash.put("SIMPLEDATE", new ParseDate("MM/dd/yyyy"));
		typeHash.put("NUMBER", new ParseInt());
		typeHash.put("BOOLEAN", new ParseBool());

		// now the optionals
		typeHash.put("DECIMAL_OPTIONAL", new Optional(new ParseDouble()));
		typeHash.put("STRING_OPTIONAL", new Optional());
		typeHash.put("DATE_OPTIONAL", new Optional(new ParseDate("yyyy-MM-dd HH:mm:ss")));
		typeHash.put("SIMPLEDATE_OPTIONAL", new Optional(new ParseDate("MM/dd/yyyy")));
		typeHash.put("NUMBER_OPTIONAL", new Optional(new ParseInt()));
		typeHash.put("BOOLEAN_OPTIONAL", new Optional(new ParseBool()));
	}

	/**
	 * Matches user inputed column type in prop file to the specific variable type name within Java SuperCSV API
	 * Builds one CellProcessor per CSV column from the numbered ("1", "2", ...) entries in the prop file.
	 */
	public void createProcessors()
	{
		// get the number of columns in the CSV file
		int numColumns = Integer.parseInt(rdfMap.getProperty(NUMCOL));
		// Columns listed under NOT_OPTIONAL must contain a value.
		// Guard against the key being absent so indexOf below cannot NPE.
		String optional = rdfMap.getProperty(NOT_OPTIONAL);
		if(optional == null)
			optional = "";
		processors = new CellProcessor[numColumns];
		for(int procIndex = 1; procIndex <= numColumns; procIndex++)
		{
			// find the type for each column
			String type = rdfMap.getProperty(procIndex + "");
			// NOTE(review): the original test is "> 1", not ">= 0", so a match at string
			// index 0 or 1 does not mark the column required; preserved as-is -- confirm
			boolean opt = true;
			if(optional.indexOf(";" + procIndex + ";") > 1)
				opt = false;

			// casts are needed because typeHash is declared as a raw Hashtable
			if(type != null && opt)
				processors[procIndex-1] = (CellProcessor) typeHash.get(type.toUpperCase() + "_OPTIONAL");
			else if(type != null)
				processors[procIndex-1] = (CellProcessor) typeHash.get(type.toUpperCase());
			else
				// no declared type: fall back to an optional free-form string column
				processors[procIndex-1] = (CellProcessor) typeHash.get("STRING_OPTIONAL");
		}
	}

	/**
	 * Specifies which rows in the CSV to load based on user input in the prop file
	 * @throws IOException 
	 */
	public void skipRows() throws IOException
	{
		//start count at 1 just row 1 is the header
		count = 1;
		int startRow = 2;
		if (rdfMap.getProperty("START_ROW") != null)
			startRow = Integer.parseInt(rdfMap.getProperty("START_ROW")); 
		// NOTE(review): the line below is garbled -- the skip loop body, the end of
		// skipRows(), and the header of the row-processing method that owns the code
		// below were lost during HTML extraction (jcrMap's declaration, presumably
		// Map over the CSV row, is part of the missing text).
		while( count jcrMap;
		// max row predetermined value
		int maxRows = 10000;
		// overwrite this value if user specified the max rows to load
		if (rdfMap.getProperty("END_ROW") != null)
			maxRows =  Integer.parseInt(rdfMap.getProperty("END_ROW"));
		// only start from the maxRow - the startRow
		// added -1 is because of index nature
		// the earlier rows should already have been skipped
		while( (jcrMap = mapReader.read(header, processors)) != null && count<(maxRows))
		{
			count++;
			logger.info("Process line: " +count);

			// process all relationships in row
			// NOTE(review): the line below is also garbled -- the loop bound, the rest of
			// this method, and the header of the relation-URI method that follows are missing.
			for(int relIndex = 0;relIndex();
		// NOTE(review): this loop is the body of a relationship-URI-building method whose
		// signature was lost to extraction corruption just above. It walks entries of the
		// form "subject@predicate@object" (relationTokens, declared in the missing text),
		// validates subject/object against the CSV header, and fills the concept/relation
		// URI hashes from the prop file or from generated defaults.
		// process each relationship
		while(relationTokens.hasMoreElements())
		{
			String relation = relationTokens.nextToken();
			// just in case the end of the prop string is empty string or spaces
			if(!relation.contains("@"))
				break;

			relationArrayList.add(relation);
			logger.info("Loading relation " + relation);
			String[] strSplit = relation.split("@");
			// get the subject and object for triple (the two indexes)
			String subject = strSplit[0];
			String predicate = strSplit[1];
			String object = strSplit[2];

			// check to see if the predicate has 
			if(predicate.startsWith("dynamic"))
				System.err.println("Creating Dynamic ");

			// check if prop file entries are not in excel and if nodes are concatenations
			// throw exception if prop file entries not in excel
			boolean headException = true;
			if(subject.contains("+"))
			{
				headException = isProperConcatHeader(subject);
			}
			else
			{
				if(!headerList.contains(subject))
					headException = false;
			}
			if(headException == false)
				throw new IOException(subject + " cannot be found as a header");

			if(object.contains("+"))
			{
				headException = isProperConcatHeader(object);
			}
			else
			{
				if(!headerList.contains(object))
					headException = false;
			}
			// NOTE(review): message names `subject` although it was `object` that failed
			// the check above -- likely copy/paste slip; confirm before fixing
			if(headException == false)
				throw new IOException(subject + " cannot be found as a header");

			// create concept uris
			String relURI = "";
			String relBaseURI = "";
			String idxBaseURI = "";
			String idxURI = "";

			// see if subject node SEMOSS base URI exist in prop file first
			if(rdfMap.containsKey(subject+Constants.CLASS))
			{
				baseConceptURIHash.put(subject+Constants.CLASS,rdfMap.getProperty(subject+Constants.CLASS));
			}
			// if no user specific URI, use generic SEMOSS base URI
			else
			{
				if(subject.contains("+"))
				{
					String processedSubject = processAutoConcat(subject);
					idxBaseURI = semossURI + "/" + Constants.DEFAULT_NODE_CLASS +"/"+ processedSubject;
				}
				else
				{
					idxBaseURI = semossURI + "/" + Constants.DEFAULT_NODE_CLASS +"/"+ subject;
				}
				baseConceptURIHash.put(subject+Constants.CLASS, idxBaseURI);
			}
			// see if subject node instance URI exists in prop file
			if(rdfMap.containsKey(subject))
			{
				conceptURIHash.put(subject, rdfMap.getProperty(subject));
			}
			// if no user specified URI, use generic custombaseURI
			else
			{
				if(subject.contains("+"))
				{
					String processedSubject = processAutoConcat(subject);
					idxURI = customBaseURI + "/" + Constants.DEFAULT_NODE_CLASS +"/"+ processedSubject;
				}
				else
				{
					idxURI = customBaseURI + "/" + Constants.DEFAULT_NODE_CLASS +"/"+ subject;
				}
				conceptURIHash.put(subject, idxURI);
			}
			// see if object node SEMOSS base URI exists in prop file
			if(rdfMap.containsKey(object+Constants.CLASS))
			{
				baseConceptURIHash.put(object+Constants.CLASS,rdfMap.getProperty(object+Constants.CLASS));
			}
			// if no user specified URI, use generic SEMOSS base URI
			else
			{
				if(object.contains("+"))
				{
					String processedObject = processAutoConcat(object);
					idxBaseURI = semossURI + "/" + Constants.DEFAULT_NODE_CLASS +"/"+ processedObject;
				}
				else
				{
					idxBaseURI = semossURI + "/" + Constants.DEFAULT_NODE_CLASS +"/"+ object;
				}
				baseConceptURIHash.put(object+Constants.CLASS, idxBaseURI);
			}
			// see if object node instance URI exists in prop file
			if(rdfMap.containsKey(object))
			{
				conceptURIHash.put(object, rdfMap.getProperty(object));
			}
			// if no user specified URI, use generic custombaseURI
			else
			{
				if(object.contains("+"))
				{
					String processedObject = processAutoConcat(object);
					idxURI = customBaseURI + "/" + Constants.DEFAULT_NODE_CLASS +"/"+ processedObject;
				}
				else
				{
					idxURI = customBaseURI + "/" + Constants.DEFAULT_NODE_CLASS +"/"+ object;
				}
				conceptURIHash.put(object, idxURI);
			}
			// add relation uri into basehash and urihash
			String relPropString = subject + "_"+ predicate + "_" + object; //this string concat shows up in prop file

			// see if relationship SEMOSS base URI exists in prop file
			if(rdfMap.containsKey(relPropString+Constants.CLASS)) {
				baseRelationURIHash.put(relPropString+Constants.CLASS,rdfMap.getProperty(relPropString+Constants.CLASS));
			}
			// if no user specified URI, use generic SEMOSS base URI
			else
			{
				relBaseURI = semossURI + "/" + Constants.DEFAULT_RELATION_CLASS + "/" + predicate;
				baseRelationURIHash.put(relPropString+Constants.CLASS, relBaseURI);
			}
			// see if relationship URI exists in prop file
			if(rdfMap.containsKey(relPropString)) {
				relationURIHash.put(relPropString,rdfMap.getProperty(relPropString));
			}
			// if no user specified URI, use generic custombaseURI
			else {
				relURI = customBaseURI + "/" + Constants.DEFAULT_RELATION_CLASS + "/" + predicate;
				relationURIHash.put(relPropString, relURI);
			}
		}
	}

	/**
	 * Create and store node property URIs at the SEMOSS base and instance levels.
	 * Entries come from the prop-file NODE_PROP key in the form
	 * "subject%prop1%prop2;...", where each prop must be a CSV header.
	 * @throws IOException if a node or property name is not found in the CSV header
	 */
	public void processNodePropURIs() throws IOException
	{
		String nodePropNames = rdfMap.getProperty("NODE_PROP");
		// nothing to do when the prop file defines no node properties
		// (previously this NPE'd inside the StringTokenizer constructor)
		if(nodePropNames == null)
			return;
		StringTokenizer nodePropTokens = new StringTokenizer(nodePropNames, ";");
		nodePropArrayList = new ArrayList();
		if(basePropURI.equals("")){
			basePropURI = semossURI + "/" + Constants.DEFAULT_RELATION_CLASS + "/" + CONTAINS;
		}
		createStatement(vf.createURI(basePropURI),vf.createURI(Constants.SUBPROPERTY_URI),vf.createURI(basePropURI));

		while(nodePropTokens.hasMoreElements())
		{
			String relation = nodePropTokens.nextToken();
			// in case the end of the prop string is empty string or spaces
			if(!relation.contains("%"))
				break;

			nodePropArrayList.add(relation);
			logger.info("Loading Node Prop " + relation);
			// index 0 is the node; the remaining entries are its properties
			String[] strSplit = relation.split("%");
			String subject = strSplit[0];
			// loop through all properties on the node
			for(int i = 1; i < strSplit.length; i++)
			{
				String prop = strSplit[i];
				String idxBaseURI = "";
				String idxURI = "";
				String propURI = "";

				// verify the subject (possibly a concatenation) exists in the CSV header
				boolean headException = true;
				if(subject.contains("+"))
				{
					headException = isProperConcatHeader(subject);
				}
				else
				{
					if(!headerList.contains(subject))
						headException = false;
				}
				if(headException == false)
					throw new IOException(subject + " cannot be found as a header");

				// verify the property exists in the CSV header
				if(prop.contains("+"))
				{
					headException = isProperConcatHeader(prop);
				}
				else
				{
					if(!headerList.contains(prop))
						headException = false;
				}
				// BUGFIX: message previously reported `subject` even though it was
				// `prop` that failed the check (cf. processRelationPropURIs)
				if(headException == false)
					throw new IOException(prop + " cannot be found as a header");

				// see if subject node SEMOSS base URI exists in prop file
				if(rdfMap.containsKey(subject+Constants.CLASS))
				{
					// BUGFIX: previously stored getProperty(subject) under the CLASS key,
					// mismatching the containsKey check above and the relationship path
					baseConceptURIHash.put(subject+Constants.CLASS,rdfMap.getProperty(subject+Constants.CLASS));
				}
				// if no user specified URI, use generic SEMOSS base URI
				else
				{
					idxBaseURI = semossURI + "/" + Constants.DEFAULT_NODE_CLASS +"/"+ subject;
					baseConceptURIHash.put(subject+Constants.CLASS, idxBaseURI);
				}
				// see if subject node instance URI exists in prop file
				if(rdfMap.containsKey(subject))
				{
					conceptURIHash.put(subject, rdfMap.getProperty(subject));
				}
				// if no user specified URI, use generic custombaseURI
				else
				{
					idxURI = customBaseURI + "/" + Constants.DEFAULT_NODE_CLASS +"/"+ subject;
					conceptURIHash.put(subject, idxURI);
				}

				// register the property URI as an instance of the base Contains relation
				propURI = basePropURI+"/" + prop;
				createStatement(vf.createURI(propURI),RDF.TYPE,vf.createURI(basePropURI));
				basePropURIHash.put(prop,  propURI);
			}
		}
	}

	/**
	 * Create and store relationship property URIs at the SEMOSS base and instance levels.
	 * Entries come from the prop-file RELATION_PROP key in the form
	 * "relation%prop1%prop2;...", where each prop must be a CSV header.
	 * @throws IOException if a property name is not found in the CSV header
	 */
	public void processRelationPropURIs() throws IOException
	{
		String propNames = rdfMap.getProperty("RELATION_PROP");
		// nothing to do when the prop file defines no relationship properties
		// (previously this NPE'd inside the StringTokenizer constructor)
		if(propNames == null)
			return;
		StringTokenizer propTokens = new StringTokenizer(propNames, ";");
		relPropArrayList = new ArrayList();
		if(basePropURI.equals("")){
			basePropURI = semossURI + "/" + Constants.DEFAULT_RELATION_CLASS + "/" + CONTAINS;
		}
		while(propTokens.hasMoreElements())
		{
			String relation = propTokens.nextToken();
			//just in case the end of the prop string is empty string or spaces
			if(!relation.contains("%"))
				break;

			relPropArrayList.add(relation);
			logger.info("Loading relation prop " + relation);
			// index 0 is the relationship; the remaining entries are its properties
			String[] strSplit = relation.split("%");
			// loop through all properties on the relationship
			for(int i = 1; i < strSplit.length; i++)
			{
				String prop = strSplit[i];
				// verify the property (possibly a concatenation) exists in the CSV header
				boolean headException = true;
				if(prop.contains("+"))
				{
					headException = isProperConcatHeader(prop);
				}
				else
				{
					if(!headerList.contains(prop))
						headException = false;
				}
				if(headException == false)
					throw new IOException(prop + " cannot be found as a header");

				// register the property URI as an instance of the base Contains relation
				String propURI = basePropURI+"/"+prop;
				createStatement(vf.createURI(propURI),RDF.TYPE,vf.createURI( basePropURI));
				basePropURIHash.put(prop,  propURI);
			}
		}
	}

	/**
	 * Change the name of nodes that are concatenations of multiple CSV columns
	 * Example: changes the string "Cat+Dog" into "CatDog"
	 * @param 	input String name of the node that is a concatenation
	 * @return 	String name of the node removing the "+" to indicate a concatenation
	 */
	public String processAutoConcat(String input)
	{
		String[] split = input.split("\\+");
		String output = "";
		// NOTE(review): the line below is garbled -- the loop bound, the end of this
		// method, and the signature/javadoc of the property-creation method that owns
		// the body below were lost during HTML extraction (jcrMap is presumably a
		// Map over the CSV row; subjectURI/propName/propPredBaseURI are parameters
		// from the missing signature). Recover from the original source.
		for (int i=0;i jcrMap) throws SailException
	{
		if(jcrMap.containsKey(propName) && jcrMap.get(propName)!= null)
		{
			Object oInstance = createObject(propName, jcrMap);
			// numeric property values are stored as typed double literals
			if(oInstance instanceof Double)
			{
				createStatement(vf.createURI(subjectURI), vf.createURI(propPredBaseURI), vf.createLiteral(((Double)oInstance).doubleValue()));
			}
			else if(oInstance instanceof Date)
			{
				DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
				// short string form means the cell held a date without a time component
				if(oInstance.toString().length()<=10)
				{
					df=new SimpleDateFormat("MM-dd-yyyy");
				}
				String date = df.format(oInstance);
				URI datatype = vf.createURI("http://www.w3.org/2001/XMLSchema#dateTime");
				createStatement(vf.createURI(subjectURI), vf.createURI(propPredBaseURI), vf.createLiteral(date, datatype));
			}
			else
			{
				String value = oInstance + "";
				// try to see if it already has properties then add to it
				// sanitize characters that would break the literal/URI handling downstream
				String cleanValue = value.replaceAll("/", "-").replaceAll("\"", "'");			
				createStatement(vf.createURI(subjectURI), vf.createURI(propPredBaseURI), vf.createLiteral(cleanValue));
			}		
		}

	}

	/**
	 * Cleans the pieces of a triple and adds it to the repository connection.
	 * @param subject		URI for the subject of the triple
	 * @param predicate		URI for the predicate of the triple
	 * @param object		Value for the object of the triple; not a URI because objects may be literals
	 */
	protected void createStatement(URI subject, URI predicate, Value object)
	{
		// normalize subject and predicate before storing
		String cleanSub = Utility.cleanString(subject.stringValue().trim(), false);
		URI newSub = vf.createURI(cleanSub);

		String cleanPred = Utility.cleanString(predicate.stringValue().trim(), false);
		URI newPred = vf.createURI(cleanPred);

		// literals pass through untouched; resource objects get the same cleanup
		Value newObj;
		if(object instanceof Literal)
		{
			newObj = object;
		}
		else
		{
			String cleanObj = Utility.cleanString(object.stringValue(), false);
			newObj = vf.createURI(cleanObj);
		}

		this.addStatement(newSub, newPred, newObj);
	}

	/**
	 * Constructs the node instance name
	 * @param subject 		String containing the node type name
	 * @param jcrMap 		Map containing the data in the CSV file
	 * @return retString 	String containing the instance level name
	 */
	public String createInstanceValue(String subject, Map  jcrMap)
	{
		String retString ="";
		// if node is a concatenation
		if(subject.contains("+")) 
		{
			String elements[] = subject.split("\\+");
			// NOTE(review): the line below is garbled -- the loop over `elements`, the
			// rest of this method, and the signature/javadoc of createObject were lost
			// during HTML extraction. Recover from the original source.
			for (int i = 0; i jcrMap)
	{
		// direct lookup of the raw cell value for the given column name
		return jcrMap.get(object);
	}

	/**
	 * Loading engine properties in order to create the database 
	 * @param fileName String containing the fileName of the temp file that contains the information of the smss file
	 * @throws FileNotFoundException if the properties file does not exist
	 * @throws IOException if the properties file cannot be read
	 */
	public void loadBDProperties(String fileName) throws FileNotFoundException, IOException
	{
		InputStream fis = new FileInputStream(fileName);
		try {
			bdProp.load(fis);
		} finally {
			// always release the file handle, even when load() throws
			fis.close();
		}
	}

	/**
	 * Creates the database based on the engine properties 
	 * @throws RepositoryException 
	 */
	public void openDB() throws RepositoryException 
	{
		// create database based on engine properties
		String baseFolder = DIHelper.getInstance().getProperty("BaseFolder");
		// resolve the journal file path relative to the base folder and write it back into the props
		String fileName = baseFolder + "/" + bdProp.getProperty("com.bigdata.journal.AbstractJournal.file");
		bdProp.put("com.bigdata.journal.AbstractJournal.file", fileName);
		bdSail = new BigdataSail(bdProp);
		Repository repo = new BigdataSailRepository((BigdataSail) bdSail);
		repo.initialize();
		// NOTE(review): `src` is never used or closed -- looks like a leaked connection
		// (or a remnant of removed code); confirm whether opening it is required
		SailRepositoryConnection src = (SailRepositoryConnection) repo.getConnection();
		vf = bdSail.getValueFactory();
	}

	/**
	 * Loads the prop file for the CSV file
	 * @param fileName	Absolute path to the prop file specified in the last column of the CSV file
	 * @throws FileNotFoundException if the prop file does not exist
	 * @throws IOException if the prop file cannot be read
	 */
	public void openProp(String fileName) throws FileNotFoundException, IOException
	{
		rdfMap = new Properties();
		// Properties.load does not close the stream; close it ourselves
		InputStream fis = new FileInputStream(fileName);
		try {
			rdfMap.load(fis);
		} finally {
			fis.close();
		}
	}

	/**
	 * Load the CSV file
	 * Gets the headers for each column and reads the property file
	 * @param fileName String absolute path to the CSV file
	 * @throws FileNotFoundException if the CSV file does not exist
	 * @throws IOException if the header row cannot be read
	 */
	public void openCSVFile(String fileName) throws FileNotFoundException, IOException
	{
		FileReader csvReader = new FileReader(fileName);
		mapReader = new CsvMapReader(csvReader, CsvPreference.STANDARD_PREFERENCE);
		// the first line of the file is the header row
		header = mapReader.getHeader(true);
		headerList = Arrays.asList(header);
		// load the prop file describing how to interpret the columns, then build the processors
		openProp("db/d3/csvload.prop");
		createProcessors();
	}

	/**
	 * Creates an in-memory repository connection used to collect the base relationship
	 * data that is later exported as the OWL file.
	 * @throws RepositoryException if the in-memory repository cannot be initialized
	 */
	public void openOWLWithOutConnection() throws RepositoryException
	{
		Repository myRepository = new SailRepository(new MemoryStore());
		myRepository.initialize();
		rcOWL = myRepository.getConnection();
		scOWL = ((SailRepositoryConnection) rcOWL).getSailConnection();
		// (removed a discarded rcOWL.getValueFactory() call -- its result was never used)
	}

	/**
	 * Creates a repository connection and puts all the existing base relationships to create an updated OWL file
	 * @param engine	The database engine used to get all the existing base relationships
	 * @throws RepositoryException if the in-memory repository cannot be initialized or loaded
	 */
	public void openOWLWithConnection(IEngine engine) throws RepositoryException
	{
		Repository myRepository = new SailRepository(new MemoryStore());
		myRepository.initialize();
		rcOWL = myRepository.getConnection();
		scOWL = ((SailRepositoryConnection) rcOWL).getSailConnection();
		// NOTE(review): result of getValueFactory() is discarded -- dead call?
		rcOWL.getValueFactory();

		// pull the pre-existing base relationships out of the engine's base data store
		AbstractEngine baseRelEngine = ((AbstractEngine)engine).getBaseDataEngine();
		RepositoryConnection existingRC = ((RDFFileSesameEngine) baseRelEngine).getRc();
		// load pre-existing base data
		RepositoryResult rcBase = existingRC.getStatements(null, null, null, false);
		List rcBaseList = rcBase.asList();
		Iterator iterator = rcBaseList.iterator();
		// log each pre-existing statement, then copy them all into the OWL connection
		while(iterator.hasNext()){
			logger.info(iterator.next());
		}
		rcOWL.add(rcBaseList);		
	}

	/**
	 * Close the OWL engine
	 * @throws SailException 
	 * @throws RepositoryException 
	 */
	protected void closeOWL() throws SailException, RepositoryException
	{
		// close the inner sail connection first, then the repository connection wrapping it
		scOWL.close();
		rcOWL.close();
	}

	/**
	 * Close the database engine
	 * @throws SailException 
	 */
	public void closeDB() throws SailException
	{
		logger.warn("Closing....");
		// NOTE(review): bigEngine is never assigned in the visible portion of this file,
		// so this will NPE unless it is set elsewhere -- confirm
		bigEngine.closeDB();
	}	
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy