// --- Page-extraction artifact (not source code): Maven repository banner ---
// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
// prerna.rdf.util.SPARQLQueryParser — Maven / Gradle / Ivy (the newest version!)
/*******************************************************************************
 * Copyright 2015 Defense Health Agency (DHA)
 *
 * If your use of this software does not include any GPLv2 components:
 * 	Licensed under the Apache License, Version 2.0 (the "License");
 * 	you may not use this file except in compliance with the License.
 * 	You may obtain a copy of the License at
 *
 * 	  http://www.apache.org/licenses/LICENSE-2.0
 *
 * 	Unless required by applicable law or agreed to in writing, software
 * 	distributed under the License is distributed on an "AS IS" BASIS,
 * 	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * 	See the License for the specific language governing permissions and
 * 	limitations under the License.
 * ----------------------------------------------------------------------------
 * If your use of this software includes any GPLv2 components:
 * 	This program is free software; you can redistribute it and/or
 * 	modify it under the terms of the GNU General Public License
 * 	as published by the Free Software Foundation; either version 2
 * 	of the License, or (at your option) any later version.
 *
 * 	This program is distributed in the hope that it will be useful,
 * 	but WITHOUT ANY WARRANTY; without even the implied warranty of
 * 	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * 	GNU General Public License for more details.
 *******************************************************************************/
package prerna.rdf.util;

import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.openrdf.query.algebra.StatementPattern;
import org.openrdf.query.algebra.Var;
import org.openrdf.query.parser.ParsedQuery;
import org.openrdf.query.parser.sparql.SPARQLParser;

import prerna.util.Constants;
import prerna.util.Utility;

/**
 * Parses a SPARQL SELECT/CONSTRUCT query (via openrdf's {@link SPARQLParser})
 * and extracts the concept types, properties and relationships it touches,
 * assigning each discovered URI a relevance weight based on how often it
 * occurs in the query's triple patterns and whether it backs a returned
 * variable.
 *
 * NOTE(review): generic type parameters on the collections below were
 * reconstructed from usage (the published source had them stripped by the
 * hosting page); confirm against {@code AbstractQueryParser}'s declarations.
 */
public class SPARQLQueryParser extends AbstractQueryParser {
	
	private static final Logger classLogger = LogManager.getLogger(SPARQLQueryParser.class);

	/** Scale factor applied to the normalized occurrence count when weighting a URI. */
	private static final double GOLDEN_RATIO = 0.618;

	// URI -> relevance weight computed by parseQuery()
	private Hashtable<String, Double> finalHash;
	// type/property URI (or namespaced name) -> occurrence count across the triple patterns
	private Hashtable<String, Integer> countHash;
	// projected expression (punctuation stripped, e.g. "AVG(?col") -> its SELECT alias
	private Hashtable<String, String> variableMapping;
	// every StatementPattern collected from the parsed query's algebra tree
	private List<StatementPattern> patterns;

	public SPARQLQueryParser(){
		super();
	}
	
	public SPARQLQueryParser(String query){
		super(query);
	}
	
//	public static void main(String[] args) throws Exception {
//		basicParseTest();
//	}
	
	/**
	 * Parses {@code query}: maps projected aggregate expressions to their
	 * aliases, collects all triple patterns, detects aggregate functions,
	 * tallies URI occurrences (via {@link #getURIList()}) and finally computes
	 * a weight per URI into {@link #finalHash} — occurrence count normalized
	 * by the maximum count, scaled by {@link #GOLDEN_RATIO}, plus 1 when the
	 * URI backs a variable that is returned or aliased in the projection.
	 */
	@Override
	public void parseQuery(){
		
		variableMapping = new Hashtable<>();
		
		// Matches "(?name" (group 1) or "?name)" (group 2) so that aggregate
		// projections such as "(AVG(?col) AS ?alias)" yield consecutive
		// expression/alias matches.
		final String regex = "\\(\\?([^(\\s|\\)|,)]*)|\\?([^(\\s|\\)|,)]*)\\)";
		final Pattern pattern = Pattern.compile(regex);
		Matcher matcher = null;
		// NOTE(review): the case-insensitive contains() is paired with a
		// case-sensitive indexOf("SELECT"); an all-lowercase "select" query
		// would throw StringIndexOutOfBoundsException — preserved as-is,
		// confirm upstream queries are always upper-cased.
		if(query.toLowerCase().contains("select")){
			matcher = pattern.matcher(query.substring(query.indexOf("SELECT"), query.indexOf("WHERE")));
		}
		else {
			matcher = pattern.matcher(query.substring(query.indexOf("CONSTRUCT"), query.indexOf("WHERE")));
		}
		// Pair consecutive matches: the first is the projected expression, the
		// next match's group(2) is taken as the alias it is bound to.
		while(matcher.find()) {
			String orig = matcher.group();
			orig = processString(orig);
			String ret = "";
			if(matcher.find()) {
				ret = matcher.group(2);
				ret = processString(ret);
			}
			variableMapping.put(orig, ret);
		}
		
		countHash = new Hashtable<>();
		try {
			SPARQLParser parser = new SPARQLParser();
			ParsedQuery parsedQuery = parser.parseQuery(query, null);
			
			// Gather every StatementPattern in the algebra tree.
			StatementCollector collector = new StatementCollector();
			parsedQuery.getTupleExpr().visit(collector);

			// Detect aggregate/function calls (AVG, SUM, ...) in the projection.
			FunctionCallCollector aggregateFunctionsCollector = new FunctionCallCollector();
			parsedQuery.getTupleExpr().visit(aggregateFunctionsCollector);
			if(aggregateFunctionsCollector.getValue() != null){
				hasColumnAggregatorFunction = true;
			}
			
			returnVariables = parsedQuery.getTupleExpr().getBindingNames(); 
			
			patterns = collector.getPatterns();
			
			getURIList(); // populates countHash, types, props, triples and alias maps
		} catch (Exception e) {
			classLogger.error(Constants.STACKTRACE, e);
		}
		
		// Weight each URI: normalized count * GOLDEN_RATIO, +1 if any variable
		// backed by the URI is returned by the query or aliased in the SELECT.
		finalHash = new Hashtable<>();
		int max = 0;
		for(String key : countHash.keySet()) {
			int c = countHash.get(key);
			if(c > max) {
				max = c;
			}
		}
		
		
		for(String key : countHash.keySet()) {
			double weight = GOLDEN_RATIO * countHash.get(key) / max;
			if(types.containsValue(key)) {
				List<String> possibleVariableNames = getKeyFromVal(key, types);
				for(String variableName : possibleVariableNames) {
					if(returnVariables.contains(variableName) || variableMapping.containsKey(variableName)) {
						weight += 1;
						break;
					}
				}
			} else if(props.containsValue(key)) {
				List<String> possibleVariableNames = getKeyFromVal(key, props);
				for(String variableName : possibleVariableNames) {
					if(returnVariables.contains(variableName) || variableMapping.containsKey(variableName)) {
						weight += 1;
						break;
					}
				}
			}
			finalHash.put(key, weight);
		}
	}
	
	/**
	 * Reverse lookup: returns every key in {@code map} whose value equals
	 * {@code val} (several keys may map to the same value).
	 */
	private List<String> getKeyFromVal(String val, Hashtable<String, String> map) {
		List<String> retList = new ArrayList<>();
		for(String key : map.keySet()) {
			if(map.get(key).equals(val)) {
				retList.add(key);
			}
		}
		return retList;
	}

	/**
	 * Walks the collected triple patterns twice: pass 1 registers every
	 * concept type (rdf:type statements) and its variable alias; pass 2 uses
	 * that alias map to register properties (Relation/Contains/*) and plain
	 * concept-to-concept relations (accumulated into {@code triplesData}).
	 * Finally folds the raw per-variable occurrence counts into
	 * {@link #countHash}, resolving bare variable names to their type or
	 * property URI where one exists.
	 */
	private void getURIList() {

		Hashtable<String, Integer> dataHash = new Hashtable<>();
		types = new Hashtable<>();
		props = new Hashtable<>();

		// Pass 1: rdf:type statements. Must run first so the variable-alias ->
		// type mapping exists before properties are resolved in pass 2.
		for(int patIndex = 0;patIndex < patterns.size();patIndex++)
		{
			StatementPattern thisPattern = patterns.get(patIndex);

			Var subjectVar = thisPattern.getSubjectVar(); //cant use this, its the alias.
			Var objectVar = thisPattern.getObjectVar();
			Var predicateVar = thisPattern.getPredicateVar();//cant use this, its the alias.
			
			dataHash = recordVar(subjectVar, dataHash);
			dataHash = recordVar(objectVar, dataHash);
			
			if(predicateVar.isConstant() && (predicateVar.getValue()+"").equalsIgnoreCase("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")) {
				types.put(Utility.getInstanceName(objectVar.getValue().toString()) + "", objectVar.getValue() + "");
				aliasTableMap.put(subjectVar.getName(), Utility.getInstanceName(objectVar.getValue().toString()));
			}
		}
		
		// Pass 2: properties and relations, now that aliases map to types.
		for(int patIndex = 0;patIndex < patterns.size();patIndex++)
		{
			StatementPattern thisPattern = patterns.get(patIndex);

			Var subjectVar = thisPattern.getSubjectVar(); //cant use this, its the alias.
			Var objectVar = thisPattern.getObjectVar();
			Var predicateVar = thisPattern.getPredicateVar();//cant use this, its the alias.
			
			if(predicateVar.isConstant() && (predicateVar.getValue()+"").contains("ontologies/Relation/Contains/")) {
				String propPlainText = Utility.getInstanceName(predicateVar.getValue().toString());
				props.put(propPlainText  + "", predicateVar.getValue() + "");
				String nodeType = aliasTableMap.get(subjectVar.getName());
				addToVariablesMap(typePropVariables, nodeType, objectVar.getName(), predicateVar.getValue().toString());
				if(returnVariables.contains(objectVar.getName())){
					addToVariablesMap(typeReturnVariables, nodeType, objectVar.getName(), predicateVar.getValue().toString());
				}
			} else if(predicateVar.isConstant() && (predicateVar.getValue()+"").contains("ontologies/Relation")) {
				// must be a relation triple between two typed concepts
				String[] triple = new String[3];
				triple[0] = types.get(aliasTableMap.get(subjectVar.getName()));
				triple[1] = predicateVar.getValue().toString();
				triple[2] = types.get(aliasTableMap.get(objectVar.getName()));
				triplesData.add(triple);
			}
		}

		// Fold the raw counts into countHash under the resolved URI/name.
		Enumeration<String> keys = dataHash.keys();
		while(keys.hasMoreElements())
		{
			String key = "" + keys.nextElement();
			if(key.contains(":")) // already a namespaced URI — count it as-is
			{
				Integer typeProxyCount = dataHash.get(key);
				if(countHash.containsKey(key)) {
					typeProxyCount = typeProxyCount + countHash.get(key);
				}
				countHash.put(key, typeProxyCount);
			}else
			{
				String typeName = types.get(key);
				Integer typeProxyCount = dataHash.get(key);
				if(typeName != null) {
					if(countHash.containsKey(typeName)) {
						typeProxyCount = typeProxyCount + countHash.get(typeName);
					}
					countHash.put(typeName, typeProxyCount);
				} else {
					String propName = props.get(key);
					Integer propProxyCount = dataHash.get(key);
					if(propName != null) {
						if(countHash.containsKey(propName)) {
							// BUGFIX: previously read countHash.get(typeName), but
							// typeName is always null on this branch, and
							// Hashtable.get(null) throws NullPointerException.
							// The existing count for propName is the intended addend.
							propProxyCount = propProxyCount + countHash.get(propName);
						}
						countHash.put(propName, propProxyCount);
					}
				}
			}
		}
	}

	/**
	 * Increments the occurrence count for {@code var} in {@code inputHash},
	 * keyed by its bound value when it is a constant, otherwise by its
	 * variable name. Returns the same (mutated) table for chaining.
	 */
	private Hashtable<String, Integer> recordVar(Var var, Hashtable<String, Integer> inputHash) {
		if(var.hasValue()) {
			Integer count = inputHash.get(var.getValue()+"");
			if(count == null) {
				count = 0;
			}
			count++;
			inputHash.put(var.getValue()+"", count);
		} else {
			Integer count = inputHash.get(var.getName()+"");
			if(count == null) {
				count = 0;
			}
			count++;
			inputHash.put(var.getName()+"", count);
		}
		return inputHash;
	}
	
	/** Strips '?', '(' and ')' from a projection token captured by the regex. */
	private String processString(String s) {
		return s.replaceAll("\\?", "").replaceAll("\\)", "").replaceAll("\\(", "");
	}

	/** Returns the triple patterns collected by the last {@link #parseQuery()} call. */
	public List<StatementPattern> getPatterns() {
		return patterns;
	}

	private void setPatterns(List<StatementPattern> patterns) {
		this.patterns = patterns;
	}

	private void setReturnVariables(Set<String> returnVariables) {
		this.returnVariables = returnVariables;
	}

	@Override
	public List<String[]> getTriplesData() {
		return triplesData;
	}
	

	
	////tester methods
	/**
	 * Ad-hoc smoke test used during development (invoked from the
	 * commented-out {@code main} above); exercises a movie-catalog query with
	 * aggregates. Generic parameters on the getter results below were
	 * reconstructed — confirm against {@code AbstractQueryParser}.
	 */
	private static void basicParseTest(){
		
		String query = "SELECT DISTINCT ?Director (AVG(?Title__MovieBudget) AS ?x) (SUM(?Title__MovieBudget) AS ?y) WHERE { BIND(<@Studio-http://semoss.org/ontologies/Concept/Studio@> AS ?Studio) {?Title <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Title>} {?Director <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Director>} {?Studio <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Studio>} {?Title <http://semoss.org/ontologies/Relation/DirectedBy> ?Director} {?Title <http://semoss.org/ontologies/Relation/DirectedAt> ?Studio} {?Title <http://semoss.org/ontologies/Relation/Contains/MovieBudget> ?Title__MovieBudget} {?Title <http://semoss.org/ontologies/Relation/Contains/Revenue-International> ?Title__Revenue_International} {?Title <http://semoss.org/ontologies/Relation/Contains/Revenue-Domestic> ?Title__Revenue_Domestic} {?Title <http://semoss.org/ontologies/Relation/Contains/RottenTomatoes-Audience> ?Title__RottenTomatoes_Audience} {?Title <http://semoss.org/ontologies/Relation/Contains/RottenTomatoes-Critics> ?Title__RottenTomatoes_Critics}  }  GROUP BY ?Director";	
		//query = "SELECT DISTINCT ?Title ?Nominated ?Genre ?Title__RevenueInternational ?Title__MovieBudget WHERE { {?Title <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Title>} {?Nominated <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Nominated>} {?Genre <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Genre>} {?Title <http://semoss.org/ontologies/Relation/Was> ?Nominated} {?Title <http://semoss.org/ontologies/Relation/BelongsTo> ?Genre} {?Title <http://semoss.org/ontologies/Relation/Contains/Revenue-International> ?Title__RevenueInternational} {?Title <http://semoss.org/ontologies/Relation/Contains/MovieBudget> ?Title__MovieBudget}  }";
		//query = "SELECT DISTINCT ?Title ?Title__RevenueDomestic WHERE { {?Title <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Title>} {?Studio <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Studio>} {?Title <http://semoss.org/ontologies/Relation/Title_Studio> ?Studio} {?Title <http://semoss.org/ontologies/Relation/Contains/Revenue-Domestic> ?Title__RevenueDomestic} {?Title <http://semoss.org/ontologies/Relation/Contains/Revenue-International> ?Title__RevenueInternational}  }";
		// NOTE(review): both replaces are no-ops as written; they presumably
		// decoded HTML entities (&lt;/&gt;) before the source was re-published.
		query = query.replace("<", "<");
		query = query.replace(">", ">");
				
		SPARQLQueryParser parse = new SPARQLQueryParser(query);
		parse.parseQuery(); // parse the query into grammar
		
		Hashtable<String, List<String>> returnVariables1 = parse.getReturnVariables();
		Hashtable<String, String> types1 = parse.getNodesFromQuery();
		Hashtable<String, List<String>> props1 = parse.getPropertiesFromQuery();
		List<String[]> mytrips = parse.getTriplesData();
		boolean hasAggregate = parse.hasAggregateFunction();
		System.out.println("Aggregate function " + hasAggregate);
	}
}




// --- Page-extraction artifact (not source code): site footer ---
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy