/*******************************************************************************
* Copyright 2015 Defense Health Agency (DHA)
*
* If your use of this software does not include any GPLv2 components:
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ----------------------------------------------------------------------------
* If your use of this software includes any GPLv2 components:
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*******************************************************************************/
package prerna.rdf.util;

import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.openrdf.query.algebra.StatementPattern;
import org.openrdf.query.algebra.Var;
import org.openrdf.query.parser.ParsedQuery;
import org.openrdf.query.parser.sparql.SPARQLParser;

import prerna.util.Constants;
import prerna.util.Utility;
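
/**
 * Parses a SPARQL SELECT or CONSTRUCT query with the openrdf {@link SPARQLParser},
 * collects the statement patterns in its WHERE clause, and derives the concept types,
 * properties, and relationship triples the query touches. Every discovered URI is also
 * given an occurrence-based weight (scaled by {@code GOLDEN_RATIO}, with a boost for
 * URIs that back returned variables), stored in {@code finalHash}.
 *
 * <p>Typical usage, mirroring {@code basicParseTest()} at the bottom of this class:
 * <pre>{@code
 * SPARQLQueryParser parser = new SPARQLQueryParser(sparql);
 * parser.parseQuery();
 * List<String[]> triples = parser.getTriplesData();
 * boolean hasAggregate = parser.hasAggregateFunction();
 * }</pre>
 */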
public class SPARQLQueryParser extends AbstractQueryParser {

    private static final Logger classLogger = LogManager.getLogger(SPARQLQueryParser.class);

    private final double GOLDEN_RATIO = 0.618;

    private Hashtable<String, Double> finalHash;
    private Hashtable<String, Integer> countHash;
    private Hashtable<String, String> variableMapping;
    private List<StatementPattern> patterns;

    public SPARQLQueryParser() {
        super();
    }

    public SPARQLQueryParser(String query) {
        super(query);
    }

//    public static void main(String[] args) throws Exception {
//        basicParseTest();
//    }
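
    /**
     * Parses the current query. The projection clause is first scanned with a regex to pair
     * each aggregated source variable with its alias (e.g. {@code (AVG(?x) AS ?y)} records
     * {@code x -> y}); the full query is then parsed with the openrdf SPARQLParser to collect
     * statement patterns and detect aggregate functions, and finally every type/property URI
     * is weighted by its occurrence count, with a +1 boost when it backs a returned or
     * aliased variable.
     */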
    @Override
    public void parseQuery() {
        variableMapping = new Hashtable<>();

        // pair each aggregated source variable with its alias, e.g. (AVG(?x) AS ?y) records x -> y
        final String regex = "\\(\\?([^(\\s|\\)|,)]*)|\\?([^(\\s|\\)|,)]*)\\)";
        final Pattern pattern = Pattern.compile(regex);
        Matcher matcher = null;
        // search case-insensitively so a lower-case "select" cannot break the substring bounds
        String upperQuery = query.toUpperCase();
        if(upperQuery.contains("SELECT")) {
            matcher = pattern.matcher(query.substring(upperQuery.indexOf("SELECT"), upperQuery.indexOf("WHERE")));
        } else {
            matcher = pattern.matcher(query.substring(upperQuery.indexOf("CONSTRUCT"), upperQuery.indexOf("WHERE")));
        }
        while(matcher.find()) {
            String orig = matcher.group();
            orig = processString(orig);
            String ret = "";
            if(matcher.find()) {
                ret = matcher.group(2);
                ret = processString(ret);
            }
            variableMapping.put(orig, ret);
        }

        countHash = new Hashtable<>();
        try {
            SPARQLParser parser = new SPARQLParser();
            ParsedQuery parsedQuery = parser.parseQuery(query, null);

            StatementCollector collector = new StatementCollector();
            parsedQuery.getTupleExpr().visit(collector);

            FunctionCallCollector aggregateFunctionsCollector = new FunctionCallCollector();
            parsedQuery.getTupleExpr().visit(aggregateFunctionsCollector);
            if(aggregateFunctionsCollector.getValue() != null) {
                hasColumnAggregatorFunction = true;
            }

            returnVariables = parsedQuery.getTupleExpr().getBindingNames();
            patterns = collector.getPatterns();
            getURIList(); // populates finalHash, types, and props
        } catch (Exception e) {
            classLogger.error(Constants.STACKTRACE, e);
        }

        // calculate weights for all return variables
        finalHash = new Hashtable<>();
        int max = 0;
        for(String key : countHash.keySet()) {
            int c = countHash.get(key);
            if(c > max) {
                max = c;
            }
        }
        for(String key : countHash.keySet()) {
            double weight = GOLDEN_RATIO * countHash.get(key) / max;
            if(types.containsValue(key)) {
                List<String> possibleVariableNames = getKeyFromVal(key, types);
                for(String variableName : possibleVariableNames) {
                    if(returnVariables.contains(variableName) || variableMapping.containsKey(variableName)) {
                        weight += 1;
                        break;
                    }
                }
            } else if(props.containsValue(key)) {
                List<String> possibleVariableNames = getKeyFromVal(key, props);
                for(String variableName : possibleVariableNames) {
                    if(returnVariables.contains(variableName) || variableMapping.containsKey(variableName)) {
                        weight += 1;
                        break;
                    }
                }
            }
            finalHash.put(key, weight);
        }
        //System.out.println(finalHash);
    }
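
    /**
     * Reverse lookup: returns every key in the given map whose value equals {@code val}.
     */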
    private List<String> getKeyFromVal(String val, Hashtable<String, String> map) {
        List<String> retList = new ArrayList<>();
        for(String key : map.keySet()) {
            if(map.get(key).equals(val)) {
                retList.add(key);
            }
        }
        return retList;
    }
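
    /**
     * Walks the collected statement patterns to populate the type and property lookups, the
     * alias-to-type mapping, and the triples list, then folds the per-variable occurrence
     * counts into {@code countHash}, keyed by the resolved type or property URI.
     */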
    private void getURIList() {
        Hashtable<String, Integer> dataHash = new Hashtable<>();
        types = new Hashtable();
        props = new Hashtable();
        // Run through the types first. This must happen first so that all of the types and their
        // variables are known; when the properties are processed afterwards, the variable-to-type
        // mapping built here is used to map each variable to its respective type.
        for(int patIndex = 0; patIndex < patterns.size(); patIndex++) {
            StatementPattern thisPattern = patterns.get(patIndex);
            Var subjectVar = thisPattern.getSubjectVar(); // cannot be used directly; it is the alias
            Var objectVar = thisPattern.getObjectVar();
            Var predicateVar = thisPattern.getPredicateVar(); // cannot be used directly; it is the alias
            dataHash = recordVar(subjectVar, dataHash);
            dataHash = recordVar(objectVar, dataHash);
            if(predicateVar.isConstant() && (predicateVar.getValue()+"").equalsIgnoreCase("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")) {
                types.put(Utility.getInstanceName(objectVar.getValue().toString()) + "", objectVar.getValue() + "");
                aliasTableMap.put(subjectVar.getName(), Utility.getInstanceName(objectVar.getValue().toString()));
            }
        }

        // now that the types and their aliases are known, run through the properties and generate the triples list
        for(int patIndex = 0; patIndex < patterns.size(); patIndex++) {
            StatementPattern thisPattern = patterns.get(patIndex);
            Var subjectVar = thisPattern.getSubjectVar();
            Var objectVar = thisPattern.getObjectVar();
            Var predicateVar = thisPattern.getPredicateVar();
            if(predicateVar.isConstant() && (predicateVar.getValue()+"").contains("ontologies/Relation/Contains/")) {
                String propPlainText = Utility.getInstanceName(predicateVar.getValue().toString());
                props.put(propPlainText + "", predicateVar.getValue() + "");
                String nodeType = aliasTableMap.get(subjectVar.getName());
                addToVariablesMap(typePropVariables, nodeType, objectVar.getName(), predicateVar.getValue().toString());
                if(returnVariables.contains(objectVar.getName())) {
                    addToVariablesMap(typeReturnVariables, nodeType, objectVar.getName(), predicateVar.getValue().toString());
                }
            } else if(predicateVar.isConstant() && (predicateVar.getValue()+"").contains("ontologies/Relation")) {
                // must be a triple!
                String[] triple = new String[3];
                triple[0] = types.get(aliasTableMap.get(subjectVar.getName()));
                triple[1] = predicateVar.getValue().toString();
                triple[2] = types.get(aliasTableMap.get(objectVar.getName()));
                triplesData.add(triple);
            }
        }

        // fold the raw per-alias counts into countHash, keyed by type/property URI where one is known
        Enumeration<String> keys = dataHash.keys();
        while(keys.hasMoreElements()) {
            String key = "" + keys.nextElement();
            if(key.contains(":")) { // already namespaced; count it as-is
                Integer typeProxyCount = dataHash.get(key);
                if(countHash.containsKey(key)) {
                    typeProxyCount = typeProxyCount + countHash.get(key);
                }
                countHash.put(key, typeProxyCount);
            } else {
                String typeName = types.get(key);
                Integer typeProxyCount = dataHash.get(key);
                if(typeName != null) {
                    if(countHash.containsKey(typeName)) {
                        typeProxyCount = typeProxyCount + countHash.get(typeName);
                    }
                    countHash.put(typeName, typeProxyCount);
                } else {
                    String propName = props.get(key);
                    Integer propProxyCount = dataHash.get(key);
                    if(propName != null) {
                        if(countHash.containsKey(propName)) {
                            propProxyCount = propProxyCount + countHash.get(propName);
                        }
                        countHash.put(propName, propProxyCount);
                    }
                }
            }
        }
    }
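
    /**
     * Increments the occurrence count for the given variable, keyed by its bound value when
     * it is a constant and by its variable name otherwise.
     */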
    private Hashtable<String, Integer> recordVar(Var var, Hashtable<String, Integer> inputHash) {
        if(var.hasValue()) {
            Integer count = inputHash.get(var.getValue()+"");
            if(count == null) {
                count = 0;
            }
            count++;
            inputHash.put(var.getValue()+"", count);
        } else {
            Integer count = inputHash.get(var.getName()+"");
            if(count == null) {
                count = 0;
            }
            count++;
            inputHash.put(var.getName()+"", count);
        }
        return inputHash;
    }
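
    /**
     * Strips the {@code ?}, {@code (} and {@code )} characters left over from a regex match
     * on the projection clause.
     */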
    private String processString(String s) {
        return s.replaceAll("\\?", "").replaceAll("\\)", "").replaceAll("\\(", "");
    }

    public List<StatementPattern> getPatterns() {
        return patterns;
    }

    private void setPatterns(List<StatementPattern> patterns) {
        this.patterns = patterns;
    }

    private void setReturnVariables(Set<String> returnVariables) {
        this.returnVariables = returnVariables;
    }

    @Override
    public List<String[]> getTriplesData() {
        return triplesData;
    }

    // tester methods
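    /**
     * Smoke test: parses a sample movie-database query against a SEMOSS-style ontology and
     * prints whether an aggregate function was detected.
     */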
    private static void basicParseTest() {
        String query = "SELECT DISTINCT ?Director (AVG(?Title__MovieBudget) AS ?x) (SUM(?Title__MovieBudget) AS ?y) WHERE { BIND(<@Studio-http://semoss.org/ontologies/Concept/Studio@> AS ?Studio) {?Title <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Title>} {?Director <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Director>} {?Studio <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Studio>} {?Title <http://semoss.org/ontologies/Relation/DirectedBy> ?Director} {?Title <http://semoss.org/ontologies/Relation/DirectedAt> ?Studio} {?Title <http://semoss.org/ontologies/Relation/Contains/MovieBudget> ?Title__MovieBudget} {?Title <http://semoss.org/ontologies/Relation/Contains/Revenue-International> ?Title__Revenue_International} {?Title <http://semoss.org/ontologies/Relation/Contains/Revenue-Domestic> ?Title__Revenue_Domestic} {?Title <http://semoss.org/ontologies/Relation/Contains/RottenTomatoes-Audience> ?Title__RottenTomatoes_Audience} {?Title <http://semoss.org/ontologies/Relation/Contains/RottenTomatoes-Critics> ?Title__RottenTomatoes_Critics} } GROUP BY ?Director";
        //query = "SELECT DISTINCT ?Title ?Nominated ?Genre ?Title__RevenueInternational ?Title__MovieBudget WHERE { {?Title <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Title>} {?Nominated <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Nominated>} {?Genre <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Genre>} {?Title <http://semoss.org/ontologies/Relation/Was> ?Nominated} {?Title <http://semoss.org/ontologies/Relation/BelongsTo> ?Genre} {?Title <http://semoss.org/ontologies/Relation/Contains/Revenue-International> ?Title__RevenueInternational} {?Title <http://semoss.org/ontologies/Relation/Contains/MovieBudget> ?Title__MovieBudget} }";
        //query = "SELECT DISTINCT ?Title ?Title__RevenueDomestic WHERE { {?Title <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Title>} {?Studio <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://semoss.org/ontologies/Concept/Studio>} {?Title <http://semoss.org/ontologies/Relation/Title_Studio> ?Studio} {?Title <http://semoss.org/ontologies/Relation/Contains/Revenue-Domestic> ?Title__RevenueDomestic} {?Title <http://semoss.org/ontologies/Relation/Contains/Revenue-International> ?Title__RevenueInternational} }";
        // decode any HTML-escaped angle brackets around the URIs before parsing
        query = query.replace("&lt;", "<");
        query = query.replace("&gt;", ">");
        SPARQLQueryParser parse = new SPARQLQueryParser(query);
        parse.parseQuery(); // parse the query into grammar
        Hashtable returnVariables1 = parse.getReturnVariables();
        Hashtable types1 = parse.getNodesFromQuery();
        Hashtable props1 = parse.getPropertiesFromQuery();
        List<String[]> mytrips = parse.getTriplesData();
        boolean hasAggregate = parse.hasAggregateFunction();
        System.out.println("Aggregate function " + hasAggregate);
    }
}