// Source listing for com.bigdata.rdf.internal.constraints.RegexBOp (Maven / Gradle / Ivy artifact page).
/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.rdf.internal.constraints;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.openrdf.model.Literal;
import org.openrdf.model.Value;
import org.openrdf.query.algebra.evaluation.util.QueryEvaluationUtil;
import com.bigdata.bop.BOp;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IValueExpression;
import com.bigdata.bop.NV;
import com.bigdata.rdf.error.SparqlTypeErrorException;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.sparql.ast.FilterNode;
import com.bigdata.rdf.sparql.ast.QueryHints;
/**
* SPARQL REGEX operator.
*/
public class RegexBOp extends XSDBooleanIVValueExpression
implements INeedsMaterialization {
/**
*
*/
private static final long serialVersionUID = 1357420268214930143L;
private static final transient Logger log = Logger.getLogger(RegexBOp.class);
private static final boolean debug = log.isDebugEnabled();
private static final boolean info = log.isInfoEnabled();
/**
*
* Local member to implement {@link QueryHints.REGEX_MATCH_NON_STRING}
*
* {@link BLZG-1780}
*
*/
private boolean matchNonString = QueryHints.DEFAULT_REGEX_MATCH_NON_STRING;
public interface Annotations extends XSDBooleanIVValueExpression.Annotations {
/**
* The cached regex pattern.
*/
public String PATTERN = RegexBOp.class.getName()
+ ".pattern";
}
private static Map anns(
final IValueExpression extends IV> pattern,
final IValueExpression extends IV> flags) {
try {
if (pattern instanceof IConstant &&
(flags == null || flags instanceof IConstant)) {
final IV parg = ((IConstant) pattern).get();
final IV farg = flags != null ?
((IConstant) flags).get() : null;
if (parg.hasValue() && (farg == null || farg.hasValue())) {
final Value pargVal = parg.getValue();
final Value fargVal = farg != null ? farg.getValue() : null;
return NV.asMap(
new NV(Annotations.PATTERN,
getPattern(pargVal, fargVal)));
}
}
} catch (Exception ex) {
if (info) {
log.info("could not create pattern for: " + pattern + ", " + flags);
}
}
return BOp.NOANNS;
}
/**
* Construct a regex bop without flags.
*/
@SuppressWarnings("rawtypes")
public RegexBOp(
final IValueExpression extends IV> var,
final IValueExpression extends IV> pattern) {
this(new BOp[] { var, pattern }, anns(pattern, null));
}
/**
* Construct a regex bop with flags.
*/
@SuppressWarnings("rawtypes")
public RegexBOp(
final IValueExpression extends IV> var,
final IValueExpression extends IV> pattern,
final IValueExpression extends IV> flags) {
this(new BOp[] { var, pattern, flags }, anns(pattern, flags));
}
/**
* Required shallow copy constructor.
*/
public RegexBOp(final BOp[] args, final Map anns) {
super(args, anns);
if (args.length < 2 || args[0] == null || args[1] == null)
throw new IllegalArgumentException();
}
/**
* Constructor required for {@link com.bigdata.bop.BOpUtility#deepCopy(FilterNode)}.
*/
public RegexBOp(final RegexBOp op) {
super(op);
}
@Override
public Requirement getRequirement() {
return INeedsMaterialization.Requirement.SOMETIMES;
}
@Override
public boolean accept(final IBindingSet bs) {
final Value var = asValue(getAndCheckBound(0, bs));
@SuppressWarnings("rawtypes")
final IV pattern = getAndCheckBound(1, bs);
@SuppressWarnings("rawtypes")
final IV flags = arity() > 2 ? get(2).get(bs) : null;
if (debug) {
log.debug("regex var: " + var);
log.debug("regex pattern: " + pattern);
log.debug("regex flags: " + flags);
}
return accept(var, pattern.getValue(), flags != null ? flags.getValue()
: null);
}
/**
* Lifted directly from Sesame's EvaluationStrategyImpl.
*
* FIXME The Pattern should be cached if the pattern argument and flags are
* constants.
*
* @see
* REGEXBOp should cache the Pattern when it is a constant
*/
private boolean accept(final Value arg, final Value parg, final Value farg) {
if (debug) {
log.debug("regex var: " + arg);
log.debug("regex pattern: " + parg);
log.debug("regex flags: " + farg);
//Fixme not sure why we weren't able pick up via properties
log.debug(QueryHints.REGEX_MATCH_NON_STRING
+ ": "
+ this.getProperty(QueryHints.REGEX_MATCH_NON_STRING,
QueryHints.DEFAULT_REGEX_MATCH_NON_STRING));
log.debug("matchNonString: " + this.matchNonString);
}
//BLZG-1200 changed to isPlainLiteral
if (QueryEvaluationUtil.isPlainLiteral(arg)
// BLZG-1780: Query Hint to cast to string
|| matchNonString ) {
final String text;
if(QueryEvaluationUtil.isPlainLiteral(arg)) {
text = ((Literal) arg).getLabel();
} else { //Query Hint Override with explicit conversion
text = arg.stringValue();
}
if(debug) {
log.debug("regex text: " + text);
}
try {
// first check for cached pattern
Pattern pattern = (Pattern) getProperty(Annotations.PATTERN);
if (pattern == null) {
// resolve the pattern. NB: NOT cached.
pattern = getPattern(parg, farg);
}
if (Thread.interrupted()) {
/*
* Eagerly notice if the operator is interrupted.
*
* Note: Regex can be a high latency operation for a large
* RDF Literal. Therefore we want to check for an interrupt
* before each regex test. The Pattern code itself will not
* notice an interrupt....
*/
throw new RuntimeException(new InterruptedException());
}
final boolean result = pattern.matcher(text).find();
return result;
} catch (IllegalArgumentException ex) {
throw new SparqlTypeErrorException();
}
} else {
if(debug) {
log.debug("Unknown type: " + arg);
}
throw new SparqlTypeErrorException();
}
}
private static Pattern getPattern(final Value parg, final Value farg)
throws IllegalArgumentException {
if (debug) {
log.debug("regex pattern: " + parg);
log.debug("regex flags: " + farg);
}
//BLZG-1200 Literals with language types are not included in REGEX
if (QueryEvaluationUtil.isPlainLiteral(parg)
&& (farg == null || QueryEvaluationUtil.isPlainLiteral(farg))) {
final String ptn = ((Literal) parg).getLabel();
String flags = "";
if (farg != null) {
flags = ((Literal)farg).getLabel();
}
int f = 0;
for (char c : flags.toCharArray()) {
switch (c) {
case 's':
f |= Pattern.DOTALL;
break;
case 'm':
f |= Pattern.MULTILINE;
break;
case 'i': {
/*
* The SPARQL REGEX operator is based on the XQuery REGEX
* operator. That operator should be Unicode clean by
* default. Therefore, when case-folding is specified, we
* also need to include the UNICODE_CASE option.
*
* @see SPARQL REGEX operator does not perform case-folding
* correctly for Unicode data
*/
f |= Pattern.CASE_INSENSITIVE;
f |= Pattern.UNICODE_CASE;
break;
}
case 'x':
f |= Pattern.COMMENTS;
break;
case 'd':
f |= Pattern.UNIX_LINES;
break;
case 'u': // Implicit with 'i' flag.
// f |= Pattern.UNICODE_CASE;
break;
default:
throw new IllegalArgumentException();
}
}
final Pattern pattern = Pattern.compile(ptn, f);
return pattern;
}
throw new IllegalArgumentException();
}
public boolean isMatchNonString() {
return matchNonString;
}
public void setMatchNonString(boolean matchNonString) {
this.matchNonString = matchNonString;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy