// io.virtdata.templates.ParsedTemplate, from the virtdata-lib-curves4 artifact:
// Statistical sampling library for use in virtualdataset libraries, based on apache commons math 4
/*
*
* Copyright 2016 jshook
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.virtdata.templates;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.security.InvalidParameterException;
import java.util.*;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
/**
* A parsed template is a form of a raw template which has been parsed for its
* named anchors and sanity checked against a set of provided bindings.
*
* Once the parsed template is constructed, the method {@link ParsedTemplate#orError()}
* should always be called before the template is used.
*
* Validity Checks
*
* A parsed template is considered to be valid if and only if the raw template contained only
* named anchors which were defined in the provided bindings. Extra bindings are not presumed
* to make a template invalid, but this interpretation is left to the caller for extra checks
* if needed.
*
* Parsed Details
* After parsing, the following details are available:
*
* Parsed Spans
* This is an alternating list of the literal sections of the raw template
* interspersed with the anchor names. This list always starts and ends with a literal section, so
* will always contain an odd number of elements. (some span sections may be empty if necessary, but
* not null)
*
* Specific Bindings
* These are the binding names and definitions which were used
* in a named anchor and also found in the provided bindings. If an anchor references
* a binding which is not provided, then it will not be in this map.
*
* Missing Bindings
* This is a list of binding names which were found in the
* raw template but which were not found in the provided bindings by name.
*
* Extra Bindings
* This is a list of binding names which were provided by the user, but which were not used in the raw template by name.
*/
public class ParsedTemplate {

    /** Name of the regex group that every anchor pattern must define. */
    private static final String ANCHOR_GROUP = "anchor";

    /**
     * The default patterns match one of two forms:
     *
     * <ul>
     * <li>an opening curly brace, followed by one or more word characters, followed by any
     * contiguous combination of dashes, underscores, digits, word characters, and dots,
     * followed by a closing curly brace.</li>
     * <li>a question mark, followed by one or more word characters, followed by any contiguous
     * combination of dashes, underscores, digits, word characters, or dots.</li>
     * </ul>
     *
     * Examples:
     * <pre>
     * {var1}
     * {var2.var3__-var5}
     * ?var6
     * ?var7.__var8-var9
     * </pre>
     *
     * Each pattern captures the anchor name in a named group called {@code anchor}.
     */
    private static final Pattern[] DEFAULT_PATTERNS = new Pattern[]{
        Pattern.compile("\\{(?<anchor>\\w+[-_\\d\\w.]*)}"),
        Pattern.compile("\\?(?<anchor>\\w+[-_\\d\\w.]*)")
    };

    private final static Logger logger = LoggerFactory.getLogger(ParsedTemplate.class);

    private final Pattern[] patterns;
    private final String rawtemplate;
    // Spans is an even-odd form of (literal, variable, ..., ..., literal)
    private final String[] spans;
    private final Set<String> missingBindings = new HashSet<>();
    private final Set<String> extraBindings = new HashSet<>();
    private final Map<String, String> bindings = new LinkedHashMap<>();
    private final Map<String, String> specificBindings = new LinkedHashMap<>();

    /**
     * Construct a new ParsedTemplate from the provided statement template, using the
     * default anchor patterns.
     *
     * @param rawtemplate The string that contains literal sections and anchor sections interspersed
     * @param providedBindings The bindings that are provided for the template to be parsed
     */
    public ParsedTemplate(String rawtemplate, Map<String, String> providedBindings) {
        this(rawtemplate, providedBindings, DEFAULT_PATTERNS);
    }

    /**
     * Parse the given raw template, check the bind points against the provided bindings, and
     * provide detailed template checks for validity.
     *
     * <p>Overriding Patterns</p>
     *
     * The two-argument constructor uses the default patterns, which match {@code {var1}} and
     * {@code ?var1} style anchors. Patterns provided here must be compatible with the
     * {@link Matcher#find()} method, and must also have a named group with the name
     * 'anchor', as in {@code (?<anchor>...)}. The patterns are tried in order, and only the
     * first one that matches anywhere in the raw template is used to extract anchors.
     *
     * @param rawtemplate A string template which contains optionally embedded named anchors
     * @param providedBindings The bindings which are provided by the user to fulfill the named anchors in this raw template
     * @param providedPatterns The patterns which match the named anchor format and extract anchor names from the raw template
     * @throws InvalidParameterException if a pattern that is tried lacks the 'anchor' named group
     */
    public ParsedTemplate(String rawtemplate, Map<String, String> providedBindings, Pattern... providedPatterns) {
        this.rawtemplate = rawtemplate;
        this.bindings.putAll(providedBindings);
        this.patterns = providedPatterns;
        this.spans = parse();
    }

    /**
     * Assert that this template is usable.
     *
     * @return this parsed template, for call chaining
     * @throws RuntimeException if {@link #hasError()} is true; the message contains
     * the {@link #toString()} form of this template
     */
    public ParsedTemplate orError() {
        if (hasError()) {
            throw new RuntimeException("Unable to parse statement: " + this.toString());
        }
        return this;
    }

    /**
     * Split the raw template into spans and classify the bindings.
     *
     * After this method runs, the following conditions should apply:
     * <ul>
     * <li>the returned spans contain all the literal and variable sections in order, starting
     * with a literal, even if it is empty</li>
     * <li>the returned spans are odd in number, meaning that the last section is also a
     * literal, even if it is empty</li>
     * <li>specificBindings contains an ordered map of the binding definitions that were
     * referenced by an anchor</li>
     * <li>missingBindings contains the anchor names which had no provided binding</li>
     * <li>extraBindings contains the provided binding names which no anchor referenced</li>
     * </ul>
     *
     * @return the spans in (literal, variable, ..., literal) form
     */
    private String[] parse() {
        List<String> parsed = new ArrayList<>();
        // Every provided binding is presumed unused until an anchor references it.
        extraBindings.addAll(bindings.keySet());

        int literalStart = 0;
        for (Pattern pattern : patterns) {
            if (!pattern.pattern().contains("?<" + ANCHOR_GROUP + ">")) {
                throw new InvalidParameterException("The provided pattern '" + pattern.pattern()
                    + "' must contain a named group called anchor, as in '(?<anchor>...)'");
            }

            // A sequential find() always advances through the input, so a pattern which
            // can match the empty string cannot stall the scan.
            Matcher m = pattern.matcher(rawtemplate);
            boolean matched = false;
            while (m.find()) {
                matched = true;
                parsed.add(rawtemplate.substring(literalStart, m.start()));
                String anchorName = m.group(ANCHOR_GROUP);
                parsed.add(anchorName);
                literalStart = m.end();

                if (bindings.containsKey(anchorName)) {
                    specificBindings.put(anchorName, bindings.get(anchorName));
                } else {
                    missingBindings.add(anchorName);
                }
            }

            if (matched) {
                extraBindings.removeAll(specificBindings.keySet());
                break; // Only the first pattern that matches at all is used
            }
        }

        // Trailing literal; this is the whole template when no anchors were found.
        parsed.add(rawtemplate.substring(literalStart));
        return parsed.toArray(new String[0]);
    }

    /**
     * @return a multi-line diagnostic summary containing the raw template, the parsed spans,
     * the missing bindings and the extra bindings
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("template: '").append(rawtemplate).append("'");
        sb.append("\n parsed: ");
        sb.append(Arrays.stream(spans).map(s -> "[" + s + "]").collect(Collectors.joining(",")));
        sb.append("\n missing bindings: ")
            .append("[").append(String.join(",", missingBindings)).append("]");
        sb.append("\n extra bindings: ")
            .append("[").append(String.join(",", extraBindings)).append("]");
        return sb.toString();
    }

    /**
     * @return true if the parsed statement is not usable, which is the case when at least
     * one anchor in the raw template has no provided binding.
     */
    public boolean hasError() {
        return !missingBindings.isEmpty();
    }

    /**
     * The list of binding names returned by this method does not
     * constitute an error. They may be used for
     * informational purposes in error handlers, for example.
     *
     * @return a set of bindings names which were provided to
     * this parsed statement, but which were not referenced
     * in either {@code {anchor}} or {@code ?anchor} form.
     */
    public Set<String> getExtraBindings() {
        return extraBindings;
    }

    /**
     * Returns a set of binding names which were referenced
     * in either {@code {anchor}} or {@code ?anchor} form,
     * but which were not present in the provided bindings map.
     * If any binding names are present in the returned set, then
     * this parsed template will not be usable.
     *
     * @return A set of binding names which were referenced but not defined
     */
    public Set<String> getMissingBindings() {
        return missingBindings;
    }

    /**
     * Return a map of bindings which were referenced in the statement.
     * This is an easy way to get the list of effective bindings for
     * a statement for diagnostic purposes without including a potentially
     * long list of library bindings. This method does not
     * represent all of the binding points, as when anchor names are
     * used more than once.
     *
     * @return a bindings map of referenced bindings in the statement
     */
    public Map<String, String> getSpecificBindings() {
        return specificBindings;
    }

    /**
     * @return a list of anchor names as found in the raw template, in order of
     * appearance, including repeats.
     */
    public List<String> getAnchors() {
        List<String> anchors = new ArrayList<>();
        // Anchor names occupy the odd positions of the spans array.
        for (int i = 1; i < spans.length; i += 2) {
            anchors.add(spans[i]);
        }
        return anchors;
    }

    /**
     * Get the named anchors and their associated binding specifiers as found
     * in the raw template.
     *
     * @return A list of bind points
     * @throws InvalidParameterException if the template has an error,
     * such as an anchor which has no provided binding.
     */
    public List<BindPoint> getBindPoints() {
        List<BindPoint> bindpoints = new ArrayList<>();
        for (int i = 1; i < spans.length; i += 2) {
            if (!bindings.containsKey(spans[i])) {
                throw new InvalidParameterException("Binding named '" + spans[i] + "' is not provided for template '" + rawtemplate + "'");
            }
            bindpoints.add(new BindPoint(spans[i], bindings.get(spans[i])));
        }
        return bindpoints;
    }

    /**
     * Return the statement that can be used as-is by any driver specific version.
     * This uses the anchor token as provided to yield a version of the statement
     * which contains positional anchors, but no named bindings.
     *
     * @param tokenFormatter The mapping from a token name to a place holder
     * @return A driver or usage-specific format of the statement, with anchors
     */
    public String getPositionalStatement(Function<String, String> tokenFormatter) {
        StringBuilder sb = new StringBuilder(spans[0]);
        // spans has odd length, so every anchor at i is followed by a literal at i + 1.
        for (int i = 1; i < spans.length; i += 2) {
            sb.append(tokenFormatter.apply(spans[i]));
            sb.append(spans[i + 1]);
        }
        return sb.toString();
    }

    /**
     * Return the parsed template in (literal, variable, ..., ..., literal) form.
     *
     * @return An array of spans
     */
    public String[] getSpans() {
        return spans;
    }
}