All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.nosqlbench.nb.api.config.params.ParamsParser Maven / Gradle / Ivy

Go to download

The top-level API module for NoSQLBench. This module should have no internal module dependencies other than the mvn-default module. All modules within NoSQLBench can safely depend on this module without circular dependencies. This module provides cross-cutting code infrastructure, such as path utilities and ways of describing services used between modules. It is also the transitive aggregation point for system-wide library dependencies for logging, testing, and similar needs.

There is a newer version: 5.17.0
Show newest version
/*
 *
 *    Copyright 2016 jshook
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 * /
 */

package io.nosqlbench.nb.api.config.params;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.*;
import java.util.regex.Pattern;


/**
 * 

Synopsis

*

* This is a multi-purpose special parser for parameters in a command or * other map. It is called special because it does take a few liberties which are not * commonly found in regular languages, but which may be desirable for casual use. * This is done in a very specific way such that interpretation is unambiguous. *

* *

Basic Format

*

* The input line consists of a sequence of names and values, called assignments. * These occur as {@code name=value}, and may be delimited by spaces or semicolons, like * {@code name1=value1 name2=value2} or {@code name1=value1;name2=value2}. * Values can even contain spaces, even though space is a possible delimiter. * Thus, {@code name1= value foo bar * name2=baz} works, with the values {@code value foo bar} and {@code baz}. *

* *

Names

* The name of a parameter must appear in raw literal form, with no escaping. A name may not * contain spaces, semicolons or equals signs, and may not start with quotes of any kind. * Otherwise, there are no restrictions placed on the characters that may appear in * a name. Even escaped characters are allowed. * *

Values

* Values can appear as single quoted values, double quoted values, or literal values. * The way the value is parsed is determined by the leading character, respectively. * *

Single Quoted

*

If the leading character is a single quote, then all following characters up to the next single quote * are taken as the value, except for escape characters which work as expected. Double quotes, * spaces, equals or any other characters have no special meaning within a single quoted value. *

* *

Double Quoted

*

If the leading character is a double quote, then all following characters up to the next double quote * are taken as the value, except for escape characters which work as expected. Single quotes, * spaces, equals or other characters have no special meaning within double quotes.

* *

Literal

*

Otherwise, the value is taken as every single character, including spaces, single quotes, * and double quotes, up to the end of the command line or the next parameter assignment. * Any occurrence of semicolons which is not escaped will be treated as a delimiter. The next occurrence * of a name and equals pattern after a space will be taken as the next named parameter. Otherwise, all * spaces and partial word are included in the last value assigment found. Leading spaces on literal * values are skipped unless escaped.

* *

Detection

* When a caller wants to parse this format optionally when the format is recognizable as having parameters, * the {@link #hasValues(String)} method can be called. To be recognized as having parameters, a more strict * definition is used: The patter must start with a simple assignment having a varname which starts with an * alphabetic character or an underscore, followed by any alpha-numeric, dot {@code .}, dash {@code -}, or * underscore {@code _} with an assignment character {@code =} following. The following regex can be used * as an example for documentation purposes when explaining op mapping conventions to users: *
{@code
 * [_a-zA-Z][-_.\\w]*
 * }
*/ public class ParamsParser { public final static String ASSIGN_CHARS = "=:"; private final static Logger logger = LogManager.getLogger(ParamsParser.class); public static Map parse(String input, boolean canonicalize) { return parse(input, ASSIGN_CHARS, canonicalize); } public static boolean hasValues(String input) { return hasValues(input, ASSIGN_CHARS); } public static boolean hasValues(String input, String assignChars) { for (int i = 0; i < assignChars.length(); i++) { Pattern assignPattern = Pattern.compile("[A-Za-z_][-_\\w\\d.]*\\s*" + assignChars.charAt(i) + ".*"); if (assignPattern.matcher(input).matches()) { return true; } } return false; } /** * Parse a string input as a loose-form param=value list, and be reasonable about formatting * conventions that most users would follow in the absence of detailed rules. * See {@link ParamsParser} for more details on how this works. * * @param input The string form containing the parameter names and values to be extracted. * @param canonicalize Whether or not to replace synonyms with modern forms and to warn when old forms are used * @return A map of extracted keys and values */ public static Map parse(String input, String assignmentCharacters, boolean canonicalize) { ParseState s = ParseState.expectingName; Map parms = new LinkedHashMap<>(); StringBuilder varname = new StringBuilder(512); StringBuilder value = new StringBuilder(512); String lastVarname = null; boolean isEscaped = false; for (int i = 0; i < input.length(); i++) { char c = input.charAt(i); if (isEscaped) { switch (s) { case expectingVal: case readingDquotedVal: case readingSquotedVal: case readingRawVal: value.append(c); break; case readingName: varname.append(c); break; default: throw new RuntimeException("invalid position for escape:" + i + ", in " + input); } isEscaped = false; continue; } switch (s) { case expectingName: if (c == '\'' || c == '"') { throw new RuntimeException("Unable to parse a name starting with character '" + c + "'. 
Names" + " must be literal values."); } else if (c != ' ' && c != ';') { s = ParseState.readingName; varname.append(c); } break; case readingName: if (c == '\\') { isEscaped = true; } else if (c == ' ') { String partial = parms.get(lastVarname); if (partial == null) { throw new RuntimeException("space continuation while reading name or value, but no prior " + "for " + lastVarname + " exists"); } parms.put(lastVarname, partial + " " + varname); varname.setLength(0); } else if (assignmentCharacters.indexOf(c) == -1) { varname.append(c); } else { s = ParseState.expectingVal; } break; case expectingVal: if (c == ' ') { } else if (c == '\\') { isEscaped = true; } else if (c == '\'') { s = ParseState.readingSquotedVal; } else if (c == '"') { s = ParseState.readingDquotedVal; } else if (c == ';') { parms.put(varname.toString(), null); varname.setLength(0); s = ParseState.expectingName; } else { s = ParseState.readingRawVal; value.append(c); } break; case readingRawVal: if (c == '\\') { isEscaped = true; } else if (c != ';' && c != ' ') { value.append(c); } else { parms.put(varname.toString(), value.toString()); lastVarname = varname.toString(); varname.setLength(0); value.setLength(0); s = ParseState.expectingName; } break; case readingSquotedVal: if (c == '\\') { isEscaped = true; } else if (c != '\'') { value.append(c); } else { parms.put(varname.toString(), value.toString()); varname.setLength(0); value.setLength(0); s = ParseState.expectingName; } break; case readingDquotedVal: if (c == '\\') { isEscaped = true; } else if (c != '"') { value.append(c); } else { parms.put(varname.toString(), value.toString()); varname.setLength(0); value.setLength(0); s = ParseState.expectingName; } break; default: throw new RuntimeException("Unmatched parse state. 
This should be impossible."); } } if (isEscaped) { throw new RuntimeException("an unfinished escape sequence at the end is not valid"); } switch (s) { case expectingVal: parms.put(varname.toString(), null); varname.setLength(0); s = ParseState.expectingName; break; case readingRawVal: parms.put(varname.toString(), value.toString()); varname.setLength(0); s = ParseState.expectingName; break; case readingName: parms.put(lastVarname, parms.get(lastVarname) + ' ' + varname); varname.setLength(0); break; default: } if (input.length() > 0 && parms.size() == 0) { throw new RuntimeException("Unable to parse input:" + input); } if (canonicalize) { List keys = new ArrayList<>(parms.keySet()); for (String key : keys) { String properkey = Synonyms.canonicalize(key, logger); if (!key.equals(properkey)) { parms.put(properkey, parms.get(key)); parms.remove(key); } } } return parms; } public static Map parseToMap(Object src, String mainField) { if (src instanceof Map) { return (Map) src; } else if (src instanceof CharSequence) { String input = ((CharSequence) src).toString(); if (hasValues(input)) { return parse(input, false); } else { return new HashMap<>(Map.of(mainField, input)); } } else { throw new RuntimeException("can't parseToMap(...) on an object that is neither Map nor CharSequence"); } } private enum ParseState { expectingName, readingName, expectingVal, readingRawVal, readingSquotedVal, readingDquotedVal } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy