All downloads are free. The search and download functionality uses the official Maven repository.

javax0.jamal.tools.InputHandler Maven / Gradle / Ivy

There is a newer version: 2.8.1
Show newest version
package javax0.jamal.tools;

import javax0.jamal.api.BadSyntax;
import javax0.jamal.api.BadSyntaxAt;
import javax0.jamal.api.Input;
import javax0.jamal.api.Position;
import javax0.jamal.api.Processor;
import javax0.jamal.api.SpecialCharacters;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.regex.Pattern;

import static java.util.Objects.requireNonNull;
import static javax0.jamal.api.Macro.validId1stChar;
import static javax0.jamal.api.Macro.validIdChar;

/**
 * Utility class with some simple static methods that fetch characters from an input buffer.
 */
public class InputHandler {
    final static private int DOES_NOT_CONTAIN = -1;
    private static final String[] EMPTY_STRING_ARRAY = new String[0];


    private InputHandler() {
    }

    /**
     * Checks that the first character of the input is one of the characters listed.
     *
     * @param s     a character sequence of which the first character is checked
     * @param chars the characters we are looking for
     * @return {@code true} if the first character of {@code s} is one of the {@code chars}. Returns {@code false} if
     * the character sequence is empty or the first character is none of the {@code chars}.
     */
    public static boolean firstCharIs(CharSequence s, char... chars) {
        if (s.length() == 0) {
            return false;
        }
        for (final var c : chars) {
            if (s.charAt(0) == c) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param s       a character sequence of which the start is checked
     * @param strings the possible strings to check that the sequence starts with
     * @return the index of the string that the sequence starts with or -1 if the sequence does not start with any of
     * the strings
     */
    public static int startsWith(CharSequence s, String... strings) {
        int i = 0;
        for (final var string : strings) {
            if (s.length() >= string.length() && s.subSequence(0, string.length()).equals(string)) {
                return i;
            }
            i++;
        }
        return -1;
    }

    /**
     * Delete the start of the input. It is an error trying to delete more character than the number of characters there
     * are in the input.
     *
     * @param input              from which the first characters are deleted
     * @param numberOfCharacters the number of characters to be deleted from the start of {@code input}
     */
    public static void skip(Input input, int numberOfCharacters) {
        input.delete(numberOfCharacters);
    }

    /**
     * Same as {@link #skip(Input, int)} but it also appends the deleted characters to the string builder.
     *
     * @param input              from which the first characters are deleted
     * @param numberOfCharacters the number of characters to be deleted from the start of {@code input}
     * @param sb                 where the characters will be appended
     * @deprecated Use the method {@link #move(Input, int, Input)} method instead. The interface `Input` extends {@link
     * CharSequence} and can give access to an underlying {@link StringBuilder}. Instead of creating a {@link
     * StringBuilder}, invoke {@link javax0.jamal.tools.Input#makeInput()} and use that instead of the {@link
     * StringBuilder}.
     * 

* The reason to deprecate this method is to avoid use when the characters are first moved to a {@link * StringBuilder} and then a new {@link Input} is created from the {@link StringBuilder}. This approach may loose * the position information that later implementations of {@link InputHandler#move(Input, int, Input)} may also * copy. */ @Deprecated public static void move(Input input, int numberOfCharacters, StringBuilder sb) { sb.append(input.substring(0, numberOfCharacters)); input.delete(numberOfCharacters); } /** * Same as {@link #skip(Input, int)} but it also appends the deleted characters to the output. * * @param input from which the first characters are deleted * @param numberOfCharacters the number of characters to be deleted from the start of {@code input} * @param output where the characters will be appended */ public static void move(Input input, int numberOfCharacters, Input output) { output.append(input.substring(0, numberOfCharacters)); input.delete(numberOfCharacters); } /** * Copy the string from the start of the input to the end of the output. *

* Note that this is a convenience method for {@link #move(Input, int, Input)}. There is no check that the input * really starts with the characters contained by {@code s}. * * @param input from which the string will be removed * @param s the string. There is no check that the input really starts with the string. The string is {@link * #skip(Input, String)}-ped in the {@code input} and is appended to the output. * @param output to which the string will be appended */ public static void move(Input input, String s, Input output) { move(input, s.length(), output); } /** * Delete the start of the input. * * @param input from which the first characters are deleted * @param s is a string that is supposed to be on the start of the input and this string is going to be deleted * from the start of the {@code input}. The actual implementation does not check that the string is * really there at the start of the input, it just skips so many characters as many the string has. */ public static void skip(Input input, String s) { skip(input, s.length()); } /** * @param i the result of {@link String#indexOf(int)} * @return {@code true} the value is a valid character code and not a signal that the string does not contain the * character we are looking for. */ public static boolean contains(int i) { return i != DOES_NOT_CONTAIN; } /** * Fetch an id from the start of the {@code input}. *

* An identifier is a string that starts with a character accepted by {@link javax0.jamal.api.Macro#validId1stChar(char)} and contain only * characters that are accepted {@link javax0.jamal.api.Macro#validIdChar(char)} *

* or *

* a string that starts with some special character, which usually can not be part of an identifier and does not * contain space. This way you can have macros like *

{@code
     *
     *        {@define =hatto (x)=belxanto}{#define {=hatto /1}(x) =tttxttt}{bel1anto/_}
     *
     * }
* which is an experimental feature and is deliberately not documented except here. (So much not documented that * I am just watching and debugging it five years later... realising that I did that deliberately.) *

* Just turned out that this is a very useful feature. It is used to use emojis as macro names... Who would have * thought? * * @param input that contains the identifier at the start. The identifier will be removed at the end of the method. * @return the identifier string that was found and removed from the start of the input. */ public static String fetchId(Input input) { final var output = new StringBuilder(); if (input.length() > 0 && validId1stChar(input.charAt(0))) { while (input.length() > 0 && validIdChar(input.charAt(0))) { output.append(input.charAt(0)); skip(input, 1); } } else { while (input.length() > 0 && !Character.isWhitespace(input.charAt(0))) { output.append(input.charAt(0)); skip(input, 1); } } return output.toString(); } public static String fetchNumber(Input input) { final var output = new StringBuilder(); while (input.length() > 0 && Character.isDigit(input.charAt(0))) { output.append(input.charAt(0)); skip(input, 1); } return output.toString(); } /** * Checks that the identifier is global or not. The check simply looks for embedded '{@code :}' character in the * identifier. * * @param id the identifier to check * @return {@code true} if the identifier is a global identifier. */ public static boolean isGlobalMacro(String id) { return id.contains(":"); } /** * Converts a global macro name. *

* Macro names that contain the '{@code :}' character are considered global macros and are automatically defined at the * top level. This provides a way for macro package developers to use namespacing. Although Jamal does not handle * namespaces directly, the names can be treated as 'namespace:localName', and namespace notations can be nested. *

* A global macro without a namespace starts with a '{@code :}' character when defined, but this character is removed * by this conversion. Later the macro can be referred to using the name without the '{@code :}' character. * * @param id the identifier of the macro. * @return the converted identifier. */ public static String convertGlobal(String id) { if (!id.isEmpty() && id.charAt(0) == SpecialCharacters.GLOBAL_NAME_CHAR) { return id.substring(1); } else { return id; } } /** * Delete the white space characters from the start of the input * * @param input from which the spaces should be deleted. */ public static void skipWhiteSpaces(Input input) { int i = 0; while (input.length() > i && Character.isWhitespace(input.charAt(i))) { i++; } input.delete(i); } /** * Delete the white space characters from the start of the input but do not delete {@code \n} characters. If it sees * a {@code \n} character then stop with the deletion. * * @param input from which the spaces (but no {@code \n})should be deleted. */ public static void skipWhiteSpacesNoNL(Input input) { while (input.length() > 0 && Character.isWhitespace(input.charAt(0)) && input.charAt(0) != '\n') { input.delete(1); } } /** * Delete the white space characters from the start of the input but only until after the first EOL * * @param input from which the spaces should be deleted. */ public static void skipWhiteSpaces2EOL(Input input) { skipWhiteSpacesNoNL(input); if (input.length() > 0 && input.charAt(0) == '\n') { input.delete(1); } } /** * Same as {@link #skipWhiteSpaces(Input)} but it also appends the deleted spaces to the string builder. * * @param input from which the spaces should be deleted. 
* @param sb where the spaces will be appended */ public static void moveWhiteSpaces(Input input, StringBuilder sb) { while (input.length() > 0 && Character.isWhitespace(input.charAt(0))) { sb.append(input.charAt(0)); input.delete(1); } } /** * Same as {@link #skipWhiteSpaces(Input)} but it also appends the deleted spaces to the output. * * @param input from which the spaces should be deleted. * @param output where the spaces will be appended */ public static void moveWhiteSpaces(Input input, Input output) { while (input.length() > 0 && Character.isWhitespace(input.charAt(0))) { output.append(input.charAt(0)); input.delete(1); } } /** * Delete the white space character from the start and from the end of the input. * * @param input from which the spaces should be deleted. */ public static void trim(Input input) { skipWhiteSpaces(input); rtrim(input); } /** * Delete the white space character from the end of the input. * * @param input from which the spaces should be deleted. */ public static void rtrim(Input input) { int i = input.length() - 1; while (i >= 0 && Character.isWhitespace(input.charAt(i))) { input.deleteCharAt(i); i--; } } /** * Delete the white space characters from the start of the input up to and including the next new-line character, * but only if there is a new-line character following zero or more non-new-line white space characters and the very * first character IS a back-slash {@code \}. *

* This method is used when the option {@code nl} is in effect that says that any new line character that follows a * macro closing string should be consumed and not put into the output. This helps writing better looking output * easier and not caring too much about the new lines. *

* If there are some spaces immediately before the new-line they will also be deleted, because they cannot easily be * recognized by the person editing the file and we want to avoid mysterious errors. * * @param input from which the spaces and the new-line should be deleted. */ public static void eatEscapedNL(Input input) { if (input.length() > 0 && input.charAt(0) == '\\') { int i = 1; while (i < input.length() && Character.isWhitespace(input.charAt(i)) && input.charAt(i) != '\n') { i++; } if (i < input.length() && input.charAt(i) == '\n') { skip(input, i + 1); } } } /** * Delete the characters from the start of the input until after the first EOL * * @param input from which the spaces should be deleted. */ public static void skip2EOL(Input input) { fetch2EOL(input); } /** * Delete the characters from the start of the input until after the first EOL and return the deleted start as a * String. The returned string does not contain the closing new line, but the closing new line is removed from the * input. * * @param input from which the spaces should be deleted. * @return the string from the original start of the input to the end of line, not including the end of line */ public static String fetch2EOL(Input input) { final var sb = new StringBuilder(); while (!input.isEmpty() && input.charAt(0) != '\n') { sb.append(input.charAt(0)); input.delete(1); } if (!input.isEmpty() && input.charAt(0) == '\n') { input.delete(1); } return sb.toString(); } /** * Get the parameter list that is at the start of the input. The parameter list has to start with a {@code (} * character and should be closed with a {@code )} character. The parameters are separated by {@code ,} characters, * and starting and ending spaces from the parameters are removed. * *

     *         ( a,b, c ,d)
     * 
*

* There is no restriction on what characters the parameter names can contain other than those implied by the * parsing algorithm: you cannot use {@code )} and {@code ,} characters in a parameter and you cannot have space at * the start and at the end of the parameter. It is recommended not to abuse this possibility. * * @param input that contains the parameter list * @param id the id of the macro that has this parameter list. This parameter is only used for error reporting. * @return the array containing the parameter, or an empty (zero length) array if there are no parameters (when the * first character is not {@code (} opening paren) * @throws BadSyntaxAt when the input starts with a {@code (} character, therefore it is supposed to have parameters * but the parameter list if any is not closed with a {@code )} character. */ public static String[] getParameters(Input input, String id) throws BadSyntaxAt { final var ref = input.getPosition(); if (firstCharIs(input, '(')) { skip(input, 1); var closingParen = input.indexOf(")"); BadSyntaxAt.when(!contains(closingParen), "'" + id + "' has parameters, but no ')'", ref); var param = input.substring(0, closingParen); skip(input, closingParen + 1); skipWhiteSpaces(input); if (param.isEmpty()) { return new String[0]; } else { return ensure(Arrays.stream(param.split(",")).map(String::trim).toArray(String[]::new), ref); } } else { return new String[0]; } } public static LinkedHashMap getParametersWithDefaults(Processor processor, Input input, String id) throws BadSyntax { final var ref = input.getPosition(); if (firstCharIs(input, '(')) { final var parser = Params.using(processor, true); final var map = new LinkedHashMap(); parser.parse(input, map::put, k -> true); if (map.isEmpty()) { return new LinkedHashMap<>(); } else { ensure(map.keySet().stream().map(String::trim).toArray(String[]::new), ref); return map; } } else { return new LinkedHashMap<>(); } } /** * Checks that a string is a valid identifier. 
* * @param id the string holding the identifier or something else * @return true if the string syntactically is a valid identifier */ public static boolean isIdentifier(final String id) { final char startCharacter = id.charAt(0); if (!validId1stChar(startCharacter)) { return false; } for (int i = 1; i < id.length(); i++) { if (!validIdChar(id.charAt(i))) { return false; } } return true; } public static boolean isNumber(final String id) { if (id.isEmpty()) { return false; } for (int i = id.charAt(0) == '-' || id.charAt(0) == '+' ? 1 : 0; i < id.length(); i++) { if (!Character.isDigit(id.charAt(i))) { return false; } } return true; } /** * Checks that no parameter name contains another parameter name. If there is any parameter name that contains * another parameter name then {@code BadSyntax} is thrown. *

* This restriction ensures that the parameter replacement with the actual values is definite and there are no * readability issues. * * @param parameters the parameters to check * @param ref the position in the input * @return the parameters themselves * @throws BadSyntaxAt is any of the parameter names contain another parameter name. */ public static String[] ensure(final String[] parameters, Position ref) throws BadSyntaxAt { final var exceptionParameters = new ArrayList(); for (int i = 0; i < parameters.length; i++) { BadSyntaxAt.when(requireNonNull(parameters[i]).isEmpty(), "User defined macro argument cannot be empty string.", ref); for (int j = 0; j < parameters.length; j++) { if (i != j) { if (parameters[i].contains(parameters[j])) { exceptionParameters.add(i + ". parameter '" + parameters[i] + "' contains the " + j + ". parameter '" + parameters[j] + "'"); } } } } if (!exceptionParameters.isEmpty()) { final var badSyntax = new BadSyntaxAt("User defined macro parameter name should not be a substring of another parameter.", ref); badSyntax.parameters(exceptionParameters); throw badSyntax; } return parameters; } /** * Parse the input and split it up into a String array. It can be used in many macros to provide a consistent syntax * and structure when the macro processing needs a list of strings. *

* The possible syntax variations are: *

     * macroName / a / b / c / ... /x
     * macroName   a   b   c   ...  x
     * macroName `regex` separator a separator b separator .... separator x
     * 
*

* where the separator character is the first non-whitespace character after the macro name, and it is not the * back-tick (`) character. If the first non-whitespace character after the name of the macro id is a backtick then * the parsing expects to be a regular expression till the next backtick. After the regular expression and after the * closing backtick the rest of the input is spit up and the separator is the regular expression. *

* Backtick was selected during the design of the syntax to enclose the regular expression because this character is * very rare in Java regular expression. In case you need one inside the regular expression then you have to simply * double it and the parsing will single it back. *

* If the first character after the white spaces is a digit or alpha character then the input will be split along * the spaces. * * @param input to be split up * @return the array of the strings created from the input */ //snipline getParts3 filter=(getParts.*?\)) public static String[] getParts(Input input) { return getParts(input, -1); } /** * Same as {@link #getParts(Input)} but we want at most {@code limit} number of parts. * * @param input the input from which we want to get the parts * @param limit the maximum number of parts we need * @return the parts of the input in an array */ //snipline getParts4 filter=(getParts.*?\)) public static String[] getParts(Input input, int limit) { skipWhiteSpaces(input); if (input.length() == 0) { return EMPTY_STRING_ARRAY; } final var separator = input.substring(0, 1); if (Character.isLetterOrDigit(separator.charAt(0))) { return input.toString().split("\\s+", limit); } skip(input, 1); if ("`".equals(separator)) { return getPartsRegex(input, limit); } return input.toString().split(Pattern.quote(separator), limit); } //snipline getParts1 filter=(getParts.*?\)) public static String[] getParts(Input input, Processor processor) throws BadSyntax { return getParts(input, processor, -1); } //snipline getParts2 filter=(getParts.*?\)) public static String[] getParts(Input input, Processor processor, int limit) throws BadSyntax { final var regex = MacroReader.macro(processor).readValue("$REGEX"); if (regex.isEmpty()) { return getParts(input, limit); } skipWhiteSpaces(input); if (input.length() == 0) { return EMPTY_STRING_ARRAY; } return skipEmptyFirst(input.toString().split(regex.get(), limit)); } private static String[] getPartsRegex(Input input, int limit) { final var regex = fetchRegex(input); return skipEmptyFirst(input.toString().split(regex, limit)); } /** * It is absolutely legal to start the list with a separator. * In this case, the list will start with an empty string. 
* It can happen when the split is done using a regular expression. *

* We do not want that empty element at the start. * * @param values the array of strings that may start which may start with an empty element * @return the array the first element removed when that element is an empty string */ private static String[] skipEmptyFirst(String[] values) { if (values.length > 0 && values[0].isEmpty()) { return Arrays.copyOfRange(values, 1, values.length); } else { return values; } } /** * Fetch a regular expression from the input. The regular expression starts at the start of the input, and it is * terminated by a {@code `} (backtick) character or by the end of the input. If there are backtick characters in * the regular expression they have to be doubled. After the fetching the first character on the input is the * character that follows the terminating backtick character. * * @param input that starts with the regular expression. It is also modified so that the regular expression at the * start and the terminating back-tick is removed. * @return the regular expression. The return string may contain single back-ticks, which are doubled in the input. */ private static String fetchRegex(Input input) { var sb = new StringBuilder(); while (input.length() > 0) { while (input.charAt(0) == '`' && input.length() > 1 && input.charAt(1) == '`') { sb.append('`'); skip(input, 2); } if (input.charAt(0) == '`') break; sb.append(input.charAt(0)); skip(input, 1); } if (input.length() > 0) { skip(input, 1); } return sb.toString(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy