org.klojang.templates.Regex Maven / Gradle / Ivy
Show all versions of klojang-templates Show documentation
package org.klojang.templates;
import java.util.regex.Pattern;
import static java.util.regex.Pattern.compile;
/**
* Tokens and regular expressions used by the template parser to parse templates. For all
* intents and purposes, this is an internal class. However, by making this class and the
* constants defined in it public, the API documentation as a whole becomes more
* self-contained as it is the only class from which you can infer which syntactical
* constructs are available in a Klojang template. This class might also be useful for
* toolmakers (e.g. when writing a syntax highlighting plugin).
*/
public final class Regex {
private static final int MULTILINE = Pattern.MULTILINE | Pattern.DOTALL;
/**
* Regular expression for {@linkplain VarGroup variable group} names. Variable groups
* can be specified inline (within the template) using this syntax:
* {@code ~%vargroup:varname%}. For example: {@code ~%html:firstName%}. Variable group
* names must start with a letter and be followed by zero or more letters, digits,
* underscores or hyphens. "begin" and "end" are illegal names for variable groups.
*/
public static final String REGEX_VAR_GROUP = "([a-zA-Z][a-zA-Z0-9_\\-]*)";
/**
* Regular expression for nested template names and path segments within a variable
* name. Since these names may correspond to keys in {@code Map} objects, there are very
* few constraints on what constitutes a valid name. They must consist of at least one
* character, and they must not contain any of the following characters:
* {@code ~%:.\n\r\0}. Of course, if the names are to correspond to, for example, bean
* properties, they are externally constrained: they must be valid Java identifiers.
*/
public static final String REGEX_NAME = "([^~%:.\\n\\r\u0000]+)";
/**
* Regular expression for path strings. Variable names are paths through an
* object graph. For example: {@code ~%company.address.city%}. This variable would map
* to the {@code city} property of the {@code Address} object within the {@code Company}
* object within the object that you populate the template with. Each of the name
* segments must match {@link #REGEX_NAME}. In practice, you are more likely to use
* nested and doubly-nested templates, and then use simple names at the appropriate
* nesting level (e.g. {@code ~%city%}).
*
*
Do not confuse this regular expression with
* {@link #REGEX_INCLUDE_PATH}). The latter is used for included templates, in which
* you specify a path to a file system or classpath resource.
*
* @see org.klojang.path.Path
*/
public static final String REGEX_PATH
= "("
+ REGEX_NAME
+ "(\\." + REGEX_NAME + ")*"
+ ")";
/**
* Regular expression for template variables. The pattern for a variable name is:
* {@code ~%[vargroup:]varname%}, where {@code vargroup} is {@link #REGEX_VAR_GROUP} and
* {@code varname} is {@link #REGEX_PATH}.
*/
public static final String REGEX_VARIABLE
= "~%"
+ "(" + REGEX_VAR_GROUP + ":)?"
+ REGEX_PATH
+ "%";
/**
*
Regular expression for a template variable that is placed inside an HTML
* comment. For example: {@code }. This is rendered just like
* {@code ~%firstName%}. However, when using HTML comments, the raw, unprocessed
* template still renders nicely in a browser — without "odd" tilde-percent
* sequences spoiling the HTML page. This works even better if you also provide a
* placeholder value, as in the following example:
* {@code John}. This, too, renders just like
* {@code ~%firstName%}. Now, when the browser renders the raw template, it will display
* the string "John", because it is outside any HTML comments. But when
* Klojang Templates renders the template, "John" will have
* disappeared, and the only thing that remains is the value of {@code firstName}.
*
*
Note that the entire construct ({@code John})
* must be on a single line. If you want to provide a placeholder value
* that spans multiple lines, use the syntax in the example below:
*
*
{@code
*
*
*
*
* This entire piece of text, and
* the placeholder tags on either
* side of it, will be gone when
* the template is rendered
*
*
*
* }
*
* However, contrary to the single-line syntax, this value is not recorded
* as the placeholder for the preceding variable. It is just something that
* will be visible in the raw template, but gone in the rendered version.
*
*
The space character surrounding the variable (as in
* {@code }) is optional. You may also omit it
* ({@code }). Multiple spaces or other characters are not allowed.
*
* @see VarGroup#DEF
* @see #REGEX_PLACEHOLDER
*/
public static final String REGEX_CMT_VARIABLE
= "((.*?))?";
/**
*
* Regular expression for inline templates begin tags. The following examples are all
* valid begin tags:
*
*
* - {@code ~%%begin:foo%}
*
- {@code }
*
*
* However, the parser enforces an extra symmetry:
*
*
* - {@code } must terminate with {@code }
*
- {@code }
*
- {@code ~%%begin:foo%} must terminate with {@code ~%%end:foo%}
*
*
* The space character following "<!--" and/or preceding "-->" is optional.
* Multiple spaces or other characters are not allowed.
*
*/
public static final String REGEX_INLINE_TEMPLATE_BEGIN
= "()?";
/**
*
* Regular expression for inline templates end tags. The following examples are all
* valid end tags:
*
*
* - {@code ~%%end:foo%}
*
- {@code ~%%end:foo% -->}
*
- {@code }
*
*
* However, the parser enforces an extra symmetry:
*
*
* - {@code } must terminate with {@code }
*
- {@code }
*
- {@code ~%%begin:foo%} must terminate with {@code ~%%end:foo%}
*
*
* The space character following "<!--" and/or preceding "-->" is optional.
* Multiple spaces or other characters are not allowed.
*
*/
public static final String REGEX_INLINE_TEMPLATE_END
= "()?";
/**
* Regular expression for the path specified in an included template. Templates are
* included in another template using this syntax:
* {@code ~%%include:/path/to/template.html%%} or
* {@code ~%%include:template-name:/path/to/template.html%%}. The path is a sequence of
* one more valid URL characters. So: letters, digits and:
* {@code _-~:;/?#!$&%,@+.=[]()}.
*/
public static final String REGEX_INCLUDE_PATH
= "([a-zA-Z0-9_~:;/?#!$&%,@+.=\\-\\[\\]()]+?)";
/**
* Regular expression for included templates. This is the basic pattern:
* {@code ~%%include:[template-name:]path%%}. If no name is provided, the template name
* will be the base name of the last path element. So for
* {@code ~%%include:/path/to/foo.html%%} that would be "foo".
*/
public static final String REGEX_INCLUDED_TEMPLATE
= "~%%include:"
+ "(" + REGEX_NAME + ":)?"
+ REGEX_INCLUDE_PATH
+ "%%";
/**
* Regular expression for an included template that is placed inside an HTML comment.
* For example: {@code }.
*/
public static final String REGEX_CMT_INCLUDED_TEMPLATE
= "";
/**
* Regular expression for ditch blocks. A ditch block consists of a pair of
* {@code } tokens and any text between them. A ditch block is the
* Klojang Templates equivalent of an HTML or Java comment. Ditch blocks
* can be used to "comment out" nested templates, template variables, static HTML, etc.
* They cannot themselves be nested inside any syntactical construct provided by
* Klojang Templates, including nested templates.
*/
public static final String REGEX_DITCH_BLOCK = "(.*?)";
// Used only for syntax error detection:
static final String DITCH_BLOCK_TOKEN = "";
static final String PLACEHOLDER_TOKEN = "";
/**
* Regular expression for placeholders. A placeholder consists of a pair of
* {@code } tokens and any text between them. When a template is rendered by
* Klojang Templates, these tokens, and any text between them are erased from
* the template. However, since {@code } is a self-closed HTML comment, a
* browser would display what is between these tokens when rendering the raw,
* unprocessed template. Contrary to {@link #REGEX_DITCH_BLOCK ditch blocks},
* placeholders may appear inside a nested template.
*/
public static final String REGEX_PLACEHOLDER = "(.*?)";
static final Pattern VARIABLE = compile(REGEX_VARIABLE);
static final Pattern CMT_VARIABLE = compile(REGEX_CMT_VARIABLE);
static final Pattern INLINE_TEMPLATE_BEGIN = compile(REGEX_INLINE_TEMPLATE_BEGIN);
static final Pattern INLINE_TEMPLATE_END = compile(REGEX_INLINE_TEMPLATE_END);
static final Pattern INCLUDED_TEMPLATE = compile(REGEX_INCLUDED_TEMPLATE);
static final Pattern CMT_INCLUDED_TEMPLATE = compile(REGEX_CMT_INCLUDED_TEMPLATE);
static final Pattern DITCH_BLOCK = compile(REGEX_DITCH_BLOCK, MULTILINE);
static final Pattern PLACEHOLDER = compile(REGEX_PLACEHOLDER, MULTILINE);
/**
* Prints the regular expressions.
*/
public static void printAll() {
System.out.println("VARIABLE ................: " + VARIABLE);
System.out.println("CMT_VARIABLE ............: " + CMT_VARIABLE);
System.out.println("INLINE_TEMPLATE_BEGIN ...: " + INLINE_TEMPLATE_BEGIN);
System.out.println("INLINE_TEMPLATE_END .....: " + INLINE_TEMPLATE_END);
System.out.println("INCLUDED_TEMPLATE .......: " + INCLUDED_TEMPLATE);
System.out.println("CMT_INCLUDED_TEMPLATE ...: " + CMT_INCLUDED_TEMPLATE);
System.out.println("DITCH_BLOCK .............: " + DITCH_BLOCK);
System.out.println("PLACEHOLDER .............: " + PLACEHOLDER);
}
private Regex() {
throw new UnsupportedOperationException();
}
}