All downloads are free. The search and download functionality uses the official Maven repository.

org.klojang.templates.Regex Maven / Gradle / Ivy

The newest version!
package org.klojang.templates;

import java.util.regex.Pattern;

import static java.util.regex.Pattern.compile;

/**
 * Tokens and regular expressions used by the template parser to parse templates. For all
 * intents and purposes, this is an internal class. However, by making this class and the
 * constants defined in it public, the API documentation as a whole becomes more
 * self-contained as it is the only class from which you can infer which syntactical
 * constructs are available in a Klojang template. This class might also be useful for
 * toolmakers (e.g. when writing a syntax highlighting plugin).
 */
public final class Regex {

  // Flags for the constructs that may span several lines (ditch blocks and
  // placeholders): DOTALL lets "." match line terminators as well.
  private static final int MULTILINE = Pattern.MULTILINE | Pattern.DOTALL;

  /**
   * Regular expression for {@linkplain VarGroup variable group} names. Variable groups
   * can be specified inline (within the template) using this syntax:
   * {@code ~%vargroup:varname%}. For example: {@code ~%html:firstName%}. Variable group
   * names must start with a letter and be followed by zero or more letters, digits,
   * underscores or hyphens. "begin" and "end" are illegal names for variable groups.
   */
  public static final String REGEX_VAR_GROUP = "([a-zA-Z][a-zA-Z0-9_\\-]*)";

  /**
   * Regular expression for nested template names and path segments within a variable
   * name. Since these names may correspond to keys in {@code Map} objects, there are very
   * few constraints on what constitutes a valid name. They must consist of at least one
   * character, and they must not contain any of the following characters:
   * {@code ~%:.\n\r\0}. Of course, if the names are to correspond to, for example, bean
   * properties, they are externally constrained: they must be valid Java identifiers.
   */
  public static final String REGEX_NAME = "([^~%:.\\n\\r\u0000]+)";

  /**
   * <p>Regular expression for path strings. Variable names are paths through an
   * object graph. For example: {@code ~%company.address.city%}. This variable would map
   * to the {@code city} property of the {@code Address} object within the {@code Company}
   * object within the object that you populate the template with. Each of the name
   * segments must match {@link #REGEX_NAME}. In practice, you are more likely to use
   * nested and doubly-nested templates, and then use simple names at the appropriate
   * nesting level (e.g. {@code ~%city%}).
   *
   * <p>Do not confuse this regular expression with {@link #REGEX_INCLUDE_PATH}. The
   * latter is used for included templates, in which you specify a path to a file system
   * or classpath resource.
   *
   * @see org.klojang.path.Path
   */
  public static final String REGEX_PATH
        = "(" + REGEX_NAME + "(\\." + REGEX_NAME + ")*" + ")";

  /**
   * Regular expression for template variables. The pattern for a variable name is:
   * {@code ~%[vargroup:]varname%}, where {@code vargroup} is {@link #REGEX_VAR_GROUP} and
   * {@code varname} is {@link #REGEX_PATH}.
   */
  public static final String REGEX_VARIABLE
        = "~%" + "(" + REGEX_VAR_GROUP + ":)?" + REGEX_PATH + "%";

  /**
   * <p>Regular expression for a template variable that is placed inside an HTML
   * comment. For example: {@code <!-- ~%firstName% -->}. This is rendered just like
   * {@code ~%firstName%}. However, when using HTML comments, the raw, unprocessed
   * template still renders nicely in a browser &mdash; without "odd" tilde-percent
   * sequences spoiling the HTML page. This works even better if you also provide a
   * placeholder value, as in the following example:
   * {@code <!-- ~%firstName% -->John<!--%-->}. This, too, renders just like
   * {@code ~%firstName%}. Now, when the browser renders the raw template, it will display
   * the string "John", because it is outside any HTML comments. But when
   * Klojang Templates renders the template, "John" will have
   * disappeared, and the only thing that remains is the value of {@code firstName}.
   *
   * <p>Note that the entire construct ({@code <!-- ~%firstName% -->John<!--%-->})
   * must be on a single line. If you want to provide a placeholder value
   * that spans multiple lines, use the syntax in the example below:
   *
   * <pre>{@code
   * <tr>
   *   <td>
   *   <!-- ~%firstName% -->
   *   <!--%-->
   *       This entire piece of text, and
   *       the placeholder tags on either
   *       side of it, will be gone when
   *       the template is rendered
   *   <!--%-->
   *   </td>
   * </tr>
   * }</pre>
   *
   * <p>However, contrary to the single-line syntax, this value is not recorded
   * as the placeholder for the preceding variable. It is just something that
   * will be visible in the raw template, but gone in the rendered version.
   *
   * <p>The space character surrounding the variable (as in
   * {@code <!-- ~%firstName% -->}) is optional. You may also omit it
   * ({@code <!--~%firstName%-->}). Multiple spaces or other characters are not allowed.
   *
   * @see VarGroup#DEF
   * @see #REGEX_PLACEHOLDER
   */
  public static final String REGEX_CMT_VARIABLE
        = "<!-- ?" + REGEX_VARIABLE + " ?-->((.*?)<!--%-->)?";

  /**
   * <p>Regular expression for inline templates begin tags. The following examples are
   * all valid begin tags:
   *
   * <ul>
   *   <li>{@code ~%%begin:foo%}
   *   <li>{@code <!-- ~%%begin:foo%}
   *   <li>{@code <!-- ~%%begin:foo% -->}
   * </ul>
   *
   * <p>However, the parser enforces an extra symmetry:
   *
   * <ul>
   *   <li>{@code <!-- ~%%begin:foo% -->} must terminate with
   *       {@code <!-- ~%%end:foo% -->}
   *   <li>{@code <!-- ~%%begin:foo%} must terminate with {@code ~%%end:foo% -->}
   *   <li>{@code ~%%begin:foo%} must terminate with {@code ~%%end:foo%}
   * </ul>
   *
   * <p>The space character following "&lt;!--" and/or preceding "--&gt;" is optional.
   * Multiple spaces or other characters are not allowed.
   */
  public static final String REGEX_INLINE_TEMPLATE_BEGIN
        = "(<!-- ?)?~%%begin:" + REGEX_NAME + "%( ?-->)?";

  /**
   * <p>Regular expression for inline templates end tags. The following examples are all
   * valid end tags:
   *
   * <ul>
   *   <li>{@code ~%%end:foo%}
   *   <li>{@code ~%%end:foo% -->}
   *   <li>{@code <!-- ~%%end:foo% -->}
   * </ul>
   *
   * <p>However, the parser enforces an extra symmetry:
   *
   * <ul>
   *   <li>{@code <!-- ~%%begin:foo% -->} must terminate with
   *       {@code <!-- ~%%end:foo% -->}
   *   <li>{@code <!-- ~%%begin:foo%} must terminate with {@code ~%%end:foo% -->}
   *   <li>{@code ~%%begin:foo%} must terminate with {@code ~%%end:foo%}
   * </ul>
   *
   * <p>The space character following "&lt;!--" and/or preceding "--&gt;" is optional.
   * Multiple spaces or other characters are not allowed.
   */
  public static final String REGEX_INLINE_TEMPLATE_END
        = "(<!-- ?)?~%%end:" + REGEX_NAME + "%( ?-->)?";

  /**
   * Regular expression for the path specified in an included template. Templates are
   * included in another template using this syntax:
   * {@code ~%%include:/path/to/template.html%%} or
   * {@code ~%%include:template-name:/path/to/template.html%%}. The path is a sequence of
   * one or more valid URL characters. So: letters, digits and:
   * {@code _-~:;/?#!$&%,@+.=[]()}.
   */
  public static final String REGEX_INCLUDE_PATH
        = "([a-zA-Z0-9_~:;/?#!$&%,@+.=\\-\\[\\]()]+?)";

  /**
   * Regular expression for included templates. This is the basic pattern:
   * {@code ~%%include:[template-name:]path%%}. If no name is provided, the template name
   * will be the base name of the last path element. So for
   * {@code ~%%include:/path/to/foo.html%%} that would be "foo".
   */
  public static final String REGEX_INCLUDED_TEMPLATE
        = "~%%include:" + "(" + REGEX_NAME + ":)?" + REGEX_INCLUDE_PATH + "%%";

  /**
   * Regular expression for an included template that is placed inside an HTML comment.
   * For example: {@code <!-- ~%%include:/path/to/foo.html%% -->}. As with
   * {@link #REGEX_CMT_VARIABLE}, the single space after "&lt;!--" and before "--&gt;" is
   * optional.
   */
  public static final String REGEX_CMT_INCLUDED_TEMPLATE
        = "<!-- ?" + REGEX_INCLUDED_TEMPLATE + " ?-->";

  /**
   * Regular expression for ditch blocks. A ditch block consists of a pair of
   * {@code <!--%%-->} tokens and any text between them. A ditch block is the
   * Klojang Templates equivalent of an HTML or Java comment. Ditch blocks
   * can be used to "comment out" nested templates, template variables, static HTML, etc.
   * They cannot themselves be nested inside any syntactical construct provided by
   * Klojang Templates, including nested templates.
   */
  // NOTE(review): the delimiters of this literal had been lost; the token was restored
  // as "<!--%%-->" (the double-percent sibling of the placeholder token). Confirm
  // against the upstream sources.
  public static final String REGEX_DITCH_BLOCK = "<!--%%-->(.*?)<!--%%-->";

  // Used only for syntax error detection:
  static final String DITCH_BLOCK_TOKEN = "<!--%%-->";
  static final String PLACEHOLDER_TOKEN = "<!--%-->";

  /**
   * Regular expression for placeholders. A placeholder consists of a pair of
   * {@code <!--%-->} tokens and any text between them. When a template is rendered by
   * Klojang Templates, these tokens, and any text between them are erased from
   * the template. However, since {@code <!--%-->} is a self-closed HTML comment, a
   * browser would display what is between these tokens when rendering the raw,
   * unprocessed template. Contrary to {@link #REGEX_DITCH_BLOCK ditch blocks},
   * placeholders may appear inside a nested template.
   */
  public static final String REGEX_PLACEHOLDER = "<!--%-->(.*?)<!--%-->";

  // Pre-compiled patterns. Only ditch blocks and placeholders are compiled with the
  // multi-line flags; all other constructs are compiled with default flags.
  static final Pattern VARIABLE = compile(REGEX_VARIABLE);
  static final Pattern CMT_VARIABLE = compile(REGEX_CMT_VARIABLE);
  static final Pattern INLINE_TEMPLATE_BEGIN = compile(REGEX_INLINE_TEMPLATE_BEGIN);
  static final Pattern INLINE_TEMPLATE_END = compile(REGEX_INLINE_TEMPLATE_END);
  static final Pattern INCLUDED_TEMPLATE = compile(REGEX_INCLUDED_TEMPLATE);
  static final Pattern CMT_INCLUDED_TEMPLATE = compile(REGEX_CMT_INCLUDED_TEMPLATE);
  static final Pattern DITCH_BLOCK = compile(REGEX_DITCH_BLOCK, MULTILINE);
  static final Pattern PLACEHOLDER = compile(REGEX_PLACEHOLDER, MULTILINE);

  /**
   * Prints the regular expressions to standard out, one labelled line per compiled
   * pattern.
   */
  public static void printAll() {
    System.out.println("VARIABLE ................: " + VARIABLE);
    System.out.println("CMT_VARIABLE ............: " + CMT_VARIABLE);
    System.out.println("INLINE_TEMPLATE_BEGIN ...: " + INLINE_TEMPLATE_BEGIN);
    System.out.println("INLINE_TEMPLATE_END .....: " + INLINE_TEMPLATE_END);
    System.out.println("INCLUDED_TEMPLATE .......: " + INCLUDED_TEMPLATE);
    System.out.println("CMT_INCLUDED_TEMPLATE ...: " + CMT_INCLUDED_TEMPLATE);
    System.out.println("DITCH_BLOCK .............: " + DITCH_BLOCK);
    System.out.println("PLACEHOLDER .............: " + PLACEHOLDER);
  }

  // Constants-only holder: never instantiated.
  private Regex() {
    throw new UnsupportedOperationException();
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy