All downloads are free. Search and download functionality uses the official Maven repository.

regexodus.regex.Pattern Maven / Gradle / Ivy

Go to download

JVM AOT compiler currently generating JavaScript, C++, Haxe, with initial focus on Kotlin and games.

There is a newer version: 0.6.8
Show newest version
package regexodus.regex;

import com.jtransc.annotation.JTranscInvisible;
import regexodus.REFlags;

import java.io.Serializable;
import java.util.ArrayList;

/**
 * Created by Tommy Ettinger on 6/7/2016.
 */
@SuppressWarnings("WeakerAccess")
@JTranscInvisible
public class Pattern implements Serializable {
    public regexodus.Pattern internal;
    private int flags;

    /**
     * Not used; present for compatibility.
     */
    public static final int UNIX_LINES = 0x01;

    /**
     * Enables case-insensitive matching.
     *
     * 

Unicode-aware case-insensitive matching is always enabled by this * flag, regardless of unicode flag status. * *

Case-insensitive matching can also be enabled via the embedded flag * expression (?i). */ public static final int CASE_INSENSITIVE = 0x02; /** * Permits whitespace and comments in pattern. * *

In this mode, whitespace is ignored, and embedded comments starting * with # are ignored until the end of a line. * *

Comments mode can also be enabled via the embedded flag * expression (?x). */ public static final int COMMENTS = 0x04; /** * Enables multiline mode. *
* In multiline mode the expressions ^ and $ match * just after or just before, respectively, a line terminator or the end of * the input sequence. By default these expressions only match at the * beginning and the end of the entire input sequence. *
* Multiline mode can also be enabled via the embedded flag * expression (?m). */ public static final int MULTILINE = 0x08; /** * Enables literal mode. *
* In literal mode, metacharacters are not interpreted at all, and they * match the exact, literal string used for the Pattern. This is done * by running {@link Pattern#quote(String)} on the regexp string when * this flag is specified. */ public static final int LITERAL = 0x10; /** * Enables dotall mode. *
* In dotall mode, the expression . matches any character, * including a line terminator. By default this expression does not match * line terminators. *
* Dotall mode can also be enabled via the embedded flag * expression (?s). (The s is a mnemonic for * "single-line" mode, which is what this is called in Perl.) */ public static final int DOTALL = 0x20; /** * Not used; present for compatibility. */ public static final int UNICODE_CASE = 0x40; /** * Not used; present for compatibility. */ public static final int CANON_EQ = 0x80; /** * Enables the Unicode version of Predefined character classes and * POSIX character classes. *
* When this flag is specified then the (US-ASCII only) * Predefined character classes and POSIX character classes * are in conformance with * Unicode Technical * Standard #18: Unicode Regular Expression * Annex C: Compatibility Properties. *
* The UNICODE_CHARACTER_CLASS mode can also be enabled via the embedded * flag expression (?u). *
* Specifying this flag may impose a performance penalty. */ public static final int UNICODE_CHARACTER_CLASS = 0x100; /** * Compiles the given regular expression into a pattern. * * @param regex * The expression to be compiled * * @throws PatternSyntaxException * If the expression's syntax is invalid */ public static Pattern compile(String regex) { return new Pattern(regex, 0); } /** * Compiles the given regular expression into a pattern with the given * flags. * * @param regex * The expression to be compiled * * @param flags * Match flags, a bit mask that may include * {@link #CASE_INSENSITIVE}, {@link #MULTILINE}, {@link #DOTALL}, * {@link #UNICODE_CASE}, {@link #CANON_EQ}, {@link #UNIX_LINES}, * {@link #LITERAL}, {@link #UNICODE_CHARACTER_CLASS} * and {@link #COMMENTS} * * @throws IllegalArgumentException * If bit values other than those corresponding to the defined * match flags are set in flags * * @throws PatternSyntaxException * If the expression's syntax is invalid */ public static Pattern compile(String regex, int flags) { return new Pattern(regex, flags); } /** * Returns the regular expression from which this pattern was compiled. * * @return The source of this pattern */ public String pattern() { return internal.toString(); } /** * Returns the string representation of this pattern. This * is the regular expression from which this pattern was * compiled. * @return The string representation of this pattern * @since 1.5 */ public String toString() { return internal.toString(); } /** * Creates a matcher that will match the given input against this pattern. * @param input * The character sequence to be matched * * @return A new matcher for this pattern */ public Matcher matcher(CharSequence input) { return new Matcher(this, input); } /** * Returns this pattern's match flags. 
* * @return The match flags specified when this pattern was compiled */ public int flags() { return flags; } /** * Compiles the given regular expression and attempts to match the given * input against it. *
* An invocation of this convenience method of the form * *

     * Pattern.matches(regex, input);
* * behaves in exactly the same way as the expression * *
     * Pattern.compile(regex).matcher(input).matches()
*
* If a pattern is to be used multiple times, compiling it once and reusing * it will be more efficient than invoking this method each time. * * @param regex * The expression to be compiled * * @param input * The character sequence to be matched * * @throws PatternSyntaxException * If the expression's syntax is invalid */ public static boolean matches(String regex, CharSequence input) { Pattern p = Pattern.compile(regex); return p.matcher(input).matches(); } /** * Splits the given input sequence around matches of this pattern. *
* The array returned by this method contains each substring of the * input sequence that is terminated by another subsequence that matches * this pattern or is terminated by the end of the input sequence. The * substrings in the array are in the order in which they occur in the * input. If this pattern does not match any subsequence of the input then * the resulting array has just one element, namely the input sequence in * string form. *
* The limit parameter controls the number of times the * pattern is applied and therefore affects the length of the resulting * array. If the limit n is greater than zero then the pattern * will be applied at most n - 1 times, the array's * length will be no greater than n, and the array's last entry * will contain all input beyond the last matched delimiter. If n * is non-positive then the pattern will be applied as many times as * possible and the array can have any length. If n is zero then * the pattern will be applied as many times as possible, the array can * have any length, and trailing empty strings will be discarded. *
* The input "boo:and:foo", for example, yields the following * results with these parameters: * *
* * * * * * * * * * * * * * * * * * * * * *

Regex    

Limit    

Result    

:2{ "boo", "and:foo" }
:5{ "boo", "and", "foo" }
:-2{ "boo", "and", "foo" }
o5{ "b", "", ":and:f", "", "" }
o-2{ "b", "", ":and:f", "", "" }
o0{ "b", "", ":and:f" }
* * * @param input * The character sequence to be split * * @param limit * The result threshold, as described above * * @return The array of strings computed by splitting the input * around matches of this pattern */ public String[] split(CharSequence input, int limit) { int index = 0; boolean matchLimited = limit > 0; ArrayList matchList = new ArrayList(); regexodus.Matcher m = new regexodus.Matcher(internal, input); // Add segments before each match found while(m.find()) { if (!matchLimited || matchList.size() < limit - 1) { String match = input.subSequence(index, m.start()).toString(); matchList.add(match); index = m.end(); } else if (matchList.size() == limit - 1) { // last one String match = input.subSequence(index, input.length()).toString(); matchList.add(match); index = m.end(); } } // If no match was found, return this if (index == 0) return new String[] {input.toString()}; // Add remaining segment if (!matchLimited || matchList.size() < limit) matchList.add(input.subSequence(index, input.length()).toString()); // Construct result int resultSize = matchList.size(); if (limit == 0) while (resultSize > 0 && matchList.get(resultSize-1).equals("")) resultSize--; String[] result = new String[resultSize]; return matchList.subList(0, resultSize).toArray(result); } /** * Splits the given input sequence around matches of this pattern. * * This method works as if by invoking the two-argument {@link * #split(java.lang.CharSequence, int) split} method with the given input * sequence and a limit argument of zero. Trailing empty strings are * therefore not included in the resulting array. *
* The input "boo:and:foo", for example, yields the following * results with these expressions: * *
* * * * * * *

Regex    

Result

:{ "boo", "and", "foo" }
o{ "b", "", ":and:f" }
* * @param input * The character sequence to be split * * @return The array of strings computed by splitting the input * around matches of this pattern */ public String[] split(CharSequence input) { return split(input, 0); } /** * Returns a literal pattern String for the specified * String. *
* This method produces a String that can be used to * create a Pattern that would match the string * s as if it were a literal pattern. Metacharacters * or escape sequences in the input sequence will be given no special * meaning. * * @param s The string to be literalized * @return A literal string replacement */ public static String quote(String s) { int slashEIndex = s.indexOf("\\E"); if (slashEIndex == -1) return "\\Q" + s + "\\E"; StringBuilder sb = new StringBuilder(s.length() * 2); sb.append("\\Q"); int current = 0; while ((slashEIndex = s.indexOf("\\E", current)) != -1) { sb.append(s.substring(current, slashEIndex)); current = slashEIndex + 2; sb.append("\\E\\\\E\\Q"); } sb.append(s.substring(current, s.length())); sb.append("\\E"); return sb.toString(); } private Pattern(String p, int flags) { int fm = (flags & CASE_INSENSITIVE) != 0 ? REFlags.IGNORE_CASE : 0; this.flags = flags; fm |= (flags & DOTALL) != 0 ? REFlags.DOTALL : 0; fm |= (flags & COMMENTS) != 0 ? REFlags.IGNORE_SPACES : 0; fm |= (flags & MULTILINE) != 0 ? REFlags.MULTILINE : 0; fm |= (flags & UNICODE_CHARACTER_CLASS) != 0 ? REFlags.UNICODE : 0; if((flags & LITERAL) != 0) internal = regexodus.Pattern.compile(quote(p), fm); else internal = regexodus.Pattern.compile(p, fm); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy