All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jflex.core.RegExp Maven / Gradle / Ivy

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * JFlex 1.8.2                                                             *
 * Copyright (C) 1998-2018  Gerwin Klein                     *
 * All rights reserved.                                                    *
 *                                                                         *
 * License: BSD                                                            *
 *                                                                         *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

package jflex.core;

import java.util.List;
import jflex.core.unicode.CharClasses;
import jflex.core.unicode.IntCharSet;
import jflex.exceptions.CharClassException;

/**
 * Stores a regular expression of rules section in a JFlex-specification.
 *
 * 

This base class has no content other than its type. * * @author Gerwin Klein * @version JFlex 1.8.2 */ public class RegExp { /** * The type of the regular expression. This field will be filled with values from class sym.java * (generated by cup) */ int type; /** * Create a new regular expression of the specified type. * * @param type a value from the cup generated class sym. */ public RegExp(int type) { this.type = type; } /** * Returns a String-representation of this regular expression with the specified indentation. * * @param tab a String that should contain only space characters and that is inserted in front of * standard String-representation pf this object. * @return a {@link java.lang.String} object. */ public String print(String tab) { return tab + toString(); } @Override public String toString() { return "type = " + typeName(); } /** String representation of the type of this regular expression. */ public String typeName() { return sym.terminalNames[type]; } /** * Find out if this regexp is a char class or equivalent to one. * * @return true if the regexp is equivalent to a char class. */ public boolean isCharClass() { switch (type) { case sym.CHAR: case sym.CHAR_I: case sym.PRIMCLASS: return true; case sym.BAR: RegExp2 binary = (RegExp2) this; return binary.r1.isCharClass() && binary.r2.isCharClass(); default: return false; } } /** * The approximate number of NFA states this expression will need (only works correctly after * macro expansion and without negation) * * @param macros macro table for expansion * @return a int. */ public int size(Macros macros) { RegExp1 unary; RegExp2 binary; RegExp content; switch (type) { case sym.BAR: binary = (RegExp2) this; return binary.r1.size(macros) + binary.r2.size(macros) + 2; case sym.CONCAT: binary = (RegExp2) this; return binary.r1.size(macros) + binary.r2.size(macros); case sym.STAR: case sym.PLUS: unary = (RegExp1) this; content = (RegExp) unary.content; return content.size(macros) + 2; case sym.QUESTION: unary = (RegExp1) this; content = (RegExp) unary.content; return content.size(macros); case sym.BANG: unary = (RegExp1) this; content = (RegExp) unary.content; return content.size(macros) * content.size(macros); // this is only a very rough estimate (worst case 2^n) // exact size too complicated (propably requires construction) case sym.TILDE: unary = (RegExp1) this; content = (RegExp) unary.content; return content.size(macros) * content.size(macros) * 3; // see sym.BANG case sym.STRING: case sym.STRING_I: unary = (RegExp1) this; return ((String) unary.content).length() + 1; case sym.CHAR: case sym.CHAR_I: return 2; case sym.CCLASS: case sym.CCLASSNOT: case sym.CCLASSOP: case sym.PRIMCLASS: return 2; case sym.MACROUSE: unary = (RegExp1) this; return macros.getDefinition((String) unary.content).size(macros); default: throw new RegExpException(this); } } /** Reverses a string. */ static String revString(String s) { return new StringBuilder(s).reverse().toString(); } /** * Recursively convert tilde (upto) expressions into negation and star. * * @return new RegExp equivalent to the current one, but without upto expressions. */ public final RegExp resolveTilde() { RegExp1 unary; RegExp2 binary; RegExp content; switch (type) { case sym.BAR: binary = (RegExp2) this; return new RegExp2(sym.BAR, binary.r1.resolveTilde(), binary.r2.resolveTilde()); case sym.CONCAT: binary = (RegExp2) this; return new RegExp2(sym.CONCAT, binary.r1.resolveTilde(), binary.r2.resolveTilde()); case sym.STAR: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(sym.STAR, content.resolveTilde()); case sym.PLUS: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(sym.PLUS, content.resolveTilde()); case sym.QUESTION: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(sym.QUESTION, content.resolveTilde()); case sym.BANG: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(sym.BANG, content.resolveTilde()); case sym.TILDE: // ~a = !([^]* a [^]*) a // uses subexpression sharing unary = (RegExp1) this; content = ((RegExp) unary.content).resolveTilde(); RegExp any_star = new RegExp1(sym.STAR, anyChar()); RegExp neg = new RegExp1( sym.BANG, new RegExp2(sym.CONCAT, any_star, new RegExp2(sym.CONCAT, content, any_star))); return new RegExp2(sym.CONCAT, neg, content); case sym.STRING: case sym.STRING_I: case sym.CHAR: case sym.CHAR_I: case sym.PRIMCLASS: unary = (RegExp1) this; return new RegExp1(unary.type, unary.content); default: throw new RegExpException(this); } } /** * Returns a regexp that matches any character: {@code [^]} * * @return the regexp for {@code [^]} */ public static RegExp anyChar() { return new RegExp1(sym.PRIMCLASS, IntCharSet.allChars()); } /** * Confirms that the parameter is a RegExp1 of type sym.PRIMCLASS. * * @param r the RegExp to check * @throws CharClassException if r is not a RegExp1 or of type sym.PRIMCLASS. * @return r cast to RegExp1 */ public static RegExp1 checkPrimClass(RegExp r) { if (!(r instanceof RegExp1 && r.type == sym.PRIMCLASS)) throw new CharClassException("Not normalised " + r); return (RegExp1) r; } /** * Performs the given set operation on the two {@link IntCharSet} parameters. * * @param op the operation to perform (as @{link sym} constant) * @param l the left operator of the expression * @param r the right operator of the expression * @param ctxt the regular expression containing the provided operator * @return a new {@link IntCharSet} * @throws RegExpException for {@code ctxt} if the operator is not supported */ public static IntCharSet performClassOp(int op, IntCharSet l, IntCharSet r, RegExp ctxt) { IntCharSet set; IntCharSet intersection = l.and(r); switch (op) { case sym.INTERSECTION: return intersection; case sym.DIFFERENCE: // IntCharSet.sub() assumes its argument is a subset, so subtract intersection set = IntCharSet.copyOf(l); set.sub(intersection); return set; case sym.SYMMETRICDIFFERENCE: set = IntCharSet.copyOf(l); set.add(r); set.sub(intersection); return set; default: throw new RegExpException(ctxt); } } /** * Normalise the regular expression to eliminate macro use (expand them), and compound character * class expression (compute their content). * * @return a regexp that contains only {@link IntCharSet} char classes and no {@link * sym#MACROUSE}. */ @SuppressWarnings("unchecked") public final RegExp normalise(Macros m) { RegExp1 unary; RegExp2 binary; RegExp content; switch (type) { case sym.BAR: case sym.CONCAT: binary = (RegExp2) this; return new RegExp2(type, binary.r1.normalise(m), binary.r2.normalise(m)); case sym.STAR: case sym.PLUS: case sym.QUESTION: case sym.BANG: case sym.TILDE: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(type, content.normalise(m)); case sym.STRING: case sym.STRING_I: case sym.CHAR: case sym.CHAR_I: case sym.PRIMCLASS: unary = (RegExp1) this; return new RegExp1(type, unary.content); case sym.CCLASS: { unary = (RegExp1) this; List contents = (List) unary.content; IntCharSet set = new IntCharSet(); for (RegExp r : contents) { RegExp1 n = checkPrimClass(r.normalise(m)); set.add((IntCharSet) n.content); } return new RegExp1(sym.PRIMCLASS, set); } case sym.CCLASSNOT: { unary = (RegExp1) this; List contents = (List) unary.content; IntCharSet set = IntCharSet.allChars(); for (RegExp r : contents) { RegExp1 n = checkPrimClass(r.normalise(m)); set.sub((IntCharSet) n.content); } return new RegExp1(sym.PRIMCLASS, set); } case sym.CCLASSOP: unary = (RegExp1) this; binary = (RegExp2) unary.content; RegExp1 l = checkPrimClass(binary.r1.normalise(m)); IntCharSet setl = (IntCharSet) l.content; RegExp1 r = checkPrimClass(binary.r2.normalise(m)); IntCharSet setr = (IntCharSet) r.content; IntCharSet set = performClassOp(binary.type, setl, setr, this); return new RegExp1(sym.PRIMCLASS, set); case sym.MACROUSE: unary = (RegExp1) this; return m.getDefinition((String) unary.content).normalise(m); default: throw new RegExpException(this); } } /** * Make character class partitions based on the classes mentioned in this regexp. * *

Assumption: regexp is normalised. */ public final void makeCCLs(CharClasses c, boolean caseless) { RegExp1 unary; RegExp2 binary; RegExp content; switch (type) { case sym.BAR: case sym.CONCAT: binary = (RegExp2) this; binary.r1.makeCCLs(c, caseless); binary.r2.makeCCLs(c, caseless); return; case sym.STAR: case sym.PLUS: case sym.QUESTION: case sym.BANG: case sym.TILDE: unary = (RegExp1) this; content = (RegExp) unary.content; content.makeCCLs(c, caseless); return; case sym.STRING: case sym.STRING_I: case sym.CHAR: case sym.CHAR_I: return; case sym.PRIMCLASS: unary = (RegExp1) this; IntCharSet set = (IntCharSet) unary.content; c.makeClass(set, caseless); return; default: throw new CharClassException("makeCCLs: unexpected regexp " + this); } } /** * Creates a new regexp that matches the reverse text of this one. * * @return the reverse regexp */ public final RegExp rev() { RegExp1 unary; RegExp2 binary; RegExp content; switch (type) { case sym.BAR: binary = (RegExp2) this; return new RegExp2(sym.BAR, binary.r1.rev(), binary.r2.rev()); case sym.CONCAT: binary = (RegExp2) this; return new RegExp2(sym.CONCAT, binary.r2.rev(), binary.r1.rev()); case sym.STAR: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(sym.STAR, content.rev()); case sym.PLUS: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(sym.PLUS, content.rev()); case sym.QUESTION: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(sym.QUESTION, content.rev()); case sym.BANG: unary = (RegExp1) this; content = (RegExp) unary.content; return new RegExp1(sym.BANG, content.rev()); case sym.TILDE: content = resolveTilde(); return content.rev(); case sym.STRING: case sym.STRING_I: unary = (RegExp1) this; return new RegExp1(unary.type, revString((String) unary.content)); case sym.CHAR: case sym.CHAR_I: case sym.PRIMCLASS: unary = (RegExp1) this; return new RegExp1(unary.type, unary.content); default: throw new RegExpException(this); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy