All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.gargoylesoftware.htmlunit.javascript.regexp.RegExpJsToJavaConverter Maven / Gradle / Ivy

Go to download

Vaadin is a web application framework for Rich Internet Applications (RIA). Vaadin enables easy development and maintenance of fast and secure rich web applications with a stunning look and feel and a wide browser support. It features a server-side architecture with the majority of the logic running on the server. Ajax technology is used at the browser-side to ensure a rich and interactive user experience.

There is a newer version: 1.2.0
Show newest version
/*
 * Copyright (c) 2002-2011 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.gargoylesoftware.htmlunit.javascript.regexp;

/**
 * Translates JavaScript RegExp to Java RegExp.
* // [...\b...] -> [...\cH...] * // [...[...] -> [...\[...] * // [^\\1] -> . * // back reference in character classes are simply ignored by browsers [...ab\5cd...] -> [...abcd...] * // characters escaped without need should be "un-escaped" * Escape curly braces that are not used in an expression * like "{n}", "{n,}" or "{n,m}" (where n and m are positive integers). * * @version $Revision: 6206 $ * @author Ronald Brill */ public class RegExpJsToJavaConverter { private static final String DIGITS = "0123456789"; /** * Helper to encapsulate the transformations. */ private static class Tape { private StringBuilder tape_; private int currentPos_; /** * Wraps a JavaScript RegExp to access it char by char like a tape. * * @param input the Javascript RegExp */ public Tape(final String input) { currentPos_ = 0; tape_ = new StringBuilder(input); } /** * Moves the current read position by offset. * @param offset the move position offset */ public void move(final int offset) { currentPos_ += offset; } /** * Reads the character at the current position and * moves the read position by 1. * * @return the character at current position */ public int read() { if (currentPos_ < 0) { return -1; } if (currentPos_ >= tape_.length()) { return -1; } return tape_.charAt(currentPos_++); } /** * Inserts a string at the current position + offset. * @param token the string to insert * @param offset the move position offset */ public void insert(final String token, final int offset) { tape_.insert(currentPos_ + offset, token); currentPos_ += token.length(); } /** * Replaces the current char with the given string. * @param token the string to insert */ public void replace(final String token) { tape_.replace(currentPos_, currentPos_ + 1, token); } /** * Read the whole tape content. * * @return tape content */ public String toString() { return tape_.toString(); } } private Tape tape_; private boolean insideCharClass_; private boolean insideRepetition_; private int noOfSubexpressions_; /** * Initiate the FSM. */ public RegExpJsToJavaConverter() { super(); } /** * Run the state machine on a given input string. * * @param input * the js regexp to process * @return a valid java regex pattern */ public String convert(final String input) { tape_ = new Tape(input); insideCharClass_ = false; insideRepetition_ = false; noOfSubexpressions_ = 0; int current = tape_.read(); while (current > -1) { if ('\\' == current) { processEscapeSequence(); } else if ('[' == current) { processCharClassStart(); } else if (']' == current) { processCharClassEnd(); } else if ('{' == current) { processRepetitionStart(); } else if ('}' == current) { processRepetitionEnd(); } else if ('(' == current) { processSubExpressionStart(); } // process next current = tape_.read(); } return tape_.toString(); } private void processCharClassStart() { if (insideCharClass_) { tape_.insert("\\", -1); } else { insideCharClass_ = true; int next = tape_.read(); if (next < 0) { tape_.insert("\\", -1); return; } if ('^' == next) { // [^\2] next = tape_.read(); if (next < 0) { tape_.insert("\\", -2); return; } if ('\\' == next) { next = tape_.read(); if (DIGITS.indexOf(next) < 0) { tape_.move(-2); return; } // if it was a back reference then we have to check more if (handleBackReferenceOrOctal(next)) { next = tape_.read(); if (']' == next) { tape_.move(-3); tape_.replace(""); tape_.replace(""); tape_.replace("."); insideCharClass_ = false; } } } else { tape_.move(-1); } } else { tape_.move(-1); } } } private void processCharClassEnd() { insideCharClass_ = false; } private void processRepetitionStart() { final int next = tape_.read(); if (next < 0) { tape_.insert("\\", -1); return; } if (DIGITS.indexOf(next) > -1) { insideRepetition_ = true; } else if ('}' == next) { tape_.insert("\\", -2); tape_.insert("\\", -1); } else { tape_.insert("\\", -2); } } private void processRepetitionEnd() { if (insideRepetition_) { insideRepetition_ = false; return; } tape_.insert("\\", -1); } private void processSubExpressionStart() { int next = tape_.read(); if (next < 0) { return; } if ('?' != next) { noOfSubexpressions_++; tape_.move(-1); return; } next = tape_.read(); if (next < 0) { return; } if (':' != next) { noOfSubexpressions_++; tape_.move(-1); return; } } private void processEscapeSequence() { final int escapeSequence = tape_.read(); if (escapeSequence < 0) { return; } if ('x' == escapeSequence) { // Hex code (e.g. \x00) // Read the two char hex code tape_.move(2); return; } if ('u' == escapeSequence) { // Unicode (e.g. \u0009) // Read the for char unicode tape_.move(4); return; } if ("ACEFGHIJKLMNOPQRTUVXYZaeghijklmpqyz".indexOf(escapeSequence) > -1) { // no need to escape this chars tape_.move(-2); tape_.replace(""); tape_.move(1); return; } if (insideCharClass_ && 'b' == escapeSequence) { // [...\b...] -> [...\cH...] tape_.move(-1); tape_.replace("cH"); tape_.move(2); return; } if (DIGITS.indexOf(escapeSequence) > -1) { handleBackReferenceOrOctal(escapeSequence); } } private boolean handleBackReferenceOrOctal(final int aFirstChar) { // first try to figure out what the number is final StringBuilder tmpNo = new StringBuilder(Character.toString((char) aFirstChar)); int tmpInsertPos = -1; int next = tape_.read(); if (next > -1) { if (DIGITS.indexOf(next) > -1) { tmpNo.append(next); tmpInsertPos--; next = tape_.read(); if (next > -1) { if (DIGITS.indexOf(next) > -1) { tmpNo.append(next); tmpInsertPos--; } else { tape_.move(-1); } } } else { tape_.move(-1); } } final int value = Integer.parseInt(tmpNo.toString()); if (value > noOfSubexpressions_) { // we have a octal here tape_.insert("0", tmpInsertPos); return false; } if (insideCharClass_) { // drop back reference for (int i = tmpInsertPos; i <= 0; i++) { tape_.move(-1); tape_.replace(""); } } return true; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy