All downloads are free. Search and download functionality uses the official Maven repository.

regexodus.Replacer Maven / Gradle / Ivy

Go to download

JVM AOT compiler currently generating JavaScript, C++, Haxe, with initial focus on Kotlin and games.

There is a newer version: 0.6.8
Show newest version
/**
 * Copyright (c) 2001, Sergey A. Samokhodkin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * - Redistributions in binary form
 * must reproduce the above copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided with the distribution.
 * - Neither the name of jregex nor the names of its contributors may be used
 * to endorse or promote products derived from this software without specific prior
 * written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @version 1.2_01
 */

package regexodus;

import com.jtransc.annotation.JTranscInvisible;

import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.io.Writer;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * The Replacer class suggests some methods to replace occurrences of a pattern
 * either by a result of evaluation of a perl-like expression, or by a plain string,
 * or according to a custom substitution model, provided as a Substitution interface implementation.
 * <br>
 * A Replacer instance may be obtained either using Pattern.replacer(...) method, or by constructor:
 * <pre>
 * Pattern p=new Pattern("\\w+");
 * Replacer perlExpressionReplacer=p.replacer("[$&]");
 * //or another way to do the same
 * Substitution myOwnModel=new Substitution(){
 *    public void appendSubstitution(MatchResult match,TextBuffer tb){
 *       tb.append('[');
 *       match.getGroup(MatchResult.MATCH,tb);
 *       tb.append(']');
 *    }
 * }
 * Replacer myVeryOwnReplacer=new Replacer(p,myOwnModel);
 * </pre>
 * The second method is much more verbose, but gives more freedom.
 * To perform a replacement call replace(someInput):
 * <pre>
 * System.out.print(perlExpressionReplacer.replace("All your base "));
 * System.out.println(myVeryOwnReplacer.replace("are belong to us"));
 * //result: "[All] [your] [base] [are] [belong] [to] [us]"
 * </pre>
 * This code was mostly written in 2001, I hope the reference isn't too outdated...
 *
 * @see Substitution
 * @see PerlSubstitution
 * @see Replacer#Replacer(regexodus.Pattern, regexodus.Substitution)
 */
@JTranscInvisible
public class Replacer implements Serializable {
    private Pattern pattern;
    private Substitution substitution;

    /**
     * Constructs a Replacer from a Pattern and implementation of Substitution.
     * Only meant to be used if you have complex substitution behavior. An example of how to make such an
     * implementation that surrounds each match with an increasing number of square brackets could be:
     * <pre>
     * Substitution mySub=new Substitution(){
     *    public int counter = 1;
     *    public void appendSubstitution(MatchResult match,TextBuffer tb){
     *        for(int i = 0; i &lt; counter; i++)
     *            tb.append('[');
     *        //appends the full match into tb; 0 can be used in place of MatchResult.MATCH
     *        match.getGroup(MatchResult.MATCH, tb);
     *        for(int i = 0; i &lt; counter; i++)
     *            tb.append(']');
     *        counter++;
     *    }
     * }
     * </pre>
     *
     * @param pattern      a regexodus.Pattern that determines what should be replaced
     * @param substitution an implementation of the Substitution interface, which allows custom replacement behavior
     */
    public Replacer(Pattern pattern, Substitution substitution) {
        this.pattern = pattern;
        this.substitution = substitution;
    }

    /**
     * Constructs a Replacer from a Pattern and a String to replace occurrences of the Pattern with.
     * The String is treated as a perl-like expression; see {@link #Replacer(Pattern, String, boolean)}.
     *
     * @param pattern      a regexodus.Pattern that determines what should be replaced
     * @param substitution a String that will be used to replace occurrences of the Pattern
     */
    public Replacer(Pattern pattern, String substitution) {
        this(pattern, substitution, true);
    }

    /**
     * Constructs a Replacer from a Pattern and a replacement String, choosing how the String is interpreted.
     *
     * @param pattern      a regexodus.Pattern that determines what should be replaced
     * @param substitution the replacement text
     * @param isPerlExpr   if true, substitution is wrapped in a PerlSubstitution; if false, it is appended
     *                     verbatim for every match
     */
    public Replacer(Pattern pattern, String substitution, boolean isPerlExpr) {
        this.pattern = pattern;
        this.substitution = toSubstitution(substitution, isPerlExpr);
    }

    /**
     * Replaces the current substitution with one built from a String.
     *
     * @param s          the replacement text
     * @param isPerlExpr if true, s is wrapped in a PerlSubstitution; if false, it is appended verbatim
     */
    public void setSubstitution(String s, boolean isPerlExpr) {
        substitution = toSubstitution(s, isPerlExpr);
    }

    /** Builds the Substitution for a replacement String: perl-like expression or verbatim text. */
    private static Substitution toSubstitution(String text, boolean isPerlExpr) {
        if (isPerlExpr) {
            return new PerlSubstitution(text);
        }
        return new DummySubstitution(text);
    }

    /**
     * Takes all instances in text of the Pattern this was constructed with, and replaces them with substitution.
     *
     * @param text a String, StringBuilder, or other CharSequence that may contain the text to replace
     * @return the post-replacement text
     */
    public String replace(CharSequence text) {
        TextBuffer tb = wrap(new StringBuilder(text.length()));
        replace(pattern.matcher(text), substitution, tb);
        return tb.toString();
    }

    /**
     * Takes instances in text of the Pattern this was constructed with, up to count times, and replaces them with
     * substitution.
     * If you want to change the position in a Matcher so you start the next replacement at a later point
     * in text, you can use {@code replace(Matcher, Substitution, TextBuffer, int)}, which this uses internally. The
     * difference is that internally, this uses a temporary Matcher that doesn't store the change in position, and
     * code that should track replacement points should use a longer-lived Matcher.
     *
     * @param text  a String, StringBuilder, or other CharSequence that may contain the text to replace
     * @param count the maximum number of replacements to perform; will make no changes if less than 1
     * @return the post-replacement text
     */
    public String replace(CharSequence text, int count) {
        TextBuffer tb = wrap(new StringBuilder(text.length()));
        replace(pattern.matcher(text), substitution, tb, count);
        return tb.toString();
    }

    /**
     * Replaces all matches found by {@code pattern.matcher(chars, off, len)} and returns the result.
     *
     * @param chars the char array holding the text
     * @param off   offset passed to the matcher
     * @param len   length passed to the matcher; also used as the initial capacity of the result
     * @return the post-replacement text
     */
    public String replace(char[] chars, int off, int len) {
        TextBuffer tb = wrap(new StringBuilder(len));
        replace(pattern.matcher(chars, off, len), substitution, tb);
        return tb.toString();
    }

    /**
     * Replaces all matches found by {@code pattern.matcher(res, group)} and returns the result.
     *
     * @param res   a MatchResult whose group supplies the text to process
     * @param group the group index passed to the matcher
     * @return the post-replacement text
     */
    public String replace(MatchResult res, int group) {
        TextBuffer tb = wrap(new StringBuilder());
        replace(pattern.matcher(res, group), substitution, tb);
        return tb.toString();
    }

    /**
     * Replaces all matches found by {@code pattern.matcher(text, length)} and returns the result.
     *
     * @param text   the Reader supplying the text
     * @param length passed through to the matcher; when non-negative it is also the initial result capacity
     * @return the post-replacement text
     * @throws IOException declared for failures while reading from text
     */
    @GwtIncompatible
    public String replace(Reader text, int length) throws IOException {
        TextBuffer tb = wrap(new StringBuilder(length >= 0 ? length : 0));
        replace(pattern.matcher(text, length), substitution, tb);
        return tb.toString();
    }

    /**
     * Takes all occurrences of the pattern this was constructed with in text and replaces them with the substitution.
     * Appends the replaced text into sb.
     *
     * @param text a String, StringBuilder, or other CharSequence that may contain the text to replace
     * @param sb   the StringBuilder to append the result into
     * @return the number of individual replacements performed; the results are applied to sb
     */
    public int replace(CharSequence text, StringBuilder sb) {
        return replace(pattern.matcher(text), substitution, wrap(sb));
    }

    /**
     * Takes instances in text of the Pattern this was constructed with, up to count times, and replaces them with the
     * substitution. Appends the replaced text into sb.
     *
     * @param text  a String, StringBuilder, or other CharSequence that may contain the text to replace
     * @param sb    the StringBuilder to append the result into
     * @param count the maximum number of replacements to perform; will make no changes if less than 1
     * @return the number of individual replacements performed; the results are applied to sb
     */
    public int replace(CharSequence text, StringBuilder sb, int count) {
        return replace(pattern.matcher(text), substitution, wrap(sb), count);
    }

    /**
     * Replaces all matches in a region of a char array, appending the result into sb.
     *
     * @param chars the char array holding the text
     * @param off   offset passed to the matcher
     * @param len   length passed to the matcher
     * @param sb    the StringBuilder to append the result into
     * @return the number of replacements performed
     */
    public int replace(char[] chars, int off, int len, StringBuilder sb) {
        return replace(chars, off, len, wrap(sb));
    }

    /**
     * Replaces all matches in a group of a MatchResult, appending the result into sb.
     *
     * @param res   a MatchResult whose group supplies the text to process
     * @param group the group index passed to the matcher
     * @param sb    the StringBuilder to append the result into
     * @return the number of replacements performed
     */
    public int replace(MatchResult res, int group, StringBuilder sb) {
        return replace(res, group, wrap(sb));
    }

    /**
     * Replaces all matches in a named group of a MatchResult, appending the result into sb.
     *
     * @param res       a MatchResult whose group supplies the text to process
     * @param groupName the group name passed to the matcher
     * @param sb        the StringBuilder to append the result into
     * @return the number of replacements performed
     */
    public int replace(MatchResult res, String groupName, StringBuilder sb) {
        return replace(res, groupName, wrap(sb));
    }

    /**
     * Replaces all matches in text supplied by a Reader, appending the result into sb.
     *
     * @param text   the Reader supplying the text
     * @param length passed through to the matcher
     * @param sb     the StringBuilder to append the result into
     * @return the number of replacements performed
     * @throws IOException declared for failures while reading from text
     */
    @GwtIncompatible
    public int replace(Reader text, int length, StringBuilder sb) throws IOException {
        return replace(text, length, wrap(sb));
    }

    /**
     * Replaces all matches in text, appending the result into an arbitrary TextBuffer.
     *
     * @param text a CharSequence that may contain the text to replace
     * @param dest the TextBuffer to append the result into
     * @return the number of replacements performed
     */
    public int replace(CharSequence text, TextBuffer dest) {
        return replace(pattern.matcher(text), substitution, dest);
    }

    /** TextBuffer-based worker behind {@link #replace(char[], int, int, StringBuilder)}. */
    private int replace(char[] chars, int off, int len, TextBuffer dest) {
        return replace(pattern.matcher(chars, off, len), substitution, dest);
    }

    /** TextBuffer-based worker behind {@link #replace(MatchResult, int, StringBuilder)}. */
    private int replace(MatchResult res, int group, TextBuffer dest) {
        return replace(pattern.matcher(res, group), substitution, dest);
    }

    /** TextBuffer-based worker behind {@link #replace(MatchResult, String, StringBuilder)}. */
    private int replace(MatchResult res, String groupName, TextBuffer dest) {
        return replace(pattern.matcher(res, groupName), substitution, dest);
    }

    /** TextBuffer-based worker behind {@link #replace(Reader, int, StringBuilder)}. */
    @GwtIncompatible
    private int replace(Reader text, int length, TextBuffer dest) throws IOException {
        return replace(pattern.matcher(text, length), substitution, dest);
    }

    /**
     * Replaces all occurrences of a matcher's pattern in a matcher's target
     * by a given substitution appending the result to a buffer.
     * <br>
     * The substitution starts from current matcher's position, current match
     * not included.
     *
     * @param m            a Matcher
     * @param substitution a Substitution, typically a PerlSubstitution
     * @param dest         the TextBuffer this will write to; see Replacer.wrap()
     * @return the number of replacements performed
     */
    public static int replace(Matcher m, Substitution substitution, TextBuffer dest) {
        boolean firstPass = true;
        int c = 0;
        while (m.find()) {
            if (m.end() == 0 && !firstPass) continue; //allow to replace at "^"
            if (m.start() > 0) m.getGroup(MatchResult.PREFIX, dest);
            substitution.appendSubstitution(m, dest);
            c++;
            // keep searching in what follows this match
            m.setTarget(m, MatchResult.SUFFIX);
            firstPass = false;
        }
        // copy whatever is left of the (re-targeted) input after the last match
        m.getGroup(MatchResult.TARGET, dest);
        return c;
    }

    /**
     * Replaces the first n occurrences of a matcher's pattern, where n is equal to count,
     * in a matcher's target by a given substitution, appending the result to a buffer.
     * <br>
     * The substitution starts from current matcher's position, current match not included.
     *
     * @param m            a Matcher
     * @param substitution a Substitution, typically a PerlSubstitution
     * @param dest         the TextBuffer this will write to; see Replacer.wrap()
     * @param count        the number of replacements to attempt
     * @return the number of replacements performed
     */
    public static int replace(Matcher m, Substitution substitution, TextBuffer dest, int count) {
        boolean firstPass = true;
        int c = 0;
        while (c < count && m.find()) {
            if (m.end() == 0 && !firstPass) continue; //allow to replace at "^"
            if (m.start() > 0) m.getGroup(MatchResult.PREFIX, dest);
            substitution.appendSubstitution(m, dest);
            c++;
            // keep searching in what follows this match
            m.setTarget(m, MatchResult.SUFFIX);
            firstPass = false;
        }
        // copy whatever is left of the (re-targeted) input after the last match
        m.getGroup(MatchResult.TARGET, dest);
        return c;
    }

    /**
     * Replaces the next occurrence of a matcher's pattern in a matcher's target by a given substitution, appending
     * the result to a buffer but not writing the remainder of m's match to the end of dest.
     * <br>
     * The substitution starts from current matcher's position, current match not included.
     * <br>
     * You typically want to call {@code m.getGroup(MatchResult.TARGET, dest);} after you have called replaceStep()
     * until it returns false, which will fill in the remainder of the matching text into dest.
     *
     * @param m            a Matcher
     * @param substitution a Substitution, typically a PerlSubstitution
     * @param dest         the TextBuffer this will write to; see Replacer.wrap()
     * @return true if a match was found and replaced, false if there was no further match
     */
    public static boolean replaceStep(Matcher m, Substitution substitution, TextBuffer dest) {
        // A single step performs at most one find(); nothing is remembered between calls.
        if (!m.find()) {
            return false;
        }
        if (m.start() > 0) m.getGroup(MatchResult.PREFIX, dest);
        substitution.appendSubstitution(m, dest);
        // the next step continues in what follows this match
        m.setTarget(m, MatchResult.SUFFIX);
        return true;
    }

    /**
     * Runs a full replacement into a Writer, converting the unchecked WriteException raised by the
     * Writer-backed TextBuffer back into the IOException that caused it.
     */
    @GwtIncompatible
    private static int replace(Matcher m, Substitution substitution, Writer out) throws IOException {
        try {
            return replace(m, substitution, wrap(out));
        } catch (WriteException e) {
            throw e.reason;
        }
    }

    /**
     * Replaces all matches in text, writing the result to out.
     *
     * @param text a CharSequence that may contain the text to replace
     * @param out  the Writer that receives the result
     * @throws IOException if writing to out fails
     */
    @GwtIncompatible
    public void replace(CharSequence text, Writer out) throws IOException {
        replace(pattern.matcher(text), substitution, out);
    }

    /**
     * Replaces all matches in a region of a char array, writing the result to out.
     *
     * @param chars the char array holding the text
     * @param off   offset passed to the matcher
     * @param len   length passed to the matcher
     * @param out   the Writer that receives the result
     * @throws IOException if writing to out fails
     */
    @GwtIncompatible
    public void replace(char[] chars, int off, int len, Writer out) throws IOException {
        replace(pattern.matcher(chars, off, len), substitution, out);
    }

    /**
     * Replaces all matches in a group of a MatchResult, writing the result to out.
     *
     * @param res   a MatchResult whose group supplies the text to process
     * @param group the group index passed to the matcher
     * @param out   the Writer that receives the result
     * @throws IOException if writing to out fails
     */
    @GwtIncompatible
    public void replace(MatchResult res, int group, Writer out) throws IOException {
        replace(pattern.matcher(res, group), substitution, out);
    }

    /**
     * Replaces all matches in a named group of a MatchResult, writing the result to out.
     *
     * @param res       a MatchResult whose group supplies the text to process
     * @param groupName the group name passed to the matcher
     * @param out       the Writer that receives the result
     * @throws IOException if writing to out fails
     */
    @GwtIncompatible
    public void replace(MatchResult res, String groupName, Writer out) throws IOException {
        replace(pattern.matcher(res, groupName), substitution, out);
    }

    /**
     * Replaces all matches in text supplied by a Reader, writing the result to out.
     *
     * @param in     the Reader supplying the text
     * @param length passed through to the matcher
     * @param out    the Writer that receives the result
     * @throws IOException if reading from in or writing to out fails
     */
    @GwtIncompatible
    public void replace(Reader in, int length, Writer out) throws IOException {
        replace(pattern.matcher(in, length), substitution, out);
    }

    /** Substitution that appends a fixed String for every match, ignoring the match itself. */
    private static class DummySubstitution implements Substitution {
        final String str;

        DummySubstitution(String s) {
            str = s;
        }

        @Override
        public void appendSubstitution(MatchResult match, TextBuffer res) {
            if (str != null) res.append(str);
        }
    }

    /** Substitution that looks the matched text up in a dictionary and appends the mapped value. */
    private static class TableSubstitution implements Substitution {
        final LinkedHashMap<String, String> dictionary;

        TableSubstitution(LinkedHashMap<String, String> dict) {
            dictionary = dict;
        }

        /** Builds the dictionary from alternating key, value Strings; a trailing unpaired key is ignored. */
        TableSubstitution(String... dict) {
            dictionary = new LinkedHashMap<String, String>(dict.length / 2);
            for (int i = 0; i < dict.length - 1; i += 2) {
                dictionary.put(dict[i], dict[i + 1]);
            }
        }

        @Override
        public void appendSubstitution(MatchResult match, TextBuffer dest) {
            String m = match.group(0);
            if (m == null) return;
            if (dictionary.containsKey(m)) {
                dest.append(dictionary.get(m));
            } else {
                // not a known key: leave the matched text unchanged
                dest.append(m);
            }
        }
    }

    /**
     * Makes a Replacer that replaces a literal String at index i in pairs with the String at index i+1. Doesn't need
     * escapes in the Strings it searches for (at index 0, 2, 4, etc.), but cannot search for the exact two characters
     * in immediate succession, backslash then capital E, because it finds literal Strings using {@code \\Q...\\E}.
     * Uses only default modes (not case-insensitive, and most other flags don't have any effect since this doesn't
     * care about "\\w" or other backslash-escaped special categories), but you can get the Pattern from this
     * afterwards and set its flags with its setFlags() method. The Strings this replaces with are at index 1, 3, 5,
     * etc. and correspond to the search String immediately before it; they are also literal.
     * If pairs is null or holds fewer than two Strings, the returned Replacer leaves its input unchanged.
     *
     * @param pairs alternating search String, then replacement String, then search, replacement, etc.
     * @return a Replacer that will act as a replacement table for the given Strings
     */
    public static Replacer makeTable(String... pairs) {
        if (pairs == null || pairs.length < 2) {
            return emptyTable();
        }
        return tableReplacer(new TableSubstitution(pairs));
    }

    /**
     * Makes a Replacer that replaces a literal String key in dict with the corresponding String value in dict.
     * Doesn't need escapes in the Strings it searches for, but cannot search for the exact two
     * characters in immediate succession, backslash then capital E, because it finds literal Strings using
     * {@code \\Q...\\E}. Uses only default modes (not case-insensitive, and most other flags don't have any effect
     * since this doesn't care about "\\w" or other backslash-escaped special categories), but you can get the Pattern
     * from this afterwards and set its flags with its setFlags() method. The Strings this replaces with are the
     * values, and are also literal. If the Map this is given is a sorted Map of some kind or a (preferably)
     * LinkedHashMap, then the order search strings will be tried will be stable; the same is not necessarily true
     * for HashMap.
     * If dict is null or empty, the returned Replacer leaves its input unchanged.
     *
     * @param dict a Map (hopefully with stable order) with search String keys and replacement String values
     * @return a Replacer that will act as a replacement table for the given Strings
     */
    public static Replacer makeTable(Map<String, String> dict) {
        if (dict == null || dict.isEmpty()) {
            return emptyTable();
        }
        return tableReplacer(new TableSubstitution(new LinkedHashMap<String, String>(dict)));
    }

    /** Replacer for an empty table: each match is replaced by itself ("$&"), so text passes through unchanged. */
    private static Replacer emptyTable() {
        return new Replacer(Pattern.compile("(.+)"), new PerlSubstitution("$&"));
    }

    /** Builds the alternation {@code (?>\Qkey1\E|\Qkey2\E|...)} over a non-empty table's keys. */
    private static Replacer tableReplacer(TableSubstitution tab) {
        StringBuilder sb = new StringBuilder(128);
        sb.append("(?>");
        for (String s : tab.dictionary.keySet()) {
            sb.append("\\Q");
            sb.append(s);
            sb.append("\\E|");
        }
        // the table is non-empty here, so the last char is the trailing '|'; turn it into the closing paren
        sb.setCharAt(sb.length() - 1, ')');
        return new Replacer(Pattern.compile(sb.toString()), tab);
    }

    /**
     * Wraps a StringBuilder so it can be used as a TextBuffer destination.
     *
     * @param sb the StringBuilder that will receive appended text
     * @return a TextBuffer view that appends directly into sb
     */
    public static StringBuilderBuffer wrap(final StringBuilder sb) {
        return new StringBuilderBuffer(sb);
    }

    /** TextBuffer implementation that forwards every append to a StringBuilder. */
    public static class StringBuilderBuffer implements TextBuffer, Serializable {
        public StringBuilder sb;

        public StringBuilderBuffer() {
            sb = new StringBuilder();
        }

        public StringBuilderBuffer(final StringBuilder builder) {
            sb = builder;
        }

        public void append(char c) {
            sb.append(c);
        }

        public void append(char[] chars, int start, int len) {
            sb.append(chars, start, len);
        }

        public void append(String s) {
            sb.append(s);
        }

        @Override
        public String toString() {
            return sb.toString();
        }

        public StringBuilder toStringBuilder() {
            return sb;
        }
    }

    /**
     * Wraps a StringBuffer so it can be used as a TextBuffer destination.
     *
     * @param sb the StringBuffer that will receive appended text
     * @return a TextBuffer view that appends directly into sb
     */
    public static StringBufferBuffer wrap(final StringBuffer sb) {
        return new StringBufferBuffer(sb);
    }

    /** TextBuffer implementation that forwards every append to a StringBuffer. */
    public static class StringBufferBuffer implements TextBuffer, Serializable {
        public StringBuffer sb;

        public StringBufferBuffer() {
            sb = new StringBuffer();
        }

        public StringBufferBuffer(final StringBuffer builder) {
            sb = builder;
        }

        public void append(char c) {
            sb.append(c);
        }

        public void append(char[] chars, int start, int len) {
            sb.append(chars, start, len);
        }

        public void append(String s) {
            sb.append(s);
        }

        @Override
        public String toString() {
            return sb.toString();
        }

        public StringBuffer toStringBuffer() {
            return sb;
        }
    }

    /**
     * Wraps a Writer as a TextBuffer. The append methods here cannot declare IOException, so a failure is
     * rethrown as an unchecked WriteException, which the Writer-based replace method unwraps again.
     */
    @GwtIncompatible
    private static TextBuffer wrap(final Writer writer) {
        return new TextBuffer() {
            public void append(char c) {
                try {
                    writer.write(c);
                } catch (IOException e) {
                    throw new WriteException(e);
                }
            }

            public void append(char[] chars, int off, int len) {
                try {
                    writer.write(chars, off, len);
                } catch (IOException e) {
                    throw new WriteException(e);
                }
            }

            public void append(String s) {
                try {
                    writer.write(s);
                } catch (IOException e) {
                    throw new WriteException(e);
                }
            }
        };
    }

    /** Unchecked carrier for an IOException thrown while appending to a Writer-backed TextBuffer. */
    private static class WriteException extends RuntimeException {
        final IOException reason;

        WriteException(IOException io) {
            super(io); // keep the cause so stack traces stay useful if this ever escapes
            reason = io;
        }
    }

    public Pattern getPattern() {
        return pattern;
    }

    public void setPattern(Pattern pattern) {
        this.pattern = pattern;
    }

    public Substitution getSubstitution() {
        return substitution;
    }

    public void setSubstitution(Substitution substitution) {
        this.substitution = substitution;
    }

    /**
     * Replaces the current substitution with a PerlSubstitution built from the given expression.
     *
     * @param substitution a perl-like replacement expression
     */
    public void setSubstitution(String substitution) {
        this.substitution = new PerlSubstitution(substitution);
    }

    /**
     * Two Replacers are equal when both their patterns and their substitutions are equal (or both null).
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        Replacer other = (Replacer) o;

        // Each comparison is evaluated on its own: a bare "a ? b : c && d" would skip the
        // substitution check whenever pattern is non-null.
        boolean samePattern = pattern != null
                ? pattern.equals(other.pattern)
                : other.pattern == null;
        if (!samePattern) return false;

        return substitution != null
                ? substitution.equals(other.substitution)
                : other.substitution == null;
    }

    @Override
    public int hashCode() {
        int result = pattern != null ? pattern.hashCode() : 0;
        result = 31 * result + (substitution != null ? substitution.hashCode() : 0);
        return result;
    }

    @Override
    public String toString() {
        return "Replacer{" +
                "pattern=" + pattern +
                ", substitution=" + substitution +
                '}';
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy