All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.day.text.Replace Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*************************************************************************
 *
 * ADOBE CONFIDENTIAL
 * __________________
 *
 *  Copyright 2012 Adobe Systems Incorporated
 *  All Rights Reserved.
 *
 * NOTICE:  All information contained herein is, and remains
 * the property of Adobe Systems Incorporated and its suppliers,
 * if any.  The intellectual and technical concepts contained
 * herein are proprietary to Adobe Systems Incorporated and its
 * suppliers and are protected by trade secret or copyright law.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Adobe Systems Incorporated.
 **************************************************************************/
package com.day.text;

import java.util.ArrayList;
import java.util.Iterator;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The Replace class implements the string replacement
 * functionality formerly coded in the base/base_replace.c C
 * file. This class acts as a data container for the concrete replacement
 * definition and also does the actual replacement for the String/UString ECMA
 * host object and also for Java clients.
 * 

* The usage of this class is relatively simple : *

    *
  1. Acquire an instance through the default constructor *
  2. Add Pattern strings through the {@link #addPattern(String, String)}, * {@link #addPattern(int, String, String)} or {@link #addPatterns(String[][])} * calls *
  3. Optionally set replacement flags *
  4. call {@link #replace(String, int)} for each string you want to work on *
*

* The flags for the replacement are defined as follows : *

*
{@link #REPLACE_IGNORE_HTML} *
Don't replace anything contained within a tag enclosed in < and >, * resp. *
{@link #REPLACE_WHOLE_WORD} *
Only replace occurrences of patterns when they form standalone words. *
*

* The replacement algorithm works in three steps : *

    *
  1. All occurrences of the patterns are looked for in the string *
  2. Within theses occurrences, collisions are resolved using a weighting * algorithm which is based on the pattern length using a weight reference. *
  3. For each occurrence not invalidated through collision detection, the * pattern is replaced by the replacement. The rest of the input String is * copied to the destination unmodified. *
*/ public class Replace { /** default log */ private static final Logger log = LoggerFactory.getLogger(Replace.class); /** * Replacment flag indicating no special treatment */ public static final int REPLACE_NONE = 0x00; /** * Replacement flag to ignore any strings occuring within HTML tags */ public static final int REPLACE_IGNORE_HTML = 0x01; /** * Replacement flag to only replace search words occuring as stand- alone * words */ public static final int REPLACE_WHOLE_WORD = 0x02; /** * Actual replacement flags */ private int flags; /** * The list of configured replacement patterns */ private ArrayList patterns; /** * Default constructor for the replacement object */ public Replace() { this.flags = REPLACE_NONE; this.patterns = new ArrayList(); } /** * calls {@link #replace(String, int)} with wref=0 */ public String replace(String source) { return replace(source, 0); } /** * The real replacement of the patterns within the input takes place here. * * @param source The String to do the work in * @param wref Reference weight (?) */ public String replace(String source, int wref) { log.debug("replace: String \"{}\" with weight reference {}", source, new Integer(wref)); // Return empty string if source is empty if (source == null || source.length() == 0) return ""; // Return source if there is no pattern if (patterns.size() == 0) return source; // Get the occurrences list char[] src = source.toCharArray(); Occurrence[] occ = getOccurrences(src); // Process the occurrences processOccurrences(occ, wref); // Now let the replacment occurr return doReplace(src, occ); } // ---------- property accessors // -------------------------------------------- /** * Set the indicated replacement flags * * @param flags The flags to set */ public void setFlags(int flags) { this.flags = flags; } /** * Returns the current flags * * @return the current flags */ public int getFlags() { return flags; } /** * Insert the pattern at the indicated position in the internal pattern list * * @param pos The position to insert the pattern at * @param pattern The pattern String for the new pattern * @param replacement The replacement String for the new pattern */ public void addPattern(int pos, String pattern, String replacement) { log.debug( "addPattern: adding pattern \"{}\" to be replaced by \"{}\" as #{}", new Object[] { pattern, replacement, new Integer(pos) }); patterns.add(pos, new Pattern(pattern, replacement)); } /** * Append the pattern to the internal pattern list * * @param pattern The pattern String for the new pattern * @param replacement The replacement String for the new pattern */ public void addPattern(String pattern, String replacement) { addPattern(patterns.size(), pattern, replacement); } /** * Append all the pattern/replacement String pairs to the list. Each entry * is supposed to contain at least two entries. * * @param prPairs An Array of pattern/replacement String pairs. */ public void addPatterns(String[][] prPairs) { for (int i = 0; i < prPairs.length; i++) { String[] pair = prPairs[i]; if (pair == null || pair.length < 2) { // ignore empty or imcomplete pattern pair continue; } addPattern(patterns.size(), pair[0], pair[1]); } } /** * Returns an iterator over the existing patterns * * @return an iterator over the existing patterns */ public Iterator getPatterns() { return patterns.iterator(); } // ---------- internal // ------------------------------------------------------ /** * Check whether a character represents a word boundary in the sense of the * Replace class. Note that this need not be the same as * {@link Character.isDelimiter(char)} * * @param c The character to check * @return True if the character is a word delimiter */ private final boolean isWordDelim(char c) { return (c == '\0' || c == '/' || c == ' ' || c == '\r' || c == '\n' || c == '\t' || c == '.' || c == ';' || c == '(' || c == ')' || c == ',' || c == '<' || c == '\"' || c == '\\'); } /** * Walk through the source String and mark all occurrences of the patterns * * @param source The source String to analize * @return The List of occurrences to be later replaced */ private Occurrence[] getOccurrences(char[] src) { log.debug( "getOccurrences: Getting all occurrences according to the flags {}", Integer.toHexString(flags)); boolean insidetag = false; boolean reinit = true; ArrayList occ = new ArrayList(); int srclen = src.length; for (int i = 0; i < srclen; i++) { char c = src[i]; // Check whether we start a tag to ignore if (c == '<' && ((flags & REPLACE_IGNORE_HTML) != 0) && !insidetag) { log.debug("Starting an ignored HTML tag at pos " + i); insidetag = true; continue; } // Check whether we are within an ignored tag if (insidetag) { // Possibly end the tag if (c == '>') { log.debug("Ending an ignored HTML tag at pos " + i); insidetag = false; reinit = true; } continue; } // Now loop through the patterns Iterator pIter = patterns.iterator(); while (pIter.hasNext()) { Pattern pat = (Pattern) pIter.next(); // ignore empty patterns if (pat.len == 0) continue; // Get the start of a pattern int pos = reinit ? 0 : pat.pos; while (pos > -1 && pat.pattern[pos] != c) { pos = pat.shift[pos]; } pos++; if (pos >= pat.len) { int beg = i - pos + 1; int end = i - pos + pat.len; // last char in match boolean valid = true; // check word boundaries if ((flags & REPLACE_WHOLE_WORD) != 0) { char b = (beg > 0) ? src[beg - 1] : '\0'; char e = src[end + 1]; valid = (isWordDelim(b) && isWordDelim(e)); } if (valid) { log.debug("Found pattern \"{}\" at position {}", pat.pattern, new Integer(beg + 1)); occ.add(new Occurrence(beg, end, pat)); } else { log.debug("Ignoring pattern \"{}\" at position {}", pat.pattern, new Integer(beg + 1)); } pos = pat.shift[pos]; } pat.pos = pos; } reinit = false; } log.debug("Found " + occ.size() + " occurrences"); return (Occurrence[]) occ.toArray(new Occurrence[occ.size()]); } /** * Process the occurrences and handle collisions through pattern weighting * * @param occ The Occurrences to check for collisions * @param wref The reference weight * @return ??? */ private void processOccurrences(Occurrence[] occ, int wref) { int awref = java.lang.Math.abs(wref); int numOccs = occ.length; log.debug("Detecting collisions on " + occ.length + " patterns"); for (int i = 0; i < numOccs; i++) { // if an occurrence has been invalidated, ignore if (occ[i].pattern == null) { log.debug("The occurrence has been invalidated: #{}, [{}-{}]", new Object[] { new Integer(i), new Integer(occ[i].begin), new Integer(occ[i].end) }); continue; } int p = i + 1; int best = i; int weight = java.lang.Math.abs(occ[i].pattern.weight - awref); while (p < numOccs && occ[p].begin <= occ[i].end) { if (occ[p].pattern != null && ((occ[i].end >= occ[p].begin && occ[i].end <= occ[p].end) || (occ[p].begin >= occ[i].begin && occ[p].begin <= occ[i].end))) { int pweight = java.lang.Math.abs(occ[p].pattern.weight - awref); // sign of wref determines comparison if (wref < 0) { if (pweight < weight) { weight = pweight; occ[best].pattern = null; // invalidate looser best = p; } else { occ[p].pattern = null; // invalidate looser } if (pweight > weight) { weight = pweight; occ[best].pattern = null; // invalidate looser best = p; } else { occ[p].pattern = null; // invalidate looser } } } p++; } } } /** * Do the actual replacment of all the pattern occurrences found in the * source String. * * @param source The source String * @param occ The occurrences with possibly invalidated occurrences */ private String doReplace(char[] src, Occurrence[] occ) { StringBuffer dest = new StringBuffer(src.length); int p = 0; log.debug("Doing the replacment of all valid occurrences"); for (int i = 0; i < occ.length; i++) { // handle valid occurrences only if (occ[i].pattern != null) { log.debug("Replacing \"{}\" by \"{}\" @{}", new Object[] { occ[i].pattern.pattern, occ[i].pattern.replace, new Integer(occ[i].begin) }); // Append unmatched part before occurrence dest.append(src, p, occ[i].begin - p); // Replace occurrence dest.append(occ[i].pattern.replace); // Move on within the string p = occ[i].end + 1; } } // Append last part of the source string if (p < src.length) { dest.append(src, p, src.length - p); } return dest.toString(); } // ---------- Internal data helper classes // ---------------------------------- /** * The Pattern class abstracts the notion of a replacement * pattern. This pattern supports an optimized comparison algorithm in that * it is not always needed to re-check all of the pattern, if the pattern * has common subpatterns, such as 'tata'. */ private static final class Pattern { /** * The search pattern to be replaced */ public char[] pattern; /** * The replacement String */ public String replace; /** * New position offsets for next comparison. This implements the * comparison optimazation step. */ public int[] shift; /** * Next position within the pattern to compare. This position is * dependant of the already matched part and the shift table. Usually if * a match occurrs, this position is incremented. If there is no match, * the position is set according to the shift table entry for the * position at which the non-match occurred. */ public int pos; /** * Length of the pattern string */ public int len; /** * Weight of the pattern. Currently the weight of the pattern is * the same as its character length. */ public int weight; /** * Create and analize a new pattern. During the analysis the shift * table is built according to the inner structure of the pattern. * * @param pattern The pattern String to be replaced * @param replace The String replacement */ public Pattern(String pattern, String replace) { // init Pattern this.pattern = pattern.toCharArray(); this.len = pattern.length(); this.replace = replace; this.shift = new int[len + 1]; // Prepare the shift array this.weight = len; // The pattern weight is the length // Prepare shifts int j = shift[0] = -1; for (int i = 0; i < len;) { while (j > -1 && this.pattern[i] != this.pattern[j]) j = shift[j]; i++; j++; shift[i] = ((i < len && this.pattern[i] == this.pattern[j]) || (i == j)) ? shift[j] : j; } } } /** * The Occurrence class abstracts the notion of an occurrence * of a pattern string within the source string, which might later be * replaced by the indicated pattern. */ private static final class Occurrence { /** * Start of this occurrence within the source string */ public int begin; /** * End of this occurrence within the source string */ public int end; /** * The pattern associated with this occurrence. If during occurrence * processing an occurrence becomes invalid, this field will be set * to null. */ public Pattern pattern; /** * Create a new occurrence. * * @param begin The starting point of the occurrence in the string * @param end The ending point (last character) of the occurrence * @param pattern The pattern applying for this occurrence */ public Occurrence(int begin, int end, Pattern pattern) { this.begin = begin; this.end = end; this.pattern = pattern; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy