// com.day.text.Replace Maven / Gradle / Ivy
/*************************************************************************
*
* ADOBE CONFIDENTIAL
* __________________
*
* Copyright 2012 Adobe Systems Incorporated
* All Rights Reserved.
*
* NOTICE: All information contained herein is, and remains
* the property of Adobe Systems Incorporated and its suppliers,
* if any. The intellectual and technical concepts contained
* herein are proprietary to Adobe Systems Incorporated and its
* suppliers and are protected by trade secret or copyright law.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe Systems Incorporated.
**************************************************************************/
package com.day.text;
import java.util.ArrayList;
import java.util.Iterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * The <code>Replace</code> class implements the string replacement
 * functionality formerly coded in the <code>base/base_replace.c</code> C
 * file. This class acts as a data container for the concrete replacement
 * definition and also does the actual replacement for the String/UString ECMA
 * host object and also for Java clients.
 * <p>
 * The usage of this class is relatively simple:
 * <ol>
 * <li>Acquire an instance through the default constructor</li>
 * <li>Add pattern strings through the {@link #addPattern(String, String)},
 * {@link #addPattern(int, String, String)} or {@link #addPatterns(String[][])}
 * calls</li>
 * <li>Optionally set replacement flags</li>
 * <li>Call {@link #replace(String, int)} for each string you want to work
 * on</li>
 * </ol>
 * <p>
 * The flags for the replacement are defined as follows:
 * <dl>
 * <dt>{@link #REPLACE_IGNORE_HTML}</dt>
 * <dd>Don't replace anything contained within a tag enclosed in &lt; and
 * &gt;, resp.</dd>
 * <dt>{@link #REPLACE_WHOLE_WORD}</dt>
 * <dd>Only replace occurrences of patterns when they form standalone
 * words.</dd>
 * </dl>
 * <p>
 * The replacement algorithm works in three steps:
 * <ol>
 * <li>All occurrences of the patterns are looked for in the string</li>
 * <li>Within these occurrences, collisions are resolved using a weighting
 * algorithm which is based on the pattern length using a weight
 * reference.</li>
 * <li>For each occurrence not invalidated through collision detection, the
 * pattern is replaced by the replacement. The rest of the input String is
 * copied to the destination unmodified.</li>
 * </ol>
 * <p>
 * Note: while scanning, the current match position is stored in the pattern
 * objects held by this instance, so a single instance must not be used for
 * replacement by several threads at the same time.
 */
public class Replace {

    /** default log */
    private static final Logger log = LoggerFactory.getLogger(Replace.class);

    /**
     * Replacement flag indicating no special treatment
     */
    public static final int REPLACE_NONE = 0x00;

    /**
     * Replacement flag to ignore any strings occurring within HTML tags
     */
    public static final int REPLACE_IGNORE_HTML = 0x01;

    /**
     * Replacement flag to only replace search words occurring as stand-alone
     * words
     */
    public static final int REPLACE_WHOLE_WORD = 0x02;

    /**
     * Actual replacement flags
     */
    private int flags;

    /**
     * The list of configured replacement patterns
     */
    private final ArrayList<Pattern> patterns;

    /**
     * Default constructor for the replacement object. The new instance has
     * no patterns and its flags are set to {@link #REPLACE_NONE}.
     */
    public Replace() {
        this.flags = REPLACE_NONE;
        this.patterns = new ArrayList<Pattern>();
    }

    /**
     * Calls {@link #replace(String, int)} with <code>wref=0</code>, which
     * lets the longest of several colliding patterns win.
     *
     * @param source The String to do the work in
     * @return The source with all valid pattern occurrences replaced
     */
    public String replace(String source) {
        return replace(source, 0);
    }

    /**
     * The real replacement of the patterns within the input takes place here.
     *
     * @param source The String to do the work in
     * @param wref Reference weight used to resolve collisions between
     *            overlapping occurrences. The weight of a pattern is its
     *            length. If <code>wref</code> is negative, the occurrence
     *            whose weight is closest to <code>|wref|</code> wins;
     *            otherwise the occurrence whose weight is farthest from
     *            <code>wref</code> wins. On a tie the occurrence found first
     *            is kept.
     * @return The empty string if <code>source</code> is <code>null</code>
     *         or empty, <code>source</code> itself if no patterns are
     *         configured, and the replaced string otherwise.
     */
    public String replace(String source, int wref) {
        log.debug("replace: String \"{}\" with weight reference {}", source,
            Integer.valueOf(wref));

        // Return empty string if source is empty
        if (source == null || source.length() == 0) {
            return "";
        }

        // Return source if there is no pattern
        if (patterns.isEmpty()) {
            return source;
        }

        // Get the occurrences list
        char[] src = source.toCharArray();
        Occurrence[] occ = getOccurrences(src);

        // Resolve collisions between overlapping occurrences
        processOccurrences(occ, wref);

        // Now let the replacement occur
        return doReplace(src, occ);
    }

    // ---------- property accessors
    // --------------------------------------------

    /**
     * Set the indicated replacement flags
     *
     * @param flags The flags to set, a combination of
     *            {@link #REPLACE_IGNORE_HTML} and
     *            {@link #REPLACE_WHOLE_WORD}, or {@link #REPLACE_NONE}
     */
    public void setFlags(int flags) {
        this.flags = flags;
    }

    /**
     * Returns the current flags
     *
     * @return the current flags
     */
    public int getFlags() {
        return flags;
    }

    /**
     * Insert the pattern at the indicated position in the internal pattern
     * list
     *
     * @param pos The position to insert the pattern at
     * @param pattern The pattern String for the new pattern
     * @param replacement The replacement String for the new pattern
     * @throws IndexOutOfBoundsException if <code>pos</code> is negative or
     *             larger than the current number of patterns
     * @throws NullPointerException if <code>pattern</code> is
     *             <code>null</code>
     */
    public void addPattern(int pos, String pattern, String replacement) {
        log.debug(
            "addPattern: adding pattern \"{}\" to be replaced by \"{}\" as #{}",
            new Object[] { pattern, replacement, Integer.valueOf(pos) });
        patterns.add(pos, new Pattern(pattern, replacement));
    }

    /**
     * Append the pattern to the internal pattern list
     *
     * @param pattern The pattern String for the new pattern
     * @param replacement The replacement String for the new pattern
     * @throws NullPointerException if <code>pattern</code> is
     *             <code>null</code>
     */
    public void addPattern(String pattern, String replacement) {
        addPattern(patterns.size(), pattern, replacement);
    }

    /**
     * Append all the pattern/replacement String pairs to the list. Each
     * entry is supposed to contain at least two elements: the pattern at
     * index 0 and the replacement at index 1. Entries which are
     * <code>null</code> or shorter than two elements are ignored, as is a
     * <code>null</code> array.
     *
     * @param prPairs An Array of pattern/replacement String pairs.
     */
    public void addPatterns(String[][] prPairs) {
        if (prPairs == null) {
            return;
        }

        for (int i = 0; i < prPairs.length; i++) {
            String[] pair = prPairs[i];
            if (pair == null || pair.length < 2) {
                // ignore empty or incomplete pattern pair
                continue;
            }
            addPattern(patterns.size(), pair[0], pair[1]);
        }
    }

    /**
     * Returns an iterator over the existing patterns. The iterator is the
     * one of the internal pattern list; its elements are of an internal
     * type.
     *
     * @return an iterator over the existing patterns
     */
    public Iterator getPatterns() {
        return patterns.iterator();
    }

    // ---------- internal
    // ------------------------------------------------------

    /**
     * Check whether a character represents a word boundary in the sense of
     * the <code>Replace</code> class. The delimiters are the NUL character,
     * slash, space, carriage return, line feed, tab, dot, semicolon, opening
     * and closing parenthesis, comma, the less-than sign, the double quote
     * and the backslash. Note that this need not be the same as any
     * delimiter notion of the <code>Character</code> class.
     *
     * @param c The character to check
     * @return True if the character is a word delimiter
     */
    private static boolean isWordDelim(char c) {
        return (c == '\0' || c == '/' || c == ' ' || c == '\r' || c == '\n'
            || c == '\t' || c == '.' || c == ';' || c == '(' || c == ')'
            || c == ',' || c == '<' || c == '\"' || c == '\\');
    }

    /**
     * Walk through the source characters and mark all occurrences of the
     * patterns. All patterns are matched in parallel in a single pass; the
     * match position of each pattern is kept in {@link Pattern#pos}.
     * Occurrences are therefore reported in the order of their end index.
     *
     * @param src The source characters to analyze
     * @return The occurrences to be later replaced, ordered by ascending
     *         end index
     */
    private Occurrence[] getOccurrences(char[] src) {
        log.debug(
            "getOccurrences: Getting all occurrences according to the flags {}",
            Integer.toHexString(flags));

        final boolean ignoreHtml = (flags & REPLACE_IGNORE_HTML) != 0;
        final boolean wholeWord = (flags & REPLACE_WHOLE_WORD) != 0;

        boolean insidetag = false;
        // true whenever matching has to restart from scratch: at the very
        // beginning and right after an ignored tag has been closed
        boolean reinit = true;
        ArrayList<Occurrence> occ = new ArrayList<Occurrence>();
        int srclen = src.length;

        for (int i = 0; i < srclen; i++) {
            char c = src[i];

            // Check whether we start a tag to ignore
            if (c == '<' && ignoreHtml && !insidetag) {
                log.debug("Starting an ignored HTML tag at pos {}",
                    Integer.valueOf(i));
                insidetag = true;
                continue;
            }

            // Check whether we are within an ignored tag
            if (insidetag) {
                // Possibly end the tag
                if (c == '>') {
                    log.debug("Ending an ignored HTML tag at pos {}",
                        Integer.valueOf(i));
                    insidetag = false;
                    reinit = true;
                }
                continue;
            }

            // Now advance every pattern by the current character
            for (Pattern pat : patterns) {

                // ignore empty patterns
                if (pat.len == 0) {
                    continue;
                }

                // Fall back through the shift table until the current
                // character matches or the pattern has to restart
                int pos = reinit ? 0 : pat.pos;
                while (pos > -1 && pat.pattern[pos] != c) {
                    pos = pat.shift[pos];
                }
                pos++;

                if (pos >= pat.len) {
                    int beg = i - pos + 1;
                    int end = i - pos + pat.len; // last char in match
                    boolean valid = true;

                    // check word boundaries; start and end of the input
                    // both count as a boundary
                    if (wholeWord) {
                        char b = (beg > 0) ? src[beg - 1] : '\0';
                        char e = (end + 1 < srclen) ? src[end + 1] : '\0';
                        valid = (isWordDelim(b) && isWordDelim(e));
                    }

                    if (valid) {
                        log.debug("Found pattern \"{}\" at position {}",
                            pat.pattern, Integer.valueOf(beg + 1));
                        occ.add(new Occurrence(beg, end, pat));
                    } else {
                        log.debug("Ignoring pattern \"{}\" at position {}",
                            pat.pattern, Integer.valueOf(beg + 1));
                    }

                    // continue looking for further (possibly overlapping)
                    // occurrences of the same pattern
                    pos = pat.shift[pos];
                }

                pat.pos = pos;
            }

            reinit = false;
        }

        log.debug("Found {} occurrences", Integer.valueOf(occ.size()));
        return occ.toArray(new Occurrence[occ.size()]);
    }

    /**
     * Process the occurrences and handle collisions through pattern
     * weighting. Of each group of overlapping occurrences only one keeps
     * its pattern; the others are invalidated by setting their pattern to
     * <code>null</code>.
     * <p>
     * The sign of <code>wref</code> selects the comparison: if it is
     * negative the occurrence with the smaller distance between pattern
     * weight and <code>|wref|</code> wins, otherwise the one with the larger
     * distance. On equal distance the earlier occurrence is kept.
     *
     * @param occ The occurrences to check for collisions, ordered by
     *            ascending end index as returned by
     *            {@link #getOccurrences(char[])}
     * @param wref The reference weight
     */
    private void processOccurrences(Occurrence[] occ, int wref) {
        int awref = Math.abs(wref);
        int numOccs = occ.length;

        log.debug("Detecting collisions on {} patterns",
            Integer.valueOf(numOccs));

        // Occurrences are ordered by their end index, not by their begin
        // index. A later occurrence may therefore still overlap occ[i] even
        // if an occurrence in between does not. The length of the longest
        // occurrence bounds how far ahead an overlap is possible.
        int maxLen = 0;
        for (int k = 0; k < numOccs; k++) {
            maxLen = Math.max(maxLen, occ[k].end - occ[k].begin + 1);
        }

        for (int i = 0; i < numOccs; i++) {

            // if an occurrence has been invalidated, ignore
            if (occ[i].pattern == null) {
                log.debug("The occurrence has been invalidated: #{}, [{}-{}]",
                    new Object[] { Integer.valueOf(i),
                        Integer.valueOf(occ[i].begin),
                        Integer.valueOf(occ[i].end) });
                continue;
            }

            int best = i;
            int weight = Math.abs(occ[i].pattern.weight - awref);

            // an occurrence ending maxLen or more characters after occ[i]
            // (and any one following it) cannot reach back into occ[i]
            for (int p = i + 1; p < numOccs
                && occ[p].end - occ[i].end < maxLen; p++) {

                // skip invalidated occurrences and those starting behind
                // occ[i]; as occ[p] never ends before occ[i], this is the
                // complete overlap test
                if (occ[p].pattern == null || occ[p].begin > occ[i].end) {
                    continue;
                }

                int pweight = Math.abs(occ[p].pattern.weight - awref);

                // sign of wref determines comparison
                boolean challengerWins = (wref < 0)
                        ? (pweight < weight)
                        : (pweight > weight);

                if (challengerWins) {
                    weight = pweight;
                    occ[best].pattern = null; // invalidate loser
                    best = p;
                } else {
                    occ[p].pattern = null; // invalidate loser
                }
            }
        }
    }

    /**
     * Do the actual replacement of all the pattern occurrences found in the
     * source characters. Text between the valid occurrences is copied
     * unmodified.
     *
     * @param src The source characters
     * @param occ The occurrences with possibly invalidated occurrences. The
     *            valid ones are expected to be in ascending order and not to
     *            overlap.
     * @return The resulting string
     */
    private String doReplace(char[] src, Occurrence[] occ) {
        StringBuilder dest = new StringBuilder(src.length);
        int p = 0; // index of the first source character not yet copied

        log.debug("Doing the replacment of all valid occurrences");
        for (int i = 0; i < occ.length; i++) {
            // handle valid occurrences only
            if (occ[i].pattern != null) {
                log.debug("Replacing \"{}\" by \"{}\" @{}", new Object[] {
                    occ[i].pattern.pattern, occ[i].pattern.replace,
                    Integer.valueOf(occ[i].begin) });

                // Append unmatched part before occurrence
                dest.append(src, p, occ[i].begin - p);

                // Replace occurrence
                dest.append(occ[i].pattern.replace);

                // Move on within the string
                p = occ[i].end + 1;
            }
        }

        // Append last part of the source string
        if (p < src.length) {
            dest.append(src, p, src.length - p);
        }

        return dest.toString();
    }

    // ---------- Internal data helper classes
    // ----------------------------------

    /**
     * The <code>Pattern</code> class abstracts the notion of a replacement
     * pattern. This pattern supports an optimized comparison algorithm in
     * that it is not always needed to re-check all of the pattern, if the
     * pattern has common subpatterns, such as 'tata'.
     */
    private static final class Pattern {

        /**
         * The search pattern to be replaced
         */
        public final char[] pattern;

        /**
         * The replacement String
         */
        public final String replace;

        /**
         * New position offsets for next comparison. This implements the
         * comparison optimization step: entry <code>n</code> is the pattern
         * position to continue with after a mismatch at position
         * <code>n</code>, where <code>-1</code> means to restart the pattern
         * with the next source character.
         */
        public final int[] shift;

        /**
         * Next position within the pattern to compare. This position is
         * dependent on the already matched part and the shift table. Usually
         * if a match occurs, this position is incremented. If there is no
         * match, the position is set according to the shift table entry for
         * the position at which the non-match occurred.
         */
        public int pos;

        /**
         * Length of the pattern string
         */
        public final int len;

        /**
         * Weight of the pattern. Currently the weight of the pattern is
         * the same as its character length.
         */
        public final int weight;

        /**
         * Create and analyze a new pattern. During the analysis the shift
         * table is built according to the inner structure of the pattern.
         *
         * @param pattern The pattern String to be replaced
         * @param replace The String replacement
         * @throws NullPointerException if <code>pattern</code> is
         *             <code>null</code>
         */
        public Pattern(String pattern, String replace) {
            // init Pattern
            this.pattern = pattern.toCharArray();
            this.len = pattern.length();
            this.replace = replace;
            this.shift = new int[len + 1]; // Prepare the shift array
            this.weight = len; // The pattern weight is the length

            // Prepare shifts
            int j = shift[0] = -1;
            for (int i = 0; i < len;) {
                while (j > -1 && this.pattern[i] != this.pattern[j]) {
                    j = shift[j];
                }
                i++;
                j++;
                shift[i] = ((i < len && this.pattern[i] == this.pattern[j]) || (i == j))
                        ? shift[j]
                        : j;
            }
        }
    }

    /**
     * The <code>Occurrence</code> class abstracts the notion of an
     * occurrence of a pattern string within the source string, which might
     * later be replaced by the indicated pattern.
     */
    private static final class Occurrence {

        /**
         * Start of this occurrence within the source string
         */
        public final int begin;

        /**
         * End of this occurrence within the source string (index of the
         * last character)
         */
        public final int end;

        /**
         * The pattern associated with this occurrence. If during occurrence
         * processing an occurrence becomes invalid, this field will be set
         * to null.
         */
        public Pattern pattern;

        /**
         * Create a new occurrence.
         *
         * @param begin The starting point of the occurrence in the string
         * @param end The ending point (last character) of the occurrence
         * @param pattern The pattern applying for this occurrence
         */
        public Occurrence(int begin, int end, Pattern pattern) {
            this.begin = begin;
            this.end = end;
            this.pattern = pattern;
        }
    }
}