com.ibm.icu.text.StringReplacer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
Show all versions of icu4j Show documentation
International Components for Unicode for Java (ICU4J) is a mature, widely used Java library
providing Unicode and globalization support.
/*
**********************************************************************
* Copyright (c) 2002-2007, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 01/14/2002 aliu Creation.
**********************************************************************
*/
package com.ibm.icu.text;
import com.ibm.icu.impl.Utility;
/**
* A replacer that produces static text as its output. The text may
* contain transliterator stand-in characters that represent nested
* UnicodeReplacer objects, making it possible to encode a tree of
* replacers in a StringReplacer. A StringReplacer that contains such
* stand-ins is called a complex StringReplacer. A complex
* StringReplacer has a slower processing loop than a non-complex one.
* @author Alan Liu
*/
class StringReplacer implements UnicodeReplacer {
/**
 * Output text, possibly containing stand-in characters that
 * represent nested UnicodeReplacers. Assigned by the constructors
 * and written into the target text by replace().
 */
private String output;
/**
 * Cursor position. Value is ignored if hasCursor is false.
 * replace() treats a value in [0, output.length()] as an offset
 * within the output; a negative value or a value greater than
 * output.length() lies outside the output and is counted in code
 * points rather than code units.
 */
private int cursorPos;
/**
 * True if this object outputs a cursor position. Set to true by the
 * constructor that takes a cursor position and to false by the one
 * that does not.
 */
private boolean hasCursor;
/**
 * A complex object contains nested replacers and requires more
 * complex processing. StringReplacers are initially assumed to
 * be complex (both constructors set this to true). If no nested
 * replacers are seen during processing, then isComplex is set to
 * false, and future replacements are short circuited for better
 * performance.
 */
private boolean isComplex;
/**
 * Object that translates stand-in characters in 'output' to
 * UnicodeReplacer objects. Supplied by the caller at construction
 * time and never reassigned.
 */
private final RuleBasedTransliterator.Data data;
/**
 * Creates a StringReplacer that emits the given output text and
 * sets the cursor to the given position.
 * @param theOutput text that will replace input text when the
 * replace() method is called. May contain stand-in characters
 * that represent nested replacers.
 * @param theCursorPos cursor position that will be reported by
 * the replace() method
 * @param theData transliterator context object that translates
 * stand-in characters to UnicodeReplacer objects
 */
public StringReplacer(String theOutput,
                      int theCursorPos,
                      RuleBasedTransliterator.Data theData) {
    this.data = theData;
    this.output = theOutput;
    // This variant always reports a cursor.
    this.hasCursor = true;
    this.cursorPos = theCursorPos;
    // Start out on the slow path; replace() decides whether it can be dropped.
    this.isComplex = true;
}
/**
 * Creates a StringReplacer that emits the given output text and
 * does not modify the cursor.
 * @param theOutput text that will replace input text when the
 * replace() method is called. May contain stand-in characters
 * that represent nested replacers.
 * @param theData transliterator context object that translates
 * stand-in characters to UnicodeReplacer objects
 */
public StringReplacer(String theOutput,
                      RuleBasedTransliterator.Data theData) {
    // Same initial state as the cursor-setting constructor with a
    // position of zero...
    this(theOutput, 0, theData);
    // ...except that no cursor is reported.
    this.hasCursor = false;
}
//= public static UnicodeReplacer valueOf(String output,
//= int cursorPos,
//= RuleBasedTransliterator.Data data) {
//= if (output.length() == 1) {
//= char c = output.charAt(0);
//= UnicodeReplacer r = data.lookupReplacer(c);
//= if (r != null) {
//= return r;
//= }
//= }
//= return new StringReplacer(output, cursorPos, data);
//= }
/**
* UnicodeReplacer API
*/
public int replace(Replaceable text,
int start,
int limit,
int[] cursor) {
int outLen;
int newStart = 0;
// NOTE: It should be possible to _always_ run the complex
// processing code; just slower. If not, then there is a bug
// in the complex processing code.
// Simple (no nested replacers) Processing Code :
if (!isComplex) {
text.replace(start, limit, output);
outLen = output.length();
// Setup default cursor position (for cursorPos within output)
newStart = cursorPos;
}
// Complex (nested replacers) Processing Code :
else {
/* When there are segments to be copied, use the Replaceable.copy()
* API in order to retain out-of-band data. Copy everything to the
* end of the string, then copy them back over the key. This preserves
* the integrity of indices into the key and surrounding context while
* generating the output text.
*/
StringBuffer buf = new StringBuffer();
int oOutput; // offset into 'output'
isComplex = false;
// The temporary buffer starts at tempStart, and extends
// to destLimit + tempExtra. The start of the buffer has a single
// character from before the key. This provides style
// data when addition characters are filled into the
// temporary buffer. If there is nothing to the left, use
// the non-character U+FFFF, which Replaceable subclasses
// should treat specially as a "no-style character."
// destStart points to the point after the style context
// character, so it is tempStart+1 or tempStart+2.
int tempStart = text.length(); // start of temp buffer
int destStart = tempStart; // copy new text to here
if (start > 0) {
int len = UTF16.getCharCount(text.char32At(start-1));
text.copy(start-len, start, tempStart);
destStart += len;
} else {
text.replace(tempStart, tempStart, "\uFFFF");
destStart++;
}
int destLimit = destStart;
int tempExtra = 0; // temp chars after destLimit
for (oOutput=0; oOutput 0) {
text.replace(destLimit, destLimit, buf.toString());
destLimit += buf.length();
buf.setLength(0);
}
// Delegate output generation to replacer object
int len = r.replace(text, destLimit, destLimit, cursor);
destLimit += len;
}
oOutput = nextIndex;
}
// Insert any accumulated straight text.
if (buf.length() > 0) {
text.replace(destLimit, destLimit, buf.toString());
destLimit += buf.length();
}
if (oOutput == cursorPos) {
// Record the position of the cursor
newStart = destLimit - destStart; // relative to start
}
outLen = destLimit - destStart;
// Copy new text to start, and delete it
text.copy(destStart, destLimit, start);
text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");
// Delete the old text (the key)
text.replace(start + outLen, limit + outLen, "");
}
if (hasCursor) {
// Adjust the cursor for positions outside the key. These
// refer to code points rather than code units. If cursorPos
// is within the output string, then use newStart, which has
// already been set above.
if (cursorPos < 0) {
newStart = start;
int n = cursorPos;
// Outside the output string, cursorPos counts code points
while (n < 0 && newStart > 0) {
newStart -= UTF16.getCharCount(text.char32At(newStart-1));
++n;
}
newStart += n;
} else if (cursorPos > output.length()) {
newStart = start + outLen;
int n = cursorPos - output.length();
// Outside the output string, cursorPos counts code points
while (n > 0 && newStart < text.length()) {
newStart += UTF16.getCharCount(text.char32At(newStart));
--n;
}
newStart += n;
} else {
// Cursor is within output string. It has been set up above
// to be relative to start.
newStart += start;
}
cursor[0] = newStart;
}
return outLen;
}
/**
* UnicodeReplacer API
*/
public String toReplacerPattern(boolean escapeUnprintable) {
StringBuffer rule = new StringBuffer();
StringBuffer quoteBuf = new StringBuffer();
int cursor = cursorPos;
// Handle a cursor preceding the output
if (hasCursor && cursor < 0) {
while (cursor++ < 0) {
Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
}
// Fall through and append '|' below
}
for (int i=0; i rather than >= because
// if cursor == output.length() it is at the end of the output,
// which is the default position, so we need not emit it.
if (hasCursor && cursor > output.length()) {
cursor -= output.length();
while (cursor-- > 0) {
Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
}
Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
}
// Flush quoteBuf out to result
Utility.appendToRule(rule, -1,
true, escapeUnprintable, quoteBuf);
return rule.toString();
}
/**
* Union the set of all characters that may output by this object
* into the given set.
* @param toUnionTo the set into which to union the output characters
*/
public void addReplacementSetTo(UnicodeSet toUnionTo) {
int ch;
for (int i=0; i