com.ibm.icu.text.StringReplacer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of virtdata-lib-realer Show documentation
Show all versions of virtdata-lib-realer Show documentation
With inspiration from other libraries
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
**********************************************************************
* Copyright (c) 2002-2007, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 01/14/2002 aliu Creation.
**********************************************************************
*/
package com.ibm.icu.text;
import com.ibm.icu.impl.Utility;
/**
* A replacer that produces static text as its output. The text may
* contain transliterator stand-in characters that represent nested
* UnicodeReplacer objects, making it possible to encode a tree of
* replacers in a StringReplacer. A StringReplacer that contains such
* stand-ins is called a complex StringReplacer. A complex
* StringReplacer has a slower processing loop than a non-complex one.
* @author Alan Liu
*/
class StringReplacer implements UnicodeReplacer {
/**
* Output text, possibly containing stand-in characters that
* represent nested UnicodeReplacers.
*/
private String output;
/**
* Cursor position. Value is ignored if hasCursor is false.
*/
private int cursorPos;
/**
* True if this object outputs a cursor position.
*/
private boolean hasCursor;
/**
* A complex object contains nested replacers and requires more
* complex processing. StringReplacers are initially assumed to
* be complex. If no nested replacers are seen during processing,
* then isComplex is set to false, and future replacements are
* short circuited for better performance.
*/
private boolean isComplex;
/**
* Object that translates stand-in characters in 'output' to
* UnicodeReplacer objects.
*/
private final RuleBasedTransliterator.Data data;
/**
* Construct a StringReplacer that emits the given output
* text and sets the cursor to the given position.
* @param theOutput text that will replace input text when the
* replace() method is called. May contain stand-in characters
* that represent nested replacers.
* @param theCursorPos cursor position that will be returned by
* the replace() method
* @param theData transliterator context object that translates
* stand-in characters to UnicodeReplacer objects
*/
public StringReplacer(String theOutput,
                      int theCursorPos,
                      RuleBasedTransliterator.Data theData) {
    // Replacement text plus the context object that resolves its stand-ins.
    this.output = theOutput;
    this.data = theData;

    // An explicit cursor position was supplied, so replace() will report it.
    this.hasCursor = true;
    this.cursorPos = theCursorPos;

    // Start out assuming nested replacers; replace() clears this flag when
    // it finds none.
    this.isComplex = true;
}
/**
* Construct a StringReplacer that emits the given output
* text and does not modify the cursor.
* @param theOutput text that will replace input text when the
* replace() method is called. May contain stand-in characters
* that represent nested replacers.
* @param theData transliterator context object that translates
* stand-in characters to UnicodeReplacer objects
*/
public StringReplacer(String theOutput,
                      RuleBasedTransliterator.Data theData) {
    // Replacement text plus the context object that resolves its stand-ins.
    this.output = theOutput;
    this.data = theData;

    // No cursor is emitted; cursorPos is ignored while hasCursor is false.
    this.hasCursor = false;
    this.cursorPos = 0;

    // Start out assuming nested replacers; replace() clears this flag when
    // it finds none.
    this.isComplex = true;
}
//= public static UnicodeReplacer valueOf(String output,
//= int cursorPos,
//= RuleBasedTransliterator.Data data) {
//= if (output.length() == 1) {
//= char c = output.charAt(0);
//= UnicodeReplacer r = data.lookupReplacer(c);
//= if (r != null) {
//= return r;
//= }
//= }
//= return new StringReplacer(output, cursorPos, data);
//= }
/**
* UnicodeReplacer API
*/
@Override
public int replace(Replaceable text,
int start,
int limit,
int[] cursor) {
int outLen;
int newStart = 0;
// NOTE: It should be possible to _always_ run the complex
// processing code; just slower. If not, then there is a bug
// in the complex processing code.
// Simple (no nested replacers) Processing Code :
if (!isComplex) {
text.replace(start, limit, output);
outLen = output.length();
// Setup default cursor position (for cursorPos within output)
newStart = cursorPos;
}
// Complex (nested replacers) Processing Code :
else {
/* When there are segments to be copied, use the Replaceable.copy()
* API in order to retain out-of-band data. Copy everything to the
* end of the string, then copy them back over the key. This preserves
* the integrity of indices into the key and surrounding context while
* generating the output text.
*/
StringBuffer buf = new StringBuffer();
int oOutput; // offset into 'output'
isComplex = false;
// The temporary buffer starts at tempStart, and extends
// to destLimit + tempExtra. The start of the buffer has a single
// character from before the key. This provides style
// data when addition characters are filled into the
// temporary buffer. If there is nothing to the left, use
// the non-character U+FFFF, which Replaceable subclasses
// should treat specially as a "no-style character."
// destStart points to the point after the style context
// character, so it is tempStart+1 or tempStart+2.
int tempStart = text.length(); // start of temp buffer
int destStart = tempStart; // copy new text to here
if (start > 0) {
int len = UTF16.getCharCount(text.char32At(start-1));
text.copy(start-len, start, tempStart);
destStart += len;
} else {
text.replace(tempStart, tempStart, "\uFFFF");
destStart++;
}
int destLimit = destStart;
int tempExtra = 0; // temp chars after destLimit
for (oOutput=0; oOutput 0) {
text.replace(destLimit, destLimit, buf.toString());
destLimit += buf.length();
buf.setLength(0);
}
// Delegate output generation to replacer object
int len = r.replace(text, destLimit, destLimit, cursor);
destLimit += len;
}
oOutput = nextIndex;
}
// Insert any accumulated straight text.
if (buf.length() > 0) {
text.replace(destLimit, destLimit, buf.toString());
destLimit += buf.length();
}
if (oOutput == cursorPos) {
// Record the position of the cursor
newStart = destLimit - destStart; // relative to start
}
outLen = destLimit - destStart;
// Copy new text to start, and delete it
text.copy(destStart, destLimit, start);
text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");
// Delete the old text (the key)
text.replace(start + outLen, limit + outLen, "");
}
if (hasCursor) {
// Adjust the cursor for positions outside the key. These
// refer to code points rather than code units. If cursorPos
// is within the output string, then use newStart, which has
// already been set above.
if (cursorPos < 0) {
newStart = start;
int n = cursorPos;
// Outside the output string, cursorPos counts code points
while (n < 0 && newStart > 0) {
newStart -= UTF16.getCharCount(text.char32At(newStart-1));
++n;
}
newStart += n;
} else if (cursorPos > output.length()) {
newStart = start + outLen;
int n = cursorPos - output.length();
// Outside the output string, cursorPos counts code points
while (n > 0 && newStart < text.length()) {
newStart += UTF16.getCharCount(text.char32At(newStart));
--n;
}
newStart += n;
} else {
// Cursor is within output string. It has been set up above
// to be relative to start.
newStart += start;
}
cursor[0] = newStart;
}
return outLen;
}
/**
* UnicodeReplacer API
*/
@Override
public String toReplacerPattern(boolean escapeUnprintable) {
StringBuffer rule = new StringBuffer();
StringBuffer quoteBuf = new StringBuffer();
int cursor = cursorPos;
// Handle a cursor preceding the output
if (hasCursor && cursor < 0) {
while (cursor++ < 0) {
Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
}
// Fall through and append '|' below
}
for (int i=0; i rather than >= because
// if cursor == output.length() it is at the end of the output,
// which is the default position, so we need not emit it.
if (hasCursor && cursor > output.length()) {
cursor -= output.length();
while (cursor-- > 0) {
Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
}
Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
}
// Flush quoteBuf out to result
Utility.appendToRule(rule, -1,
true, escapeUnprintable, quoteBuf);
return rule.toString();
}
/**
* Union the set of all characters that may be output by this object
* into the given set.
* @param toUnionTo the set into which to union the output characters
*/
@Override
public void addReplacementSetTo(UnicodeSet toUnionTo) {
int ch;
for (int i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy