// com.ibm.icu.text.BidiTransform Maven / Gradle / Ivy
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.text;
import com.ibm.icu.lang.UCharacter;
/**
* Bidi Layout Transformation Engine.
*
* @author Lina Kemmel
*
* @draft ICU 58
* @provisional This API might change or be removed in a future release.
*/
public class BidiTransform
{
/**
 * {@link Order} indicates the order of text.
 * <p>
 * This bidi transformation engine supports all possible combinations (4 in
 * total) of input and output text order:
 * <ul>
 * <li>{logical input, visual output}: unless the output direction is RTL,
 * this corresponds to a normal operation of the Bidi algorithm as
 * described in the Unicode Technical Report and implemented by
 * {@link Bidi} when the reordering mode is set to
 * {@link Bidi#REORDER_DEFAULT}. Visual RTL mode is not supported by
 * {@link Bidi} and is accomplished through reversing a visual
 * LTR string,</li>
 * <li>{visual input, logical output}: unless the input direction is RTL,
 * this corresponds to an "inverse bidi algorithm" in
 * {@link Bidi} with the reordering mode set to
 * {@link Bidi#REORDER_INVERSE_LIKE_DIRECT}. Visual RTL mode
 * is not supported by {@link Bidi} and is accomplished
 * through reversing a visual LTR string,</li>
 * <li>{logical input, logical output}: if the input and output base
 * directions mismatch, this corresponds to the {@link Bidi}
 * implementation with the reordering mode set to
 * {@link Bidi#REORDER_RUNS_ONLY}; and if the input and output
 * base directions are identical, the transformation engine will only
 * handle character mirroring and Arabic shaping operations without
 * reordering,</li>
 * <li>{visual input, visual output}: this reordering mode is not supported
 * by the {@link Bidi} engine; it implies character mirroring,
 * Arabic shaping, and - if the input/output base directions mismatch -
 * string reverse operations.</li>
 * </ul>
 *
 * @see Bidi#setInverse
 * @see Bidi#setReorderingMode
 * @see Bidi#REORDER_DEFAULT
 * @see Bidi#REORDER_INVERSE_LIKE_DIRECT
 * @see Bidi#REORDER_RUNS_ONLY
 * @draft ICU 58
 * @provisional This API might change or be removed in a future release.
 */
public enum Order {

    /**
     * Constant indicating a logical order.
     *
     * @draft ICU 58
     * @provisional This API might change or be removed in a future release.
     */
    LOGICAL,

    /**
     * Constant indicating a visual order.
     *
     * @draft ICU 58
     * @provisional This API might change or be removed in a future release.
     */
    VISUAL
}
/**
 * {@link Mirroring} indicates whether or not characters with
 * the "mirrored" property in RTL runs should be replaced with their
 * mirror-image counterparts.
 *
 * @see Bidi#DO_MIRRORING
 * @see Bidi#setReorderingOptions
 * @see Bidi#writeReordered
 * @see Bidi#writeReverse
 * @draft ICU 58
 * @provisional This API might change or be removed in a future release.
 */
public enum Mirroring {

    /**
     * Constant indicating that character mirroring should not be
     * performed.
     *
     * @draft ICU 58
     * @provisional This API might change or be removed in a future release.
     */
    OFF,

    /**
     * Constant indicating that character mirroring should be performed.
     * <p>
     * This corresponds to calling {@link Bidi#writeReordered}
     * or {@link Bidi#writeReverse} with the
     * {@link Bidi#DO_MIRRORING} option bit set.
     *
     * @draft ICU 58
     * @provisional This API might change or be removed in a future release.
     */
    ON
}
/*
 * Per-call working state. transform() writes these fields and the private
 * step methods (resolve, reorder, reverse, mirror, shapeArabic) read and
 * update them.
 * NOTE(review): because this state lives in unsynchronized instance fields,
 * one instance does not look safe to share between concurrent transform()
 * calls - confirm before sharing.
 */
/** Bidi engine for the current transformation; transform() creates a new one once a matching scheme is found. */
private Bidi bidi;
/** Working text: starts as the input string, is rewritten by each step, and is what transform() returns. */
private String text;
/** Options passed to Bidi.writeReordered(); transform() sets this to DO_MIRRORING or REORDER_DEFAULT, and reorder()/mirror() clear it once mirroring is done. */
private int reorderingOptions;
/** Caller's Arabic shaping options with the TEXT_DIRECTION_MASK bits removed; the direction bits are supplied per scheme. */
private int shapingOptions;
/**
* {@link BidiTransform} default constructor.
* Performs no initialization; all working state is set up by each
* transform() call.
*
* @draft ICU 58
* @provisional This API might change or be removed in a future release.
*/
public BidiTransform()
{
}
/**
 * Performs transformation of text from the bidi layout defined by the
 * input ordering scheme to the bidi layout defined by the output ordering
 * scheme, and applies character mirroring and Arabic shaping operations.
 * <p>
 * In terms of {@link Bidi} class, such a transformation implies:
 * <ul>
 * <li>calling {@link Bidi#setReorderingMode} as needed (when
 * the reordering mode is other than normal),</li>
 * <li>calling {@link Bidi#setInverse} as needed (when text
 * should be transformed from a visual to a logical form),</li>
 * <li>resolving embedding levels of each character in the input text by
 * calling {@link Bidi#setPara},</li>
 * <li>reordering the characters based on the computed embedding levels,
 * also performing character mirroring as needed, and streaming the result
 * to the output, by calling {@link Bidi#writeReordered},</li>
 * <li>performing Arabic digit and letter shaping on the output text by
 * calling {@link ArabicShaping#shape}.</li>
 * </ul>
 * <p>
 * An "ordering scheme" encompasses the base direction and the order of
 * text, and these characteristics must be defined by the caller for both
 * input and output explicitly.
 * There are 36 possible combinations of {input, output} ordering schemes,
 * which are partially supported by {@link Bidi} already.
 * Examples of the currently supported combinations:
 * <ul>
 * <li>{Logical LTR, Visual LTR}: this is equivalent to calling
 * {@link Bidi#setPara} with
 * paraLevel == {@link Bidi#LTR},</li>
 * <li>{Logical RTL, Visual LTR}: this is equivalent to calling
 * {@link Bidi#setPara} with
 * paraLevel == {@link Bidi#RTL},</li>
 * <li>{Logical Default ("Auto") LTR, Visual LTR}: this is equivalent to
 * calling {@link Bidi#setPara} with
 * paraLevel == {@link Bidi#LEVEL_DEFAULT_LTR},</li>
 * <li>{Logical Default ("Auto") RTL, Visual LTR}: this is equivalent to
 * calling {@link Bidi#setPara} with
 * paraLevel == {@link Bidi#LEVEL_DEFAULT_RTL},</li>
 * <li>{Visual LTR, Logical LTR}: this is equivalent to
 * calling {@link Bidi#setInverse}(true) and then
 * {@link Bidi#setPara} with
 * paraLevel == {@link Bidi#LTR},</li>
 * <li>{Visual LTR, Logical RTL}: this is equivalent to calling
 * {@link Bidi#setInverse}(true) and then
 * {@link Bidi#setPara} with
 * paraLevel == {@link Bidi#RTL}.</li>
 * </ul>
 * All combinations that involve the Visual RTL scheme are unsupported by
 * {@link Bidi}, for instance:
 * <ul>
 * <li>{Logical LTR, Visual RTL},</li>
 * <li>{Visual RTL, Logical RTL}.</li>
 * </ul>
 * <p>
 * Example of usage of the transformation engine:
 * <pre>
 * BidiTransform bidiTransform = new BidiTransform();
 * String in = "abc \u06f0123"; // "abc \\u06f0123"
 * // Run a transformation.
 * String out = bidiTransform.transform(in,
 *          Bidi.LTR, Order.VISUAL,
 *          Bidi.RTL, Order.LOGICAL,
 *          Mirroring.OFF,
 *          ArabicShaping.DIGITS_AN2EN | ArabicShaping.DIGIT_TYPE_AN_EXTENDED);
 * // Result: "0123 abc".
 * // Do something with out.
 * out = out.replace('0', '4');
 * // Result: "4123 abc".
 * // Run a reverse transformation.
 * String inNew = bidiTransform.transform(out,
 *          Bidi.RTL, Order.LOGICAL,
 *          Bidi.LTR, Order.VISUAL,
 *          Mirroring.OFF,
 *          ArabicShaping.DIGITS_EN2AN | ArabicShaping.DIGIT_TYPE_AN_EXTENDED);
 * // Result: "abc \\u06f4\\u06f1\\u06f2\\u06f3"
 * </pre>
 *
 * @param text An input character sequence that the Bidi layout
 *        transformations will be performed on.
 * @param inParaLevel A base embedding level of the input as defined in
 *        {@link Bidi#setPara(String, byte, byte[])}
 *        documentation for the <code>paraLevel</code> parameter.
 * @param inOrder An order of the input, which can be one of the
 *        {@link Order} values.
 * @param outParaLevel A base embedding level of the output as defined in
 *        {@link Bidi#setPara(String, byte, byte[])}
 *        documentation for the <code>paraLevel</code> parameter.
 * @param outOrder An order of the output, which can be one of the
 *        {@link Order} values.
 * @param doMirroring Indicates whether or not to perform character
 *        mirroring, and can accept one of the
 *        {@link Mirroring} values.
 * @param shapingOptions Arabic digit and letter shaping options defined in
 *        the {@link ArabicShaping} documentation.
 *        <p><strong>Note:</strong> Direction indicator options are
 *        computed by the transformation engine based on the effective
 *        ordering schemes, so user-defined direction indicators will be
 *        ignored.
 * @return The output string, which is the result of the layout
 *        transformation.
 * @throws IllegalArgumentException if <code>text</code>,
 *        <code>inOrder</code>, <code>outOrder</code>, or
 *        <code>doMirroring</code> parameter is <code>null</code>; the
 *        exception message names the first offending parameter.
 * @draft ICU 58
 * @provisional This API might change or be removed in a future release.
 */
public String transform(CharSequence text,
        byte inParaLevel, Order inOrder,
        byte outParaLevel, Order outOrder,
        Mirroring doMirroring, int shapingOptions)
{
    // Identify the first null argument so the failure is diagnosable;
    // the exception type is the one callers already expect.
    final String nullArgument = text == null ? "text"
            : inOrder == null ? "inOrder"
            : outOrder == null ? "outOrder"
            : doMirroring == null ? "doMirroring"
            : null;
    if (nullArgument != null) {
        throw new IllegalArgumentException(nullArgument + " must not be null");
    }
    this.text = text.toString();

    // Replace "default" levels and arbitrary embedding levels by a plain
    // direction; resolveBaseDirection() overwrites the array in place.
    byte[] levels = {inParaLevel, outParaLevel};
    resolveBaseDirection(levels);

    ReorderingScheme currentScheme = findMatchingScheme(levels[0], inOrder,
            levels[1], outOrder);
    // findMatchingScheme() returns null when nothing matches; in that case
    // the input text is returned unchanged.
    if (currentScheme != null) {
        this.bidi = new Bidi();
        this.reorderingOptions = Mirroring.ON.equals(doMirroring)
                ? Bidi.DO_MIRRORING : Bidi.REORDER_DEFAULT;
        /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the
           text scheme at the time shaping is invoked. */
        this.shapingOptions = shapingOptions & ~ArabicShaping.TEXT_DIRECTION_MASK;
        currentScheme.doTransform(this);
    }
    return this.text;
}
/**
 * Normalizes the requested input and output levels in place.
 * <p>
 * When the input level is {@link Bidi#LEVEL_DEFAULT_LTR} or
 * {@link Bidi#LEVEL_DEFAULT_RTL}, the base direction is resolved according
 * to that of the first strong directional character in the text; if
 * {@link Bidi#getBaseDirection} reports {@link Bidi#NEUTRAL}, the default
 * named by the requested level is used. An explicit level is reduced to
 * its lowest bit. A default output level takes the resolved input level.
 *
 * @param levels Byte array, where levels[0] is an input level and
 *        levels[1] is an output level. Resolved levels override these.
 */
private void resolveBaseDirection(byte[] levels) {
    final byte requestedInput = levels[0];
    if (!Bidi.IsDefaultLevel(requestedInput)) {
        // Explicit level: only its parity matters.
        levels[0] = (byte) (requestedInput & 1);
    } else {
        final byte detected = Bidi.getBaseDirection(text);
        if (detected != Bidi.NEUTRAL) {
            levels[0] = detected;
        } else if (requestedInput == Bidi.LEVEL_DEFAULT_RTL) {
            levels[0] = Bidi.RTL;
        } else {
            levels[0] = Bidi.LTR;
        }
    }

    if (!Bidi.IsDefaultLevel(levels[1])) {
        levels[1] = (byte) (levels[1] & 1);
    } else {
        // A default output direction follows the resolved input direction.
        levels[1] = levels[0];
    }
}
/**
 * Finds a valid {@link ReorderingScheme} matching the
 * caller-defined scheme. Schemes are probed in declaration order and the
 * first one whose <code>matches</code> method accepts the arguments wins.
 *
 * @param inLevel Resolved base level of the input text
 * @param inOrder Order of the input text
 * @param outLevel Resolved base level of the output text
 * @param outOrder Order of the output text
 * @return A valid <code>ReorderingScheme</code> object or <code>null</code>
 *         when no scheme matches
 */
private ReorderingScheme findMatchingScheme(byte inLevel, Order inOrder,
        byte outLevel, Order outOrder) {
    final ReorderingScheme[] candidates = ReorderingScheme.values();
    ReorderingScheme found = null;
    for (int i = 0; found == null && i < candidates.length; i++) {
        if (candidates[i].matches(inLevel, inOrder, outLevel, outOrder)) {
            found = candidates[i];
        }
    }
    return found;
}
/**
 * Performs bidi resolution of the working text: configures the
 * {@link Bidi} engine and then calls {@link Bidi#setPara} with no
 * caller-supplied embedding levels.
 *
 * @param level Base embedding level
 * @param options Reordering mode handed to {@link Bidi#setReorderingMode};
 *        callers in this class pass {@link Bidi#REORDER_DEFAULT},
 *        {@link Bidi#REORDER_INVERSE_LIKE_DIRECT} or
 *        {@link Bidi#REORDER_RUNS_ONLY}
 */
private void resolve(byte level, int options) {
    // The inverse flag is derived from the bits shared with
    // REORDER_INVERSE_LIKE_DIRECT, then the mode itself is applied.
    final boolean inverse = (options & Bidi.REORDER_INVERSE_LIKE_DIRECT) != 0;
    bidi.setInverse(inverse);
    bidi.setReorderingMode(options);
    bidi.setPara(text, level, null);
}
/**
 * Performs basic reordering of text (Logical LTR or RTL to Visual LTR)
 * by replacing the working text with the output of
 * {@link Bidi#writeReordered}.
 */
private void reorder() {
    final String reordered = bidi.writeReordered(reorderingOptions);
    text = reordered;
    // The options have been consumed by this pass; reset them so that a
    // later step does not apply them a second time.
    reorderingOptions = Bidi.REORDER_DEFAULT;
}
/**
 * Performs string reverse: replaces the working text with the result of
 * {@link Bidi#writeReverse} called with {@link Bidi#OPTION_DEFAULT}.
 */
private void reverse() {
    final String reversed = Bidi.writeReverse(text, Bidi.OPTION_DEFAULT);
    text = reversed;
}
/**
 * Performs character mirroring without reordering. When this method is
 * called, {@link #text} should be in a Logical form.
 * <p>
 * Does nothing unless {@link Bidi#DO_MIRRORING} is set in the pending
 * reordering options. Otherwise every code point whose resolved level is
 * odd is replaced by {@link UCharacter#getMirror}, and the mirroring bit
 * is cleared so that it is not applied again.
 */
private void mirror() {
    if ((reorderingOptions & Bidi.DO_MIRRORING) != 0) {
        final StringBuffer buffer = new StringBuffer(text);
        final byte[] resolvedLevels = bidi.getLevels();
        final int count = resolvedLevels.length;
        int index = 0;
        while (index < count) {
            final int codePoint = UTF16.charAt(buffer, index);
            final boolean oddLevel = (resolvedLevels[index] & 1) != 0;
            if (oddLevel) {
                UTF16.setCharAt(buffer, index, UCharacter.getMirror(codePoint));
            }
            // Step over both code units of a supplementary code point.
            index += UTF16.getCharCount(codePoint);
        }
        text = buffer.toString();
        reorderingOptions &= ~Bidi.DO_MIRRORING;
    }
}
/**
 * Performs digit and letter shaping.
 * <p>
 * When both directions agree a single shaping pass is run. Otherwise two
 * passes are run: first with the letter options masked out and the digit
 * direction applied, then with the digit options masked out and the letter
 * direction applied.
 *
 * @param digitsDir Digit shaping option that indicates whether the text
 *        should be treated as logical or visual.
 * @param lettersDir Letter shaping option that indicates whether the text
 *        should be treated as logical or visual form (can mismatch the digit
 *        option).
 */
private void shapeArabic(int digitsDir, int lettersDir) {
    if (digitsDir != lettersDir) {
        /* Honor all shape options other than letters (not necessarily digits
           only) */
        final int withoutLetters = shapingOptions & ~ArabicShaping.LETTERS_MASK;
        shapeArabic(withoutLetters | digitsDir);

        /* Honor all shape options other than digits (not necessarily letters
           only) */
        final int withoutDigits = shapingOptions & ~ArabicShaping.DIGITS_MASK;
        shapeArabic(withoutDigits | lettersDir);
        return;
    }
    shapeArabic(shapingOptions | digitsDir);
}
/**
 * Performs digit and letter shaping on the working text.
 * <p>
 * Nothing is done when <code>options</code> is zero. If shaping throws an
 * {@link ArabicShapingException}, the working text keeps its previous
 * value.
 *
 * @param options Shaping options covering both letters and digits
 */
private void shapeArabic(int options) {
    if (options == 0) {
        return;
    }
    final ArabicShaping shaper = new ArabicShaping(options);
    try {
        text = shaper.shape(text);
    } catch (ArabicShapingException ignored) {
        // NOTE(review): the failure is swallowed on purpose or by omission -
        // confirm. The text is left as it was before this shaping pass and
        // the caller gets no indication that shaping did not happen.
    }
}
/**
 * The sixteen {input, output} ordering-scheme combinations handled by the
 * engine. Each constant records the input/output base direction and order
 * it stands for, and supplies the sequence of engine steps that performs
 * the corresponding transformation. The order of the steps inside each
 * <code>doTransform</code> is significant.
 */
private enum ReorderingScheme {
    LOG_LTR_TO_VIS_LTR(Bidi.LTR, Order.LOGICAL, Bidi.LTR, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
            engine.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            engine.reorder();
        }
    },
    LOG_RTL_TO_VIS_LTR(Bidi.RTL, Order.LOGICAL, Bidi.LTR, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.resolve(Bidi.RTL, Bidi.REORDER_DEFAULT);
            engine.reorder();
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
        }
    },
    LOG_LTR_TO_VIS_RTL(Bidi.LTR, Order.LOGICAL, Bidi.RTL, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
            engine.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            engine.reorder();
            engine.reverse();
        }
    },
    LOG_RTL_TO_VIS_RTL(Bidi.RTL, Order.LOGICAL, Bidi.RTL, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.resolve(Bidi.RTL, Bidi.REORDER_DEFAULT);
            engine.reorder();
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
            engine.reverse();
        }
    },
    VIS_LTR_TO_LOG_RTL(Bidi.LTR, Order.VISUAL, Bidi.RTL, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
            engine.resolve(Bidi.RTL, Bidi.REORDER_INVERSE_LIKE_DIRECT);
            engine.reorder();
        }
    },
    VIS_RTL_TO_LOG_RTL(Bidi.RTL, Order.VISUAL, Bidi.RTL, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.reverse();
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
            engine.resolve(Bidi.RTL, Bidi.REORDER_INVERSE_LIKE_DIRECT);
            engine.reorder();
        }
    },
    VIS_LTR_TO_LOG_LTR(Bidi.LTR, Order.VISUAL, Bidi.LTR, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.resolve(Bidi.LTR, Bidi.REORDER_INVERSE_LIKE_DIRECT);
            engine.reorder();
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
        }
    },
    VIS_RTL_TO_LOG_LTR(Bidi.RTL, Order.VISUAL, Bidi.LTR, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.reverse();
            engine.resolve(Bidi.LTR, Bidi.REORDER_INVERSE_LIKE_DIRECT);
            engine.reorder();
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
        }
    },
    LOG_LTR_TO_LOG_RTL(Bidi.LTR, Order.LOGICAL, Bidi.RTL, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
            engine.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            engine.mirror();
            engine.resolve(Bidi.LTR, Bidi.REORDER_RUNS_ONLY);
            engine.reorder();
        }
    },
    LOG_RTL_TO_LOG_LTR(Bidi.RTL, Order.LOGICAL, Bidi.LTR, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.resolve(Bidi.RTL, Bidi.REORDER_DEFAULT);
            engine.mirror();
            engine.resolve(Bidi.RTL, Bidi.REORDER_RUNS_ONLY);
            engine.reorder();
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
        }
    },
    VIS_LTR_TO_VIS_RTL(Bidi.LTR, Order.VISUAL, Bidi.RTL, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            engine.mirror();
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
            engine.reverse();
        }
    },
    VIS_RTL_TO_VIS_LTR(Bidi.RTL, Order.VISUAL, Bidi.LTR, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.reverse();
            engine.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            engine.mirror();
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
        }
    },
    LOG_LTR_TO_LOG_LTR(Bidi.LTR, Order.LOGICAL, Bidi.LTR, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            engine.mirror();
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_LOGICAL);
        }
    },
    LOG_RTL_TO_LOG_RTL(Bidi.RTL, Order.LOGICAL, Bidi.RTL, Order.LOGICAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.resolve(Bidi.RTL, Bidi.REORDER_DEFAULT);
            engine.mirror();
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_VISUAL_LTR, ArabicShaping.TEXT_DIRECTION_LOGICAL);
        }
    },
    VIS_LTR_TO_VIS_LTR(Bidi.LTR, Order.VISUAL, Bidi.LTR, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            engine.mirror();
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
        }
    },
    VIS_RTL_TO_VIS_RTL(Bidi.RTL, Order.VISUAL, Bidi.RTL, Order.VISUAL) {
        @Override
        void doTransform(BidiTransform engine) {
            engine.reverse();
            engine.resolve(Bidi.LTR, Bidi.REORDER_DEFAULT);
            engine.mirror();
            engine.shapeArabic(ArabicShaping.TEXT_DIRECTION_LOGICAL, ArabicShaping.TEXT_DIRECTION_VISUAL_LTR);
            engine.reverse();
        }
    };

    /** Whether this scheme expects an RTL (odd) input base level. */
    private final boolean inputRtl;
    /** Order of the input text this scheme expects. */
    private final Order inputOrder;
    /** Whether this scheme produces an RTL (odd) output base level. */
    private final boolean outputRtl;
    /** Order of the output text this scheme produces. */
    private final Order outputOrder;

    /**
     * Records the combination a scheme stands for.
     *
     * @param inLevel Base level of the input text; only its parity is kept
     * @param inOrder Order of the input text
     * @param outLevel Base level of the output text; only its parity is kept
     * @param outOrder Order of the output text
     */
    ReorderingScheme(byte inLevel, Order inOrder, byte outLevel, Order outOrder) {
        this.inputRtl = IsRTL(inLevel);
        this.inputOrder = inOrder;
        this.outputRtl = IsRTL(outLevel);
        this.outputOrder = outOrder;
    }

    /**
     * Indicates whether this scheme matches another one in terms of
     * equality of base direction and ordering scheme.
     *
     * @param inLevel Base level of the input text
     * @param inOrder Order of the input text
     * @param outLevel Base level of the output text
     * @param outOrder Order of the output text
     *
     * @return <code>true</code> if it's a match, <code>false</code>
     * otherwise
     */
    boolean matches(byte inLevel, Order inOrder, byte outLevel, Order outOrder) {
        return IsRTL(inLevel) == inputRtl
                && inputOrder.equals(inOrder)
                && IsRTL(outLevel) == outputRtl
                && outputOrder.equals(outOrder);
    }

    /**
     * Performs a series of bidi layout transformations unique for the current
     * scheme.
     * @param engine Bidi transformation engine
     */
    abstract void doTransform(BidiTransform engine);
}
/**
 * Is level LTR? convenience method
 * @param level Embedding level
 * @return <code>true</code> when the lowest bit of the level is clear
 */
private static boolean IsLTR(byte level) {
    final int lowBit = level & 1;
    return lowBit == 0;
}
/**
 * Is level RTL? convenience method
 * @param level Embedding level
 * @return <code>true</code> when the lowest bit of the level is set
 */
private static boolean IsRTL(byte level) {
    final int lowBit = level & 1;
    return lowBit != 0;
}
/**
 * Is order logical? convenience method
 * @param order Order value
 * @return <code>true</code> only for {@link Order#LOGICAL}; a
 *         <code>null</code> order yields <code>false</code>
 */
private static boolean IsLogical(Order order) {
    return order == Order.LOGICAL;
}
/**
 * Is order visual? convenience method
 * @param order Order value
 * @return <code>true</code> only for {@link Order#VISUAL}; a
 *         <code>null</code> order yields <code>false</code>
 */
private static boolean IsVisual(Order order) {
    return order == Order.VISUAL;
}
}