com.ibm.icu.text.BidiWriter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
Show all versions of icu4j Show documentation
International Component for Unicode for Java (ICU4J) is a mature, widely used Java library
providing Unicode and Globalization support
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2001-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*/
/* Written by Simon Montagu, Matitiahu Allouche
* (ported from C code written by Markus W. Scherer)
*/
package com.ibm.icu.text;
import com.ibm.icu.lang.UCharacter;
final class BidiWriter {
/** Bidi control code points */
static final char LRM_CHAR = 0x200e;
static final char RLM_CHAR = 0x200f;
static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |
1 << UCharacter.RIGHT_TO_LEFT_ARABIC);
private static boolean IsCombining(int type)
{
return ((1< 0);
break;
case Bidi.KEEP_BASE_COMBINING:
/*
* Here, too, the destination
* run will have the same length as the source run,
* and there is no mirroring.
* We do need to keep combining characters with their base
* characters.
*/
srcLength = src.length();
/* preserve character integrity */
do {
/* i is always after the last code unit known to need to be kept
* in this segment */
int c;
int i = srcLength;
/* collect code units and modifier letters for one base
* character */
do {
c = UTF16.charAt(src, srcLength - 1);
srcLength -= UTF16.getCharCount(c);
} while(srcLength > 0 && IsCombining(UCharacter.getType(c)));
/* copy this "user character" */
dest.append(src.substring(srcLength, i));
} while(srcLength > 0);
break;
default:
/*
* With several "complicated" options set, this is the most
* general and the slowest copying of an RTL run.
* We will do mirroring, remove Bidi controls, and
* keep combining characters with their base characters
* as requested.
*/
srcLength = src.length();
/* preserve character integrity */
do {
/* i is always after the last code unit known to need to be kept
* in this segment */
int i = srcLength;
/* collect code units for one base character */
int c = UTF16.charAt(src, srcLength - 1);
srcLength -= UTF16.getCharCount(c);
if ((options & Bidi.KEEP_BASE_COMBINING) != 0) {
/* collect modifier letters for this base character */
while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {
c = UTF16.charAt(src, srcLength - 1);
srcLength -= UTF16.getCharCount(c);
}
}
if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 &&
Bidi.IsBidiControlChar(c)) {
/* do not copy this Bidi control character */
continue;
}
/* copy this "user character" */
int j = srcLength;
if((options & Bidi.DO_MIRRORING) != 0) {
/* mirror only the base character */
c = UCharacter.getMirror(c);
UTF16.append(dest, c);
j += UTF16.getCharCount(c);
}
dest.append(src.substring(j, i));
} while(srcLength > 0);
break;
} /* end of switch */
return dest.toString();
}
static String doWriteReverse(char[] text, int start, int limit, int options)
{
return writeReverse(new String(text, start, limit - start), options);
}
static String writeReordered(Bidi bidi, int options)
{
int run, runCount;
StringBuilder dest;
char[] text = bidi.text;
runCount = bidi.countRuns();
/*
* Option "insert marks" implies Bidi.INSERT_LRM_FOR_NUMERIC if the
* reordering mode (checked below) is appropriate.
*/
if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) {
options |= Bidi.INSERT_LRM_FOR_NUMERIC;
options &= ~Bidi.REMOVE_BIDI_CONTROLS;
}
/*
* Option "remove controls" implies Bidi.REMOVE_BIDI_CONTROLS
* and cancels Bidi.INSERT_LRM_FOR_NUMERIC.
*/
if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) {
options |= Bidi.REMOVE_BIDI_CONTROLS;
options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
}
/*
* If we do not perform the "inverse Bidi" algorithm, then we
* don't need to insert any LRMs, and don't need to test for it.
*/
if ((bidi.reorderingMode != Bidi.REORDER_INVERSE_NUMBERS_AS_L) &&
(bidi.reorderingMode != Bidi.REORDER_INVERSE_LIKE_DIRECT) &&
(bidi.reorderingMode != Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
(bidi.reorderingMode != Bidi.REORDER_RUNS_ONLY)) {
options &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
}
dest = new StringBuilder((options & Bidi.INSERT_LRM_FOR_NUMERIC) != 0 ?
bidi.length * 2 : bidi.length);
/*
* Iterate through all visual runs and copy the run text segments to
* the destination, according to the options.
*
* The tests for where to insert LRMs ignore the fact that there may be
* BN codes or non-BMP code points at the beginning and end of a run;
* they may insert LRMs unnecessarily but the tests are faster this way
* (this would have to be improved for UTF-8).
*/
if ((options & Bidi.OUTPUT_REVERSE) == 0) {
/* forward output */
if ((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
/* do not insert Bidi controls */
for (run = 0; run < runCount; ++run) {
BidiRun bidiRun = bidi.getVisualRun(run);
if (bidiRun.isEvenRun()) {
dest.append(doWriteForward(text, bidiRun.start,
bidiRun.limit,
options & ~Bidi.DO_MIRRORING));
} else {
dest.append(doWriteReverse(text, bidiRun.start,
bidiRun.limit, options));
}
}
} else {
/* insert Bidi controls for "inverse Bidi" */
byte[] dirProps = bidi.dirProps;
char uc;
int markFlag;
for (run = 0; run < runCount; ++run) {
BidiRun bidiRun = bidi.getVisualRun(run);
markFlag=0;
/* check if something relevant in insertPoints */
markFlag = bidi.runs[run].insertRemove;
if (markFlag < 0) { /* bidi controls count */
markFlag = 0;
}
if (bidiRun.isEvenRun()) {
if (bidi.isInverse() &&
dirProps[bidiRun.start] != Bidi.L) {
markFlag |= Bidi.LRM_BEFORE;
}
if ((markFlag & Bidi.LRM_BEFORE) != 0) {
uc = LRM_CHAR;
} else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
uc = RLM_CHAR;
} else {
uc = 0;
}
if (uc != 0) {
dest.append(uc);
}
dest.append(doWriteForward(text,
bidiRun.start, bidiRun.limit,
options & ~Bidi.DO_MIRRORING));
if (bidi.isInverse() &&
dirProps[bidiRun.limit - 1] != Bidi.L) {
markFlag |= Bidi.LRM_AFTER;
}
if ((markFlag & Bidi.LRM_AFTER) != 0) {
uc = LRM_CHAR;
} else if ((markFlag & Bidi.RLM_AFTER) != 0) {
uc = RLM_CHAR;
} else {
uc = 0;
}
if (uc != 0) {
dest.append(uc);
}
} else { /* RTL run */
if (bidi.isInverse() &&
!bidi.testDirPropFlagAt(MASK_R_AL,
bidiRun.limit - 1)) {
markFlag |= Bidi.RLM_BEFORE;
}
if ((markFlag & Bidi.LRM_BEFORE) != 0) {
uc = LRM_CHAR;
} else if ((markFlag & Bidi.RLM_BEFORE) != 0) {
uc = RLM_CHAR;
} else {
uc = 0;
}
if (uc != 0) {
dest.append(uc);
}
dest.append(doWriteReverse(text, bidiRun.start,
bidiRun.limit, options));
if(bidi.isInverse() &&
(MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
markFlag |= Bidi.RLM_AFTER;
}
if ((markFlag & Bidi.LRM_AFTER) != 0) {
uc = LRM_CHAR;
} else if ((markFlag & Bidi.RLM_AFTER) != 0) {
uc = RLM_CHAR;
} else {
uc = 0;
}
if (uc != 0) {
dest.append(uc);
}
}
}
}
} else {
/* reverse output */
if((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) {
/* do not insert Bidi controls */
for(run = runCount; --run >= 0; ) {
BidiRun bidiRun = bidi.getVisualRun(run);
if (bidiRun.isEvenRun()) {
dest.append(doWriteReverse(text,
bidiRun.start, bidiRun.limit,
options & ~Bidi.DO_MIRRORING));
} else {
dest.append(doWriteForward(text, bidiRun.start,
bidiRun.limit, options));
}
}
} else {
/* insert Bidi controls for "inverse Bidi" */
byte[] dirProps = bidi.dirProps;
for (run = runCount; --run >= 0; ) {
/* reverse output */
BidiRun bidiRun = bidi.getVisualRun(run);
if (bidiRun.isEvenRun()) {
if (dirProps[bidiRun.limit - 1] != Bidi.L) {
dest.append(LRM_CHAR);
}
dest.append(doWriteReverse(text, bidiRun.start,
bidiRun.limit, options & ~Bidi.DO_MIRRORING));
if (dirProps[bidiRun.start] != Bidi.L) {
dest.append(LRM_CHAR);
}
} else {
if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) {
dest.append(RLM_CHAR);
}
dest.append(doWriteForward(text, bidiRun.start,
bidiRun.limit, options));
if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {
dest.append(RLM_CHAR);
}
}
}
}
}
return dest.toString();
}
}