com.ibm.icu.text.BidiWriter Maven / Gradle / Ivy
The newest version!
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2001-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*/
/* Written by Simon Montagu, Matitiahu Allouche
* (ported from C code written by Markus W. Scherer)
*/
package com.ibm.icu.text;
import com.ibm.icu.lang.UCharacter;
final class BidiWriter {
/** Bidi control code point U+200E (left-to-right mark). */
static final char LRM_CHAR = '\u200E';
/** Bidi control code point U+200F (right-to-left mark). */
static final char RLM_CHAR = '\u200F';
/**
 * Bit set with exactly two bits on: the bit numbered
 * {@code UCharacter.RIGHT_TO_LEFT} and the bit numbered
 * {@code UCharacter.RIGHT_TO_LEFT_ARABIC}.
 */
static final int MASK_R_AL =
        (1 << UCharacter.RIGHT_TO_LEFT) | (1 << UCharacter.RIGHT_TO_LEFT_ARABIC);
private static boolean IsCombining(int type)
{
return ((1< 0);
break;
case Bidi.KEEP_BASE_COMBINING:
/*
* Here, too, the destination
* run will have the same length as the source run,
* and there is no mirroring.
* We do need to keep combining characters with their base
* characters.
*/
srcLength = src.length();
/* preserve character integrity */
do {
/* i is always after the last code unit known to need to be kept
* in this segment */
int c;
int i = srcLength;
/* collect code units and modifier letters for one base
* character */
do {
c = UTF16.charAt(src, srcLength - 1);
srcLength -= UTF16.getCharCount(c);
} while(srcLength > 0 && IsCombining(UCharacter.getType(c)));
/* copy this "user character" */
dest.append(src.substring(srcLength, i));
} while(srcLength > 0);
break;
default:
/*
* With several "complicated" options set, this is the most
* general and the slowest copying of an RTL run.
* We will do mirroring, remove Bidi controls, and
* keep combining characters with their base characters
* as requested.
*/
srcLength = src.length();
/* preserve character integrity */
do {
/* i is always after the last code unit known to need to be kept
* in this segment */
int i = srcLength;
/* collect code units for one base character */
int c = UTF16.charAt(src, srcLength - 1);
srcLength -= UTF16.getCharCount(c);
if ((options & Bidi.KEEP_BASE_COMBINING) != 0) {
/* collect modifier letters for this base character */
while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {
c = UTF16.charAt(src, srcLength - 1);
srcLength -= UTF16.getCharCount(c);
}
}
if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 &&
Bidi.IsBidiControlChar(c)) {
/* do not copy this Bidi control character */
continue;
}
/* copy this "user character" */
int j = srcLength;
if((options & Bidi.DO_MIRRORING) != 0) {
/* mirror only the base character */
c = UCharacter.getMirror(c);
UTF16.append(dest, c);
j += UTF16.getCharCount(c);
}
dest.append(src.substring(j, i));
} while(srcLength > 0);
break;
} /* end of switch */
return dest.toString();
}
/**
 * Array-based entry point for reverse writing: copies the code units
 * {@code text[start] .. text[limit - 1]} into a String and hands that
 * String, together with {@code options}, to {@code writeReverse}.
 *
 * @param text    the code units to read from
 * @param start   index of the first code unit of the segment
 * @param limit   index just past the last code unit of the segment
 * @param options writing options, passed through unchanged
 * @return whatever {@code writeReverse} returns for the segment
 */
static String doWriteReverse(char[] text, int start, int limit, int options)
{
    final int segmentLength = limit - start;
    final String segment = new String(text, start, segmentLength);
    return writeReverse(segment, options);
}
/**
 * Writes the text of {@code bidi} run by run, in visual-run order (or in
 * the opposite run order when {@code Bidi.OUTPUT_REVERSE} is set), and
 * returns the concatenated result.
 *
 * <p>Before writing, the caller's {@code options} are adjusted from the
 * state of {@code bidi}:
 * <ul>
 * <li>reordering option {@code OPTION_INSERT_MARKS} sets
 *     {@code INSERT_LRM_FOR_NUMERIC} and clears
 *     {@code REMOVE_BIDI_CONTROLS};</li>
 * <li>reordering option {@code OPTION_REMOVE_CONTROLS} is applied after
 *     that and does the opposite;</li>
 * <li>{@code INSERT_LRM_FOR_NUMERIC} is cleared unless the reordering mode
 *     is one of the three inverse modes or {@code REORDER_RUNS_ONLY}.</li>
 * </ul>
 *
 * <p>The tests for where to insert marks look only at the first and last
 * code unit of a run. They ignore that there may be BN codes or non-BMP
 * code points there, so marks may be inserted unnecessarily, but the
 * tests are faster this way.
 *
 * @param bidi    the Bidi object supplying the text, runs and direction
 *                properties
 * @param options bit set of Bidi writing options
 * @return the reordered text
 */
static String writeReordered(Bidi bidi, int options)
{
    final char[] text = bidi.text;
    final int runCount = bidi.countRuns();
    int flags = options;

    /* "insert marks" asks for LRM insertion (still subject to the
     * reordering-mode test below) and keeps Bidi controls */
    if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) {
        flags |= Bidi.INSERT_LRM_FOR_NUMERIC;
        flags &= ~Bidi.REMOVE_BIDI_CONTROLS;
    }
    /* "remove controls" is evaluated second, so it wins when both
     * reordering options are present */
    if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) {
        flags |= Bidi.REMOVE_BIDI_CONTROLS;
        flags &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
    }
    /* outside of these reordering modes no LRM is ever inserted, so the
     * option is dropped and never tested again */
    final boolean markInsertingMode =
            bidi.reorderingMode == Bidi.REORDER_INVERSE_NUMBERS_AS_L ||
            bidi.reorderingMode == Bidi.REORDER_INVERSE_LIKE_DIRECT ||
            bidi.reorderingMode == Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL ||
            bidi.reorderingMode == Bidi.REORDER_RUNS_ONLY;
    if (!markInsertingMode) {
        flags &= ~Bidi.INSERT_LRM_FOR_NUMERIC;
    }

    final boolean insertMarks = (flags & Bidi.INSERT_LRM_FOR_NUMERIC) != 0;
    final boolean forward = (flags & Bidi.OUTPUT_REVERSE) == 0;
    /* even (LTR) runs are always written without mirroring */
    final int unmirrored = flags & ~Bidi.DO_MIRRORING;

    /* leave room for inserted marks when they are requested */
    final StringBuilder out =
            new StringBuilder(insertMarks ? bidi.length * 2 : bidi.length);

    if (forward && !insertMarks) {
        /* forward output, no Bidi controls inserted */
        for (int run = 0; run < runCount; ++run) {
            final BidiRun visualRun = bidi.getVisualRun(run);
            if (visualRun.isEvenRun()) {
                out.append(doWriteForward(text, visualRun.start,
                                          visualRun.limit, unmirrored));
            } else {
                out.append(doWriteReverse(text, visualRun.start,
                                          visualRun.limit, flags));
            }
        }
    } else if (forward) {
        /* forward output, inserting Bidi controls for "inverse Bidi" */
        final byte[] dirProps = bidi.dirProps;
        for (int run = 0; run < runCount; ++run) {
            final BidiRun visualRun = bidi.getVisualRun(run);
            /* start from the flags recorded for this run; a negative
             * value is a count of Bidi controls, not a set of flags */
            final int recorded = bidi.runs[run].insertRemove;
            int marks = recorded < 0 ? 0 : recorded;

            if (visualRun.isEvenRun()) {
                if (bidi.isInverse() &&
                        dirProps[visualRun.start] != Bidi.L) {
                    marks |= Bidi.LRM_BEFORE;
                }
                appendMark(out, (marks & Bidi.LRM_BEFORE) != 0,
                                (marks & Bidi.RLM_BEFORE) != 0);
                out.append(doWriteForward(text, visualRun.start,
                                          visualRun.limit, unmirrored));
                if (bidi.isInverse() &&
                        dirProps[visualRun.limit - 1] != Bidi.L) {
                    marks |= Bidi.LRM_AFTER;
                }
                appendMark(out, (marks & Bidi.LRM_AFTER) != 0,
                                (marks & Bidi.RLM_AFTER) != 0);
            } else {
                /* RTL run: its last code unit is written first */
                if (bidi.isInverse() &&
                        !bidi.testDirPropFlagAt(MASK_R_AL,
                                                visualRun.limit - 1)) {
                    marks |= Bidi.RLM_BEFORE;
                }
                appendMark(out, (marks & Bidi.LRM_BEFORE) != 0,
                                (marks & Bidi.RLM_BEFORE) != 0);
                out.append(doWriteReverse(text, visualRun.start,
                                          visualRun.limit, flags));
                if (bidi.isInverse() &&
                        (MASK_R_AL & Bidi.DirPropFlag(dirProps[visualRun.start])) == 0) {
                    marks |= Bidi.RLM_AFTER;
                }
                appendMark(out, (marks & Bidi.LRM_AFTER) != 0,
                                (marks & Bidi.RLM_AFTER) != 0);
            }
        }
    } else if (!insertMarks) {
        /* reverse output, no Bidi controls inserted: runs are visited
         * last to first and each run is written in the opposite sense */
        for (int run = runCount - 1; run >= 0; --run) {
            final BidiRun visualRun = bidi.getVisualRun(run);
            if (visualRun.isEvenRun()) {
                out.append(doWriteReverse(text, visualRun.start,
                                          visualRun.limit, unmirrored));
            } else {
                out.append(doWriteForward(text, visualRun.start,
                                          visualRun.limit, flags));
            }
        }
    } else {
        /* reverse output, inserting Bidi controls for "inverse Bidi" */
        final byte[] dirProps = bidi.dirProps;
        for (int run = runCount - 1; run >= 0; --run) {
            final BidiRun visualRun = bidi.getVisualRun(run);
            if (visualRun.isEvenRun()) {
                if (dirProps[visualRun.limit - 1] != Bidi.L) {
                    out.append(LRM_CHAR);
                }
                out.append(doWriteReverse(text, visualRun.start,
                                          visualRun.limit, unmirrored));
                if (dirProps[visualRun.start] != Bidi.L) {
                    out.append(LRM_CHAR);
                }
            } else {
                if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[visualRun.start])) == 0) {
                    out.append(RLM_CHAR);
                }
                out.append(doWriteForward(text, visualRun.start,
                                          visualRun.limit, flags));
                if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[visualRun.limit - 1])) == 0) {
                    out.append(RLM_CHAR);
                }
            }
        }
    }
    return out.toString();
}

/**
 * Appends at most one directional mark to {@code out}: LRM when
 * {@code wantLrm} is set, otherwise RLM when {@code wantRlm} is set,
 * otherwise nothing. LRM takes precedence when both are requested.
 */
private static void appendMark(StringBuilder out, boolean wantLrm, boolean wantRlm)
{
    if (wantLrm) {
        out.append(LRM_CHAR);
    } else if (wantRlm) {
        out.append(RLM_CHAR);
    }
}
}