// org.apache.abdera.i18n.text.Bidi Maven / Gradle / Ivy
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/
package org.apache.abdera.i18n.text;
import java.text.AttributedString;
import java.util.Arrays;
import java.util.Locale;

import org.apache.abdera.i18n.rfc4646.Lang;
/**
 * Heuristics for guessing the base direction (left-to-right or right-to-left) of text from its language tag, its
 * character set encoding, or the characters it contains.
 * <p>
 * The lookup tables used by this class are sorted once during class initialization and never modified afterwards.
 */
public class Bidi {

    /** Base text direction reported by the guessing algorithms. */
    public enum Direction {
        UNSPECIFIED, LTR, RTL
    }

    /** Unicode RIGHT-TO-LEFT MARK. */
    private static final char RIGHT_TO_LEFT_MARK = 0x200F;

    /** Unicode LEFT-TO-RIGHT MARK. */
    private static final char LEFT_TO_RIGHT_MARK = 0x200E;

    // All tables hold lower-case values and are searched with Arrays.binarySearch, which requires sorted input;
    // sorted(...) establishes that ordering exactly once instead of relying on the literal order below.

    /** Primary language subtags treated as right-to-left. */
    private static final String[] RTL_LANGS = sorted("ar", "dv", "fa", "he", "ps", "syr", "ur", "yi");

    /** Script subtags treated as right-to-left. */
    private static final String[] RTL_SCRIPTS = sorted(
        "arab", "avst", "hebr", "hung", "lydi", "mand", "mani", "mero", "mong", "nkoo", "orkh", "phlv", "phnx",
        "samr", "syrc", "syre", "syrj", "syrn", "tfng", "thaa");

    /**
     * Charset encodings that one may typically expect to be RTL. Entries are stored in the normalized form used for
     * lookups: lower case, with '-' in place of '_'.
     */
    private static final String[] RTL_ENCODINGS = sorted(
        "iso-8859-6", "iso-8859-6-bidi", "iso-8859-6-i", "iso-ir-127", "ecma-114", "asmo-708", "arabic",
        "csisolatinarabic", "windows-1256", "ibm-864", "macarabic", "macfarsi", "iso-8859-8-i", "iso-8859-8-bidi",
        "windows-1255", "iso-8859-8", "ibm-862", "machebrew", "asmo-449", "iso-9036", "arabic7", "iso-ir-89",
        "csiso89asmo449", "iso-unicode-ibm-1264", "csunicodeibm1264",
        // was "iso_8859-8:1988", which could never match because lookups replace '_' with '-'
        "iso-8859-8:1988",
        "iso-ir-138", "hebrew", "csisolatinhebrew", "iso-unicode-ibm-1265", "csunicodeibm1265", "cp862", "862",
        "cspc862latinhebrew");

    /** Sorts the given values in place and returns them, ready for {@link Arrays#binarySearch(Object[], Object)}. */
    private static String[] sorted(String... values) {
        Arrays.sort(values);
        return values;
    }

    /**
     * Case-insensitive membership test against one of the sorted lower-case tables. The value is lower-cased with a
     * fixed locale so the result does not depend on the JVM default locale (e.g. the Turkish dotless i).
     */
    private static boolean contains(String[] sortedValues, String value) {
        return value != null && Arrays.binarySearch(sortedValues, value.toLowerCase(Locale.ENGLISH)) >= 0;
    }

    /**
     * Algorithm that will determine text direction by looking at the characteristics of the language tag. If the tag
     * uses a language or script that is known to be RTL, then Direction.RTL will be returned.
     *
     * @param lang the language tag to inspect; may be null
     * @return {@link Direction#RTL} if the script or primary language is in the RTL tables, otherwise
     *         {@link Direction#UNSPECIFIED} (also for a null tag)
     */
    public static Direction guessDirectionFromLanguage(Lang lang) {
        if (lang == null) {
            return Direction.UNSPECIFIED;
        }
        // The script subtag is checked first; it is optional.
        if (lang.getScript() != null && contains(RTL_SCRIPTS, lang.getScript().getName())) {
            return Direction.RTL;
        }
        if (lang.getLanguage() != null && contains(RTL_LANGS, lang.getLanguage().getName())) {
            return Direction.RTL;
        }
        return Direction.UNSPECIFIED;
    }

    /**
     * Algorithm that will determine text direction by looking at the character set encoding. If the charset is
     * typically used for RTL languages, Direction.RTL will be returned. The name is matched case-insensitively and
     * with '_' treated as '-'.
     *
     * @param charset the charset name; may be null
     * @return {@link Direction#RTL} if the charset is in the RTL encoding table, otherwise
     *         {@link Direction#UNSPECIFIED}
     */
    public static Direction guessDirectionFromEncoding(String charset) {
        if (charset == null) {
            return Direction.UNSPECIFIED;
        }
        String normalized = charset.replace('_', '-');
        return contains(RTL_ENCODINGS, normalized) ? Direction.RTL : Direction.UNSPECIFIED;
    }

    /**
     * Algorithm that analyzes properties of the text to determine text direction. If the text starts with a
     * right-to-left or left-to-right mark, that mark decides. Otherwise each Unicode code point votes: code points
     * for which {@link java.text.Bidi#requiresBidi(char[], int, int)} is true vote RTL, all others (including
     * whitespace, digits and punctuation) vote LTR. Direction.RTL is returned only if RTL votes are in the majority.
     *
     * @param text the text to analyze; may be null
     * @return the guessed direction, or {@link Direction#UNSPECIFIED} for null or empty text
     */
    public static Direction guessDirectionFromTextProperties(String text) {
        if (text == null || text.length() == 0) {
            return Direction.UNSPECIFIED;
        }
        char first = text.charAt(0);
        if (first == RIGHT_TO_LEFT_MARK) {
            return Direction.RTL;
        }
        if (first == LEFT_TO_RIGHT_MARK) {
            return Direction.LTR;
        }
        int balance = 0;
        // Walk by code point so a supplementary-plane character is classified as one unit rather than as two
        // unpaired surrogates.
        for (int index = 0; index < text.length();) {
            char[] units = Character.toChars(text.codePointAt(index));
            if (java.text.Bidi.requiresBidi(units, 0, units.length)) {
                balance++;
            } else {
                balance--;
            }
            index += units.length;
        }
        return balance > 0 ? Direction.RTL : Direction.LTR;
    }

    /**
     * Algorithm that defers to the Java Bidi implementation to determine text direction.
     *
     * @param text the text to analyze; may be null
     * @return {@link Direction#LTR} or {@link Direction#RTL} according to the base direction computed by
     *         {@link java.text.Bidi}, or {@link Direction#UNSPECIFIED} for null text
     */
    public static Direction guessDirectionFromJavaBidi(String text) {
        if (text == null) {
            return Direction.UNSPECIFIED;
        }
        AttributedString attributed = new AttributedString(text);
        java.text.Bidi bidi = new java.text.Bidi(attributed.getIterator());
        return bidi.baseIsLeftToRight() ? Direction.LTR : Direction.RTL;
    }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy