All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.abdera.i18n.text.Bidi Maven / Gradle / Ivy

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.  For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/
package org.apache.abdera.i18n.text;

import java.text.AttributedString;
import java.util.Arrays;
import java.util.Locale;

import org.apache.abdera.i18n.rfc4646.Lang;

/**
 * Heuristics for guessing the base direction (left-to-right or right-to-left)
 * of a piece of text from its language tag, its charset encoding, or the
 * characters it contains.
 *
 * <p>Every method is static and returns {@link Direction#UNSPECIFIED} when it
 * is handed {@code null}. The lookup tables are built once during class
 * initialization and are never modified afterwards.</p>
 */
public class Bidi {

  /** Possible outcomes of a direction guess. */
  public enum Direction { UNSPECIFIED, LTR, RTL };

  /** Unicode RIGHT-TO-LEFT MARK, U+200F. */
  private static final char RIGHT_TO_LEFT_MARK = 0x200F;

  /** Unicode LEFT-TO-RIGHT MARK, U+200E. */
  private static final char LEFT_TO_RIGHT_MARK = 0x200E;

  // Primary language subtags treated as RTL.
  // NOTE: binary-searched, so the entries must stay lower-case and sorted.
  private static final String[] RTL_LANGS = {
    "ar",
    "dv",
    "fa",
    "he",
    "ps",
    "syr",
    "ur",
    "yi"};

  // Script subtags treated as RTL.
  // NOTE: binary-searched, so the entries must stay lower-case and sorted.
  private static final String[] RTL_SCRIPTS = {
    "arab","avst","hebr","hung","lydi","mand",
    "mani","mero","mong","nkoo","orkh","phlv",
    "phnx","samr","syrc","syre","syrj","syrn",
    "tfng","thaa"
  };

  // charset encodings that one may typically expect to be RTL.
  // The literal list is normalized and sorted once, at class initialization,
  // so lookups never have to mutate this shared array.
  private static final String[] RTL_ENCODINGS = normalizedSortedCopy(new String[] {
    "iso-8859-6", "iso-8859-6-bidi",
    "iso-8859-6-i", "iso-ir-127",
    "ecma-114", "asmo-708", "arabic",
    "csisolatinarabic", "windows-1256",
    "ibm-864", "macarabic", "macfarsi",
    "iso-8859-8-i", "iso-8859-8-bidi",
    "windows-1255", "iso-8859-8", "ibm-862",
    "machebrew", "asmo-449", "iso-9036",
    "arabic7", "iso-ir-89", "csiso89asmo449",
    "iso-unicode-ibm-1264", "csunicodeibm1264",
    "iso_8859-8:1988", "iso-ir-138", "hebrew",
    "csisolatinhebrew", "iso-unicode-ibm-1265",
    "csunicodeibm1265", "cp862", "862",
    "cspc862latinhebrew"
  });

  /**
   * Puts a charset name into the canonical form used for table lookups:
   * underscores become hyphens and letters are lower-cased with a fixed
   * locale, so the result does not depend on the JVM default locale
   * (a Turkish default locale would otherwise map 'I' to a dotless i).
   */
  private static String normalizeCharsetName(String charset) {
    return charset.replace('_', '-').toLowerCase(Locale.ENGLISH);
  }

  /**
   * Returns a new array holding the normalized form of every given charset
   * name, sorted so that it can be binary-searched.
   */
  private static String[] normalizedSortedCopy(String[] names) {
    String[] normalized = new String[names.length];
    for (int i = 0; i < names.length; i++) {
      normalized[i] = normalizeCharsetName(names[i]);
    }
    Arrays.sort(normalized);
    return normalized;
  }

  /**
   * Tells whether the lower-cased form of <code>name</code> occurs in the
   * given sorted, lower-case table. A <code>null</code> name is never listed.
   */
  private static boolean isListed(String[] sortedTable, String name) {
    return name != null
        && Arrays.binarySearch(sortedTable, name.toLowerCase(Locale.ENGLISH)) >= 0;
  }

  /**
   * Algorithm that will determine text direction by looking at the
   * characteristics of the language tag.  If the tag uses a language
   * or script that is known to be RTL, then Direction.RTL will be returned.
   *
   * @param lang the language tag to inspect; may be <code>null</code>
   * @return Direction.RTL if the script or the primary language of the tag
   *         is listed as RTL, otherwise Direction.UNSPECIFIED (also for a
   *         <code>null</code> tag)
   */
  public static Direction guessDirectionFromLanguage(Lang lang) {
    if (lang == null) {
      return Direction.UNSPECIFIED;
    }
    // Either a known RTL script or a known RTL primary language is sufficient.
    if (lang.getScript() != null
        && isListed(RTL_SCRIPTS, lang.getScript().getName())) {
      return Direction.RTL;
    }
    if (lang.getLanguage() != null
        && isListed(RTL_LANGS, lang.getLanguage().getName())) {
      return Direction.RTL;
    }
    return Direction.UNSPECIFIED;
  }

  /**
   * Algorithm that will determine text direction by looking at the
   * character set encoding.  If the charset is typically used for RTL
   * languages, Direction.RTL will be returned.
   *
   * <p>The comparison ignores case and treats '_' and '-' as equivalent.</p>
   *
   * @param charset the charset name; may be <code>null</code>
   * @return Direction.RTL if the charset is in the list of typically-RTL
   *         encodings, otherwise Direction.UNSPECIFIED
   */
  public static Direction guessDirectionFromEncoding(String charset) {
    if (charset == null) {
      return Direction.UNSPECIFIED;
    }
    String key = normalizeCharsetName(charset);
    return Arrays.binarySearch(RTL_ENCODINGS, key) >= 0
        ? Direction.RTL
        : Direction.UNSPECIFIED;
  }

  /**
   * Algorithm that analyzes properties of the text to determine text direction.
   * If the majority of characters in the text are RTL characters, then
   * Direction.RTL will be returned.
   *
   * <p>A leading RIGHT-TO-LEFT MARK or LEFT-TO-RIGHT MARK decides the result
   * immediately.  Otherwise each Unicode code point casts one vote: RTL if
   * <code>java.text.Bidi.requiresBidi</code> reports it as requiring bidi
   * processing, LTR otherwise (so neutral characters such as spaces count as
   * LTR votes).  Ties resolve to LTR.</p>
   *
   * @param text the text to analyze; may be <code>null</code>
   * @return Direction.UNSPECIFIED for <code>null</code> or empty text,
   *         otherwise Direction.RTL or Direction.LTR
   */
  public static Direction guessDirectionFromTextProperties(String text) {
    if (text == null || text.length() == 0) {
      return Direction.UNSPECIFIED;
    }
    char first = text.charAt(0);
    if (first == RIGHT_TO_LEFT_MARK) return Direction.RTL; // explicit right-to-left mark
    if (first == LEFT_TO_RIGHT_MARK) return Direction.LTR; // explicit left-to-right mark

    int balance = 0;              // RTL votes minus non-RTL votes
    char[] unit = new char[2];    // reused buffer: one code point is at most two chars
    int index = 0;
    while (index < text.length()) {
      // Walk by code point so a surrogate pair is judged as one character;
      // judged separately, the halves of a supplementary-plane RTL letter
      // would each be counted as non-RTL.
      int codePoint = text.codePointAt(index);
      int width = Character.toChars(codePoint, unit, 0);
      if (java.text.Bidi.requiresBidi(unit, 0, width)) {
        balance++;
      } else {
        balance--;
      }
      index += width;
    }
    return balance > 0 ? Direction.RTL : Direction.LTR;
  }

  /**
   * Algorithm that defers to the Java Bidi implementation to determine text direction.
   *
   * @param text the text to analyze; may be <code>null</code>
   * @return Direction.UNSPECIFIED for <code>null</code>, otherwise
   *         Direction.LTR or Direction.RTL according to
   *         <code>java.text.Bidi.baseIsLeftToRight()</code>
   */
  public static Direction guessDirectionFromJavaBidi(String text) {
    if (text == null) {
      return Direction.UNSPECIFIED;
    }
    AttributedString attributed = new AttributedString(text);
    java.text.Bidi bidi = new java.text.Bidi(attributed.getIterator());
    return bidi.baseIsLeftToRight() ? Direction.LTR : Direction.RTL;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy