// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
*******************************************************************************
* Copyright (C) 2008-2012, Google Inc, International Business Machines Corporation
* and others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.text;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import com.ibm.icu.impl.MultiComparator;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.AlphabeticIndex.Bucket;
import com.ibm.icu.text.AlphabeticIndex.Bucket.LabelType;
import com.ibm.icu.util.LocaleData;
import com.ibm.icu.util.ULocale;
/**
* AlphabeticIndex supports the creation of a UI index appropriate for a given language. It can support either direct
* use, or use with a client that doesn't support localized collation. The following is an example of what an index
* might look like in a UI:
*
*
* ... A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ...
*
* A
* Addison
* Albertson
* Azensky
* B
* Baecker
* ...
*
*
* The class can generate a list of labels for use as a UI "index", that is, a list of clickable characters (or
* character sequences) that allow the user to see a segment (bucket) of a larger "target" list. That is, each label
* corresponds to a bucket in the target list, where everything in the bucket is greater than or equal to the character
* (according to the locale's collation). Strings can be added to the index; they will be in sorted order in the right
* bucket.
*
* The class also supports having buckets for strings before the first (underflow), after the last (overflow), and
* between scripts (inflow). For example, if the index is constructed with labels for Russian and English, Greek
* characters would fall into an inflow bucket between the other two scripts.
*
*
* <b>Note:</b> If you expect to have a lot of ASCII or Latin characters as well as characters from the user's language, then it is a good idea to call addLabels(ULocale.ENGLISH).
*
*
* <h2>Direct Use</h2>
*
* The following shows an example of building an index directly.
* The "show..." methods below are just to illustrate usage.
*
*
* // Create a simple index where the values for the strings are Integers, and add the strings
*
* AlphabeticIndex index = new AlphabeticIndex(desiredLocale).addLabels(additionalLocale);
* int counter = 0;
* for (String item : test) {
* index.addRecord(item, counter++);
* }
* ...
* // Show index at top. We could skip or gray out empty buckets
*
* for (AlphabeticIndex.Bucket bucket : index) {
* if (showAll || bucket.size() != 0) {
* showLabelAtTop(UI, bucket.getLabel());
* }
* }
* ...
* // Show the buckets with their contents, skipping empty buckets
*
* for (AlphabeticIndex.Bucket bucket : index) {
* if (bucket.size() != 0) {
* showLabelInList(UI, bucket.getLabel());
* for (AlphabeticIndex.Record item : bucket) {
* showIndexedItem(UI, item.getName(), item.getData());
* }
* }
* }
*
*
* The caller can build different UIs using this class. For example, an index character could be omitted or grayed-out
* if its bucket is empty. Small buckets could also be combined based on size, such as:
*
*
* ... A-F G-N O-Z ...
*
*
*
* <h2>Client Support</h2>
*
* Callers can also use the AlphabeticIndex to support sorting on a client that doesn't support collation.
*
*
* <ul>
* <li>getLabels() can be used to get a list of the labels, such as "...", "A", "B",..., and send that list to the client.
* </li>
* <li>When the client has a new name, it sends that name to the server. The server needs to call the following methods,
* and communicate the bucketIndex and collationKey back to the client.
* </li>
* <li>The client would put the name (and associated information) into its bucket for bucketIndex. The collationKey is a
* sequence of bytes that can be compared with a binary compare, and produce the right localized result.
* </li>
* </ul>
*
*
*
* Notes:
*
*
* Additional collation parameters can be passed in as part of the locale name. For example, German plus numeric
* sorting would be "de@kn-true".
*
* @author markdavis
* @stable ICU 4.8
*/
public final class AlphabeticIndex implements Iterable> {
/**
* Internals
*/
// Compile-time switch, always true here; the code it guards is not visible in this part of the file.
static final boolean HACK_CODED_FIRSTS = true;
// Frozen set built from the property pattern [:script=Hani:] (code points whose script is Han).
// NOTE(review): unlike the neighbouring constants this field is not declared final.
private static UnicodeSet UNIHAN = new UnicodeSet("[:script=Hani:]").freeze();
// The single character U+FDD0. Every multi-character string in the generated *_LABELS sets
// below starts with it, and it is also added to ALPHABETIC.
static final String BASE = "\uFDD0";
// these are generated. Later, get from CLDR data.
static final UnicodeSet PINYIN_LABELS = new UnicodeSet("[A-Z{\uFDD0A}{\uFDD0B}{\uFDD0C}{\uFDD0D}{\uFDD0E}{\uFDD0F}{\uFDD0G}{\uFDD0H}{\uFDD0I}{\uFDD0J}{\uFDD0K}{\uFDD0L}{\uFDD0M}{\uFDD0N}{\uFDD0O}{\uFDD0P}{\uFDD0Q}{\uFDD0R}{\uFDD0S}{\uFDD0T}{\uFDD0U}{\uFDD0V}{\uFDD0W}{\uFDD0X}{\uFDD0Y}{\uFDD0Z}]").freeze();
static final UnicodeSet STROKE_LABELS = new UnicodeSet("[{\uFDD0\u2801}{\uFDD0\u2802}{\uFDD0\u2803}{\uFDD0\u2804}{\uFDD0\u2805}{\uFDD0\u2806}{\uFDD0\u2807}{\uFDD0\u2808}{\uFDD0\u2809}{\uFDD0\u280A}{\uFDD0\u280B}{\uFDD0\u280C}{\uFDD0\u280D}{\uFDD0\u280E}{\uFDD0\u280F}{\uFDD0\u2810}{\uFDD0\u2811}{\uFDD0\u2812}{\uFDD0\u2813}{\uFDD0\u2814}{\uFDD0\u2815}{\uFDD0\u2816}{\uFDD0\u2817}{\uFDD0\u2818}{\uFDD0\u2819}{\uFDD0\u281A}{\uFDD0\u281B}{\uFDD0\u281C}{\uFDD0\u281D}{\uFDD0\u281E}{\uFDD0\u281F}{\uFDD0\u2820}{\uFDD0\u2821}{\uFDD0\u2822}{\uFDD0\u2823}{\uFDD0\u2824}{\uFDD0\u2825}{\uFDD0\u2826}{\uFDD0\u2827}{\uFDD0\u2828}{\uFDD0\u2829}{\uFDD0\u282A}{\uFDD0\u282B}{\uFDD0\u282C}{\uFDD0\u282E}{\uFDD0\u2830}{\uFDD0\u2834}{\uFDD0\u2840}]").freeze();
static final UnicodeSet RADICAL_LABELS = new UnicodeSet("[{\uFDD0\u2E80}{\uFDD0\u2E81}{\uFDD0\u2E84}{\uFDD0\u2E85}{\uFDD0\u2E86}{\uFDD0\u2E87}{\uFDD0\u2E88}{\uFDD0\u2E8A}{\uFDD0\u2E8B}{\uFDD0\u2E8C}{\uFDD0\u2E91}{\uFDD0\u2E92}{\uFDD0\u2E93}{\uFDD0\u2E95}{\uFDD0\u2E97}{\uFDD0\u2E98}{\uFDD0\u2E99}{\uFDD0\u2E9B}{\uFDD0\u2E9D}{\uFDD0\u2E9E}{\uFDD0\u2E9F}{\uFDD0\u2EA0}{\uFDD0\u2EA2}{\uFDD0\u2EA3}{\uFDD0\u2EA4}{\uFDD0\u2EA7}{\uFDD0\u2EA8}{\uFDD0\u2EA9}{\uFDD0\u2EAA}{\uFDD0\u2EAB}{\uFDD0\u2EAC}{\uFDD0\u2EAE}{\uFDD0\u2EAF}{\uFDD0\u2EB0}{\uFDD0\u2EB4}{\uFDD0\u2EB8}{\uFDD0\u2EB9}{\uFDD0\u2EBB}{\uFDD0\u2EBC}{\uFDD0\u2EBD}{\uFDD0\u2EC0}{\uFDD0\u2EC1}{\uFDD0\u2EC2}{\uFDD0\u2EC3}{\uFDD0\u2EC5}{\uFDD0\u2EC6}{\uFDD0\u2EC8}{\uFDD0\u2EC9}{\uFDD0\u2ECA}{\uFDD0\u2ECB}{\uFDD0\u2ECF}{\uFDD0\u2ED0}{\uFDD0\u2ED1}{\uFDD0\u2ED3}{\uFDD0\u2ED4}{\uFDD0\u2ED6}{\uFDD0\u2ED7}{\uFDD0\u2ED8}{\uFDD0\u2ED9}{\uFDD0\u2EDA}{\uFDD0\u2EDB}{\uFDD0\u2EDC}{\uFDD0\u2EDD}{\uFDD0\u2EE0}{\uFDD0\u2EE1}{\uFDD0\u2EE2}{\uFDD0\u2EE3}{\uFDD0\u2EE4}{\uFDD0\u2EE5}{\uFDD0\u2EE6}{\uFDD0\u2EE7}{\uFDD0\u2EE8}{\uFDD0\u2EEA}{\uFDD0\u2EEB}{\uFDD0\u2EED}{\uFDD0\u2EEE}{\uFDD0\u2EEF}{\uFDD0\u2EF0}{\uFDD0\u2EF2}{\uFDD0\u2EF3}{\uFDD0\u2F00}{\uFDD0\u2F01}{\uFDD0\u2F02}{\uFDD0\u2F03}{\uFDD0\u2F05}{\uFDD0\u2F06}{\uFDD0\u2F07}{\uFDD0\u2F09}{\uFDD0\u2F0A}{\uFDD0\u2F0B}{\uFDD0\u2F0D}{\uFDD0\u2F0E}{\uFDD0\u2F10}{\uFDD0\u2F12}{\uFDD0\u2F13}{\uFDD0\u2F14}{\uFDD0\u2F15}{\uFDD0\u2F16}{\uFDD0\u2F17}{\uFDD0\u2F1B}{\uFDD0\u2F1D}{\uFDD0\u2F1E}{\uFDD0\u2F1F}{\uFDD0\u2F20}{\uFDD0\u2F21}{\uFDD0\u2F22}{\uFDD0\u2F23}{\uFDD0\u2F24}{\uFDD0\u2F25}{\uFDD0\u2F26}{\uFDD0\u2F27}{\uFDD0\u2F28}{\uFDD0\u2F2B}{\uFDD0\u2F2C}{\uFDD0\u2F2D}{\uFDD0\u2F2E}{\uFDD0\u2F2F}{\uFDD0\u2F31}{\uFDD0\u2F32}{\uFDD0\u2F34}{\uFDD0\u2F35}{\uFDD0\u2F36}{\uFDD0\u2F37}{\uFDD0\u2F38}{\uFDD0\u2F3A}{\uFDD0\u2F3B}{\uFDD0\u2F3D}{\uFDD0\u2F3E}{\uFDD0\u2F40}{\uFDD0\u2F42}{\uFDD0\u2F43}{\uFDD0\u2F44}{\uFDD0\u2F45}{\uFDD0\u2F46}{\uFDD0\u2F48}{\uFDD0\u2F4A}{\uFDD0\u2F4B}{\uFDD0\u2F4C}{\uFDD0\u2
F4E}{\uFDD0\u2F50}{\uFDD0\u2F51}{\uFDD0\u2F53}{\uFDD0\u2F57}{\uFDD0\u2F58}{\uFDD0\u2F59}{\uFDD0\u2F5A}{\uFDD0\u2F5B}{\uFDD0\u2F5E}{\uFDD0\u2F60}{\uFDD0\u2F61}{\uFDD0\u2F62}{\uFDD0\u2F63}{\uFDD0\u2F64}{\uFDD0\u2F65}{\uFDD0\u2F67}{\uFDD0\u2F68}{\uFDD0\u2F69}{\uFDD0\u2F6A}{\uFDD0\u2F6B}{\uFDD0\u2F6D}{\uFDD0\u2F6E}{\uFDD0\u2F6F}{\uFDD0\u2F71}{\uFDD0\u2F72}{\uFDD0\u2F73}{\uFDD0\u2F74}{\uFDD0\u2F76}{\uFDD0\u2F78}{\uFDD0\u2F7B}{\uFDD0\u2F7D}{\uFDD0\u2F7E}{\uFDD0\u2F7F}{\uFDD0\u2F82}{\uFDD0\u2F83}{\uFDD0\u2F84}{\uFDD0\u2F86}{\uFDD0\u2F87}{\uFDD0\u2F88}{\uFDD0\u2F89}{\uFDD0\u2F8A}{\uFDD0\u2F8D}{\uFDD0\u2F8E}{\uFDD0\u2F8F}{\uFDD0\u2F92}{\uFDD0\u2F94}{\uFDD0\u2F95}{\uFDD0\u2F96}{\uFDD0\u2F97}{\uFDD0\u2F98}{\uFDD0\u2F99}{\uFDD0\u2F9A}{\uFDD0\u2F9B}{\uFDD0\u2F9D}{\uFDD0\u2F9E}{\uFDD0\u2F9F}{\uFDD0\u2FA0}{\uFDD0\u2FA1}{\uFDD0\u2FA3}{\uFDD0\u2FA4}{\uFDD0\u2FA5}{\uFDD0\u2FA6}{\uFDD0\u2FA8}{\uFDD0\u2FAA}{\uFDD0\u2FAB}{\uFDD0\u2FAE}{\uFDD0\u2FAF}{\uFDD0\u2FB0}{\uFDD0\u2FB1}{\uFDD0\u2FB2}{\uFDD0\u2FB3}{\uFDD0\u2FB4}{\uFDD0\u2FB5}{\uFDD0\u2FB6}{\uFDD0\u2FB9}{\uFDD0\u2FBA}{\uFDD0\u2FBC}{\uFDD0\u2FBD}{\uFDD0\u2FBE}{\uFDD0\u2FBF}{\uFDD0\u2FC0}{\uFDD0\u2FC2}{\uFDD0\u2FC3}{\uFDD0\u2FC4}{\uFDD0\u2FC5}{\uFDD0\u2FC6}{\uFDD0\u2FC7}{\uFDD0\u2FC8}{\uFDD0\u2FC9}{\uFDD0\u2FCA}{\uFDD0\u2FCB}{\uFDD0\u2FCC}{\uFDD0\u2FCD}{\uFDD0\u2FCE}{\uFDD0\u2FCF}{\uFDD0\u2FD0}{\uFDD0\u2FD1}{\uFDD0\u2FD5}]").freeze();
// Four probe strings: U+4E00 on its own, then U+FDD0 followed by 'A', U+2801 and U+2E80.
// Entries 1..3 each appear as an element of PINYIN_LABELS, STROKE_LABELS and RADICAL_LABELS
// respectively. How the probes are evaluated is not visible in this part of the file.
static final List PROBES = Arrays.asList("\u4E00", "\uFDD0A", "\uFDD0\u2801", "\uFDD0\u2E80");
// Position of the pinyin probe in PROBES (and of PINYIN_LABELS in MATCHING).
static final int PINYIN_PROBE_INDEX = 1;
// Label sets indexed in parallel with PROBES; index 0 (the plain U+4E00 probe) has no label set.
static final UnicodeSet[] MATCHING = {null, PINYIN_LABELS, STROKE_LABELS, RADICAL_LABELS};
// U+034F COMBINING GRAPHEME JOINER.
private static final char CGJ = '\u034F';
// Frozen set: alphabetic code points minus marks, plus the BASE string.
private static final UnicodeSet ALPHABETIC = new UnicodeSet("[[:alphabetic:]-[:mark:]]").add(BASE).freeze();
// Frozen set of fourteen precomposed Hangul syllables (U+AC00 ... U+D558).
// NOTE(review): presumably the index labels used for Korean - confirm against the code that reads it.
private static final UnicodeSet HANGUL = new UnicodeSet(
"[\uAC00 \uB098 \uB2E4 \uB77C \uB9C8 \uBC14 \uC0AC \uC544 \uC790 \uCC28 \uCE74 \uD0C0 \uD30C \uD558]").freeze();
// Frozen set: code points that are both in the Ethiopic block and of Ethiopic script.
private static final UnicodeSet ETHIOPIC = new UnicodeSet("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]").freeze();
// Frozen set of the lowercase ASCII letters a-z.
private static final UnicodeSet CORE_LATIN = new UnicodeSet("[a-z]").freeze();
// Collator supplied to the constructor, or Collator.getInstance(locale) when none was given.
private final RuleBasedCollator collatorOriginal;
// Clone of collatorOriginal whose strength the constructor sets to Collator.PRIMARY.
private final RuleBasedCollator collatorPrimaryOnly;
// Not assigned anywhere in the visible part of the file.
private RuleBasedCollator collatorExternal;
// for testing
// NOTE(review): the stray '>' on the alreadyIn and inputList declarations suggests that generic
// type arguments were stripped from this copy of the file; restore them from the upstream source.
private final LinkedHashMap> alreadyIn = new LinkedHashMap>();
private final List noDistinctSorting = new ArrayList();
private final List notAlphabetic = new ArrayList();
// We accumulate these as we build up the input parameters
// initialLabels receives the exemplar characters passed to, or looked up by, the addLabels overloads.
private final UnicodeSet initialLabels = new UnicodeSet();
private final Collection> inputList = new ArrayList>();
// Lazy evaluated: null means that we have not built yet.
// The addLabels overloads reset this to null whenever the label set changes.
private BucketList buckets;
// Labels for the overflow, underflow and inflow buckets; each defaults to U+2026 HORIZONTAL ELLIPSIS.
private String overflowLabel = "\u2026";
private String underflowLabel = "\u2026";
private String inflowLabel = "\u2026";
// Explicitly set to false by the constructor; where it becomes true is not visible here.
private boolean hasPinyin;
/**
 * Creates an index for the given locale. The collator is obtained from the locale and the
 * initial labels are the locale's index exemplar characters.
 *
 * @param locale
 *            the locale that determines collation and the initial index labels
 * @stable ICU 4.8
 */
public AlphabeticIndex(ULocale locale) {
    // No explicit collator and no explicit exemplar set: both are derived from the locale.
    this(locale, (RuleBasedCollator) null, (UnicodeSet) null);
}
/**
 * Creates an index for the given JDK locale, which is converted with
 * {@code ULocale.forLocale} before use.
 *
 * @param locale
 *            the locale that determines collation and the initial index labels
 * @stable ICU 4.8
 */
public AlphabeticIndex(Locale locale) {
    // Same defaults as the ULocale constructor: collator and exemplars come from the locale.
    this(ULocale.forLocale(locale), null, null);
}
// /**
// * @internal
// * @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
// */
// public enum LangType {
// /**
// * @internal
// * @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
// */
// NORMAL,
// /**
// * @internal
// * @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
// */
// SIMPLIFIED,
// /**
// * @internal
// * @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
// */
// TRADITIONAL;
// /**
// * @internal
// * @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
// */
// public static LangType fromLocale(ULocale locale) {
// String lang = locale.getLanguage();
// if (lang.equals("zh")) {
// if ("Hant".equals(locale.getScript()) || "TW".equals(locale.getCountry())) {
// return TRADITIONAL;
// }
// return SIMPLIFIED;
// }
// return NORMAL;
// }
// }
/**
* @internal
* @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
*/
public AlphabeticIndex(ULocale locale, RuleBasedCollator collator, UnicodeSet exemplarChars) {
// langType = LangType.fromLocale(locale);
// // HACK because we have to know the type of the collation for Chinese
// if (langType != LangType.NORMAL) {
// locale = locale.setKeywordValue("collation", langType == LangType.TRADITIONAL ? "stroke" : "pinyin");
// }
hasPinyin = false;
collatorOriginal = collator != null ? collator : (RuleBasedCollator) Collator.getInstance(locale);
try {
collatorPrimaryOnly = (RuleBasedCollator) (collatorOriginal.clone());
} catch (Exception e) {
// should never happen
throw new IllegalStateException("Collator cannot be cloned", e);
}
collatorPrimaryOnly.setStrength(Collator.PRIMARY);
if (exemplarChars == null) {
exemplarChars = getIndexExemplars(locale);
}
addLabels(exemplarChars);
}
/**
 * Adds further index characters on top of those already collected.
 *
 * @param additions
 *            the extra characters to add to the index, such as A-Z
 * @return this, for chaining
 * @stable ICU 4.8
 */
public AlphabeticIndex addLabels(UnicodeSet additions) {
    this.initialLabels.addAll(additions);
    // The label set changed, so drop the lazily built bucket list.
    this.buckets = null;
    return this;
}
/**
 * Adds the index exemplar characters of each given locale to the index labels.
 *
 * @param additions
 *            locales whose index characters should be added, such as Swedish
 * @return this, for chaining
 * @stable ICU 4.8
 */
public AlphabeticIndex addLabels(ULocale... additions) {
    for (int i = 0; i < additions.length; ++i) {
        UnicodeSet exemplars = getIndexExemplars(additions[i]);
        initialLabels.addAll(exemplars);
    }
    // The label set changed, so drop the lazily built bucket list.
    buckets = null;
    return this;
}
/**
 * Adds the index exemplar characters of each given JDK locale to the index labels. Each
 * locale is converted with {@code ULocale.forLocale} first.
 *
 * @param additions
 *            locales whose index characters should be added, such as Swedish
 * @return this, for chaining
 * @stable ICU 4.8
 */
public AlphabeticIndex addLabels(Locale... additions) {
    for (int i = 0; i < additions.length; ++i) {
        ULocale converted = ULocale.forLocale(additions[i]);
        initialLabels.addAll(getIndexExemplars(converted));
    }
    // The label set changed, so drop the lazily built bucket list.
    buckets = null;
    return this;
}
/**
 * Sets the label of the overflow bucket, i.e. the bucket for strings that sort after the
 * last index label (see the class description).
 *
 * @param overflowLabel
 *            the new overflow label
 * @return this, for chaining
 * @stable ICU 4.8
 */
public AlphabeticIndex setOverflowLabel(String overflowLabel) {
    this.overflowLabel = overflowLabel;
    // Invalidate the lazily built bucket list, as addLabels does, so that a list built
    // before this call cannot keep presenting the previous label.
    buckets = null;
    return this;
}
/**
 * Returns the label currently used for the underflow bucket, i.e. the bucket for strings
 * that sort before the first index label, e.g. the first item in: ... A B C
 *
 * @return underflow label
 * @stable ICU 4.8
 */
public String getUnderflowLabel() {
    // TODO get localized version
    return this.underflowLabel;
}
/**
 * Sets the label of the underflow bucket, i.e. the bucket for strings that sort before the
 * first index label (see the class description).
 *
 * @param underflowLabel
 *            the new underflow label
 * @return this, for chaining
 * @stable ICU 4.8
 */
public AlphabeticIndex setUnderflowLabel(String underflowLabel) {
    this.underflowLabel = underflowLabel;
    // Invalidate the lazily built bucket list, as addLabels does, so that a list built
    // before this call cannot keep presenting the previous label.
    buckets = null;
    return this;
}
/**
 * Returns the label currently used for the overflow bucket, i.e. the bucket for strings
 * that sort after the last index label, e.g. the last item in: X Y Z ...
 *
 * @return overflow label
 * @stable ICU 4.8
 */
public String getOverflowLabel() {
    // TODO get localized version
    return this.overflowLabel;
}
/**
 * Sets the label of the inflow buckets, i.e. the buckets that fall between the labels of
 * different scripts (see the class description).
 *
 * @param inflowLabel
 *            the new inflow label
 * @return this, for chaining
 * @stable ICU 4.8
 */
public AlphabeticIndex setInflowLabel(String inflowLabel) {
    this.inflowLabel = inflowLabel;
    // Invalidate the lazily built bucket list, as addLabels does, so that a list built
    // before this call cannot keep presenting the previous label.
    buckets = null;
    return this;
}
/**
 * Returns the label currently used for inflow buckets, the abbreviated buckets that sit
 * between other labels. For example, when labels for Latin and Greek are both present:
 * X Y Z ... &#x0391; &#x0392; &#x0393;.
 *
 * @return inflow label
 * @stable ICU 4.8
 */
public String getInflowLabel() {
    // TODO get localized version
    return this.inflowLabel;
}
/**
 * Returns the current limit on the number of labels in the index. The number of buckets
 * can be slightly larger: see getBucketCount().
 *
 * @return the maximum number of labels
 * @stable ICU 4.8
 */
public int getMaxLabelCount() {
    return this.maxLabelCount;
}
/**
 * Sets a limit on the number of labels in the index. The number of buckets can be slightly
 * larger: see getBucketCount().
 *
 * @param maxLabelCount
 *            the maximum number of labels. Currently, if the number is exceeded, then every
 *            nth item is removed to bring the count down. A more sophisticated mechanism may
 *            be available in the future.
 * @return this, for chaining
 * @stable ICU 4.8
 */
public AlphabeticIndex setMaxLabelCount(int maxLabelCount) {
    this.maxLabelCount = maxLabelCount;
    // Invalidate the lazily built bucket list, as addLabels does, so that the new limit
    // is applied the next time the labels are computed.
    buckets = null;
    return this;
}
/**
* Determine the best labels to use. This is based on the exemplars, but we also process to make sure that they are unique,
* and sort differently, and that the overall list is small enough.
*/
private ArrayList initLabels() {
UnicodeSet exemplars = new UnicodeSet(initialLabels);
// First sort them, with an "best" ordering among items that are the same according
// to the collator.
// Re the warning: the JDK inexplicably didn't make Collators be Comparator!
@SuppressWarnings("unchecked")
Set preferenceSorting = new TreeSet(new MultiComparator