com.adobe.xfa.text.TextBreakIterator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
package com.adobe.xfa.text;
/**
*
* Fashioned loosely on the ICU break iterator class, the text break
* iterator can be used to find significant breakpoints in text. The
* most significant difference is that this class doesn't require an
* array of characters; instead, the caller supplies a character
* property iterator.
*
*
* Currently the caller can request one of two implementation types,
* through static class methods, in order to find grapheme cluster
* breaks or word breaks. Note that word breaks are algorithmic only
* and do not extend to languages like Thai which require dictionary
* based breaking.
*
* @exclude from published api -- Mike Tardif, May 2006.
*/
public abstract class TextBreakIterator {

    /** Sentinel returned by {@link #next()} once there are no further breaks. */
    public static final int DONE = Integer.MAX_VALUE;

    /**
     * Locate the first break, which is typically the start of the text.
     *
     * @return index of the first break, expressed in the index space of the
     * character property iterator this instance was created with.
     */
    public abstract int first ();

    /**
     * Advance to the following break.
     *
     * @return index of the next break, expressed in the index space of the
     * character property iterator this instance was created with, or
     * {@link #DONE} when the text holds no more breaks.
     */
    public abstract int next ();

    /**
     * Factory for an iterator that reports grapheme cluster breaks.
     *
     * @param poTextCharPropIterator source of the character properties that
     * the returned iterator analyses.
     * @return a new break iterator performing grapheme cluster analysis.
     */
    public static TextBreakIterator createGraphemeInstance (TextCharPropIterator poTextCharPropIterator) {
        final TextBreakIterator oGraphemeBreaks = new GraphemeBreakIterator (poTextCharPropIterator);
        return oGraphemeBreaks;
    }

    /**
     * Factory for an iterator that reports word breaks.
     *
     * @param poTextCharPropIterator source of the character properties that
     * the returned iterator analyses.
     * @return a new break iterator performing word analysis.
     */
    public static TextBreakIterator createWordInstance (TextCharPropIterator poTextCharPropIterator) {
        final TextBreakIterator oWordBreaks = new WordBreakIterator (poTextCharPropIterator);
        return oWordBreaks;
    }
}
//----------------------------------------------------------------------
//
// IteratorData - Helper structure for tracking
// iterator data
//
//----------------------------------------------------------------------
/**
 * Small mutable record shared by the break iterators to remember where a
 * scan currently stands.
 */
class IteratorData {
    /** Index produced by the most recent {@code next} call (or seeded by the owner). */
    public int mnCharIndex;
    /** Set by each {@code next} call: true once the underlying source is exhausted. */
    public boolean mbEnd;
    /** Owner-managed slot; neither {@code next} overload writes to it. */
    public int mePrevData;

    /**
     * Pull one item from a character property iterator, starting at the
     * currently stored index.
     *
     * @param poTextCharPropIterator iterator to read from.
     * @return the data value read; {@code TextCharProp.INVALID} also raises
     * {@link #mbEnd}.
     */
    public int next (TextCharPropIterator poTextCharPropIterator) {
        final int eFetched = poTextCharPropIterator.next (this.mnCharIndex);
        this.mbEnd = (TextCharProp.INVALID == eFetched);
        this.mnCharIndex = poTextCharPropIterator.getNextIndex();
        return eFetched;
    }

    /**
     * Pull one grapheme cluster from a grapheme break iterator.
     *
     * @param oGraphemeIterator iterator to advance.
     * @return the value of {@code getPrevData()} after the advance;
     * {@link #mbEnd} is raised when the advance reported
     * {@code TextBreakIterator.DONE}.
     */
    public int next (GraphemeBreakIterator oGraphemeIterator) {
        final int nBoundary = oGraphemeIterator.advanceGrapheme();
        this.mnCharIndex = nBoundary;
        this.mbEnd = (TextBreakIterator.DONE == nBoundary);
        return oGraphemeIterator.getPrevData();
    }
}
//----------------------------------------------------------------------
//
// GraphemeBreakIterator - Private implementation class
// for finding grapheme cluster breaks
//
//----------------------------------------------------------------------
/**
 * Break iterator that walks a character property iterator and reports the
 * positions at which the pair table {@link #gbGraphemeBreak} allows a break.
 */
class GraphemeBreakIterator extends TextBreakIterator {
    private final TextCharPropIterator mpoTextCharPropIterator;
    // Scan state: mnCharIndex is the next index to read, mePrevData the data of
    // the character most recently consumed.
    private final IteratorData moData = new IteratorData();
    // Data that was "previous" when the latest advanceGrapheme() call started.
    private int mePrevData;

    // Pair table indexed [previous][next] by TextCharProp.graphemeToIndex();
    // true means a break is reported between the two characters.
    static final boolean gbGraphemeBreak[][] = {
        /*              Control CR     Default Extend L      LF     LV     LVT    T      V     */
        /* Control */ { true,   true,  true,   false, true,  true,  true,  true,  true,  true  },
        /* CR      */ { true,   true,  true,   false, true,  false, true,  true,  true,  true  },
        /* Default */ { true,   true,  true,   false, true,  true,  true,  true,  true,  true  },
        /* Extend  */ { true,   true,  true,   false, true,  true,  true,  true,  true,  true  },
        /* L       */ { true,   true,  true,   false, false, true,  false, false, true,  false },
        /* LF      */ { true,   true,  true,   false, true,  true,  true,  true,  true,  true  },
        /* LV      */ { true,   true,  true,   false, true,  true,  true,  true,  false, false },
        /* LVT     */ { true,   true,  true,   false, true,  true,  true,  true,  false, true  },
        /* T       */ { true,   true,  true,   false, true,  true,  true,  true,  false, true  },
        /* V       */ { true,   true,  true,   false, true,  true,  true,  true,  false, false }
    };

    /**
     * @param poTextCharPropIterator source of character properties; the
     * iterator is positioned on its first break immediately.
     */
    public GraphemeBreakIterator (TextCharPropIterator poTextCharPropIterator) {
        this.mpoTextCharPropIterator = poTextCharPropIterator;
        doFirst();
    }

    /**
     * @return the data that was current when the latest
     * {@link #advanceGrapheme()} call began scanning.
     */
    public int getPrevData () {
        return this.mePrevData;
    }

    /**
     * Scan forward to the next position where the pair table allows a break.
     *
     * @return the index of that position, the index reached when the
     * underlying iterator ran out, or {@link #DONE} if it had already run out
     * before this call.
     */
    public int advanceGrapheme () {
        if (moData.mbEnd) {
            return DONE;
        }

        // Remember what preceded this scan so getPrevData() can report it.
        this.mePrevData = moData.mePrevData;

        while (true) {
            final int nCandidate = moData.mnCharIndex;
            final int eRight = moData.next (mpoTextCharPropIterator);
            if (moData.mbEnd) {
                // Source exhausted: the position just before the failed read is the last break.
                return nCandidate;
            }

            final int nRow = TextCharProp.graphemeToIndex (moData.mePrevData);
            final int nColumn = TextCharProp.graphemeToIndex (eRight);
            moData.mePrevData = eRight;

            final boolean bBreakHere = gbGraphemeBreak[nRow][nColumn];
            if (bBreakHere) {
                return nCandidate;
            }
        }
    }

    /** Restart the scan and return the first break. */
    public int first () {
        return doFirst();
    }

    /** Equivalent to {@link #advanceGrapheme()}. */
    public int next () {
        return advanceGrapheme();
    }

    // Rewind the underlying iterator and prime the scan state with the first character.
    private int doFirst () {
        final int nStart = mpoTextCharPropIterator.first();
        moData.mnCharIndex = nStart;
        final int eFirstData = moData.next (mpoTextCharPropIterator);
        moData.mePrevData = eFirstData;
        return nStart;
    }
}
//----------------------------------------------------------------------
//
// WordBreakIterator - Private implementation class for
// finding word breaks
//
//----------------------------------------------------------------------
/**
 * Break iterator that reports word breaks. It consumes grapheme clusters
 * from a {@link GraphemeBreakIterator} and decides, for every pair of
 * adjacent clusters, whether a break lies between them by consulting
 * {@link #gbWordBreak}.
 */
class WordBreakIterator extends TextBreakIterator {
    private final GraphemeBreakIterator moGraphemeIterator;

    // Scan state. mnCharIndex is the boundary that will be tested next and
    // mePrevData is the data of the cluster on the left of that boundary.
    private final IteratorData moData = new IteratorData();

    // One-cluster push-back buffer, meaningful only while mbPrev is true.
    // It holds a cluster that was fetched as look-ahead but not yet tested:
    // mePrevData is its data, mnCharIndex/mbEnd are the values the fetch
    // left in moData. It is a separate object on purpose: assigning moData
    // to it would only alias the reference, not snapshot the state.
    private final IteratorData moPrevData = new IteratorData();
    private boolean mbPrev;

    private static final int WB_NO = 0;   // never break between the pair
    private static final int WB_YES = 1;  // always break between the pair
    private static final int WB_1ST = 2;  // no break; right-hand cluster is skipped, left-hand data is kept
    private static final int WB_MID = 3;  // decided by the cluster after the right-hand one

    // Pair table indexed [left][right] by TextCharProp.wordToIndex().
    static final int gbWordBreak[][] = {
        /*              ALetter Default ExtNL   Format  Ktkna   MidLet  MidNum  Numeric */
        /* ALetter */ { WB_NO,  WB_YES, WB_NO,  WB_1ST, WB_YES, WB_MID, WB_YES, WB_YES },
        /* Default */ { WB_YES, WB_YES, WB_YES, WB_1ST, WB_YES, WB_YES, WB_YES, WB_YES },
        /* ExtNL   */ { WB_NO,  WB_YES, WB_NO,  WB_1ST, WB_NO,  WB_YES, WB_YES, WB_NO  },
        /* Format  */ { WB_YES, WB_YES, WB_YES, WB_1ST, WB_YES, WB_YES, WB_YES, WB_YES },
        /* Ktkna   */ { WB_YES, WB_YES, WB_NO,  WB_1ST, WB_NO,  WB_YES, WB_YES, WB_YES },
        /* MidLet  */ { WB_YES, WB_YES, WB_YES, WB_1ST, WB_YES, WB_YES, WB_YES, WB_YES },
        /* MidNum  */ { WB_YES, WB_YES, WB_YES, WB_1ST, WB_YES, WB_YES, WB_YES, WB_YES },
        /* Numeric */ { WB_NO,  WB_YES, WB_NO,  WB_1ST, WB_YES, WB_YES, WB_MID, WB_YES }
    };

    /**
     * @param poTextCharPropIterator source of character properties; it is
     * wrapped in a private grapheme break iterator and the scan is primed
     * immediately.
     */
    public WordBreakIterator (TextCharPropIterator poTextCharPropIterator) {
        moGraphemeIterator = new GraphemeBreakIterator (poTextCharPropIterator);
        mbPrev = false;
        doFirst();
    }

    /** Restart the scan and return the first break. */
    public int first () {
        return doFirst();
    }

    /**
     * Advance to the next word break.
     *
     * @return index of the next break, the end-of-text index when the
     * clusters run out, or {@link #DONE} once the end has been reported.
     */
    public int next () {
        // While a cluster is pushed back, moData.mbEnd is held false so that
        // the pending boundary is still delivered before DONE.
        if (moData.mbEnd) {
            return DONE;
        }

        for (;;) {
            int nTestIndex = moData.mnCharIndex;
            int ePrevData = moData.mePrevData;
            int eNextData = fetchNext();

            if (moData.mbEnd) {
                // No cluster to the right: the tested boundary is the end of the text.
                return nTestIndex;
            }

            int nPrevIndex = TextCharProp.wordToIndex (ePrevData);
            int nNextIndex = TextCharProp.wordToIndex (eNextData);

            switch (gbWordBreak[nPrevIndex][nNextIndex]) {
                case WB_NO:
                    moData.mePrevData = eNextData;
                    break;

                case WB_YES:
                    // The right-hand cluster becomes the left-hand side of the
                    // next boundary; without this the next call would test
                    // against a cluster that is no longer adjacent.
                    moData.mePrevData = eNextData;
                    return nTestIndex;

                case WB_1ST:
                    // Right-hand cluster is skipped; keep the left-hand data.
                    break;

                case WB_MID: {
                    int nMidEndIndex = moData.mnCharIndex; // boundary just after the mid cluster
                    int eLookData = moData.next (moGraphemeIterator);

                    if (!moData.mbEnd
                        && TextCharProp.getWordClass (eLookData) == TextCharProp.getWordClass (ePrevData)) {
                        // Same word class on both sides of the mid cluster: all
                        // three clusters stay together and the look-ahead
                        // cluster is now the left-hand side.
                        moData.mePrevData = eLookData;
                        break;
                    }

                    // Break before the mid cluster. The look-ahead cluster (or
                    // the end-of-text it hit) has already been consumed from
                    // the grapheme iterator, so park it and rewind the scan
                    // state to the boundary after the mid cluster.
                    moPrevData.mnCharIndex = moData.mnCharIndex;
                    moPrevData.mbEnd = moData.mbEnd;
                    moPrevData.mePrevData = eLookData;
                    mbPrev = true;

                    moData.mnCharIndex = nMidEndIndex;
                    moData.mbEnd = false;
                    moData.mePrevData = eNextData;
                    return nTestIndex;
                }
            }
        }
    }

    // Produce the cluster on the right of the boundary being tested: the
    // pushed-back one if present, otherwise a fresh one from the grapheme
    // iterator. Either way moData.mnCharIndex/mbEnd end up as a real fetch
    // would leave them.
    private int fetchNext () {
        if (mbPrev) {
            mbPrev = false;
            moData.mnCharIndex = moPrevData.mnCharIndex;
            moData.mbEnd = moPrevData.mbEnd;
            return moPrevData.mePrevData;
        }
        return moData.next (moGraphemeIterator);
    }

    // Rewind the grapheme iterator, discard any pushed-back cluster and prime
    // the scan state with the first cluster.
    private int doFirst () {
        int nIndex = moGraphemeIterator.first();
        mbPrev = false;
        moData.mnCharIndex = nIndex;
        moData.mePrevData = moData.next (moGraphemeIterator);
        return nIndex;
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy