All downloads are free. Search and download functionality uses the official Maven repository.

com.adobe.xfa.text.TextBreakIterator Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
package com.adobe.xfa.text;

/**
 * 

* Fashioned loosely on the ICU break iterator class, the text break * iterator can be used to find significant breakpoints in text. The * most significant difference is that this class doesn't require an * array of characters; instead, the caller supplies a character * property iterator. *

*

* Currently the caller can request one of two implementation types, * through static class methods, in order to find grapheme cluster * breaks or word breaks. Note that word breaks are algorithmic only * and do not extend to languages like Thai which require dictionary * based breaking. *

* @exclude from published api -- Mike Tardif, May 2006. */ public abstract class TextBreakIterator { public static final int DONE = Integer.MAX_VALUE; /** * Find the first break (typically the start-of-text). * @return Index value--in the context of the given character property * iterator--of the first break in the text. */ public abstract int first (); /** * Find the next break. * @return Index value--in the context of the given character property * iterator--of the next break in the text. A special value of DONE * indicates there are no more breaks. */ public abstract int next (); /** * Create a grapheme cluster break iterator. * @param poTextCharPropIterator - Pointer to underlying character property * iterator, used by the grapheme cluster break iterator, to obtain * character properties for analysis. * @return Pointer to a break iterator implementation that performs * grapheme cluster analysis. This object uses the AXTE reference * counting model and the caller must release its reference when done. */ public static TextBreakIterator createGraphemeInstance (TextCharPropIterator poTextCharPropIterator) { return new GraphemeBreakIterator (poTextCharPropIterator); } /** * Create a word break iterator. * @param poTextCharPropIterator - Pointer to underlying character property * iterator, used by the word break iterator, to obtain character * properties for analysis. * @return Pointer to a break iterator implementation that performs word * analysis. This object uses the AXTE reference counting model and the * caller must release its reference when done. 
*/ public static TextBreakIterator createWordInstance (TextCharPropIterator poTextCharPropIterator) { return new WordBreakIterator (poTextCharPropIterator); } } //---------------------------------------------------------------------- // // TextIteratorData - Helper structure for tracking // iterator data // //---------------------------------------------------------------------- class IteratorData { public int mnCharIndex; public boolean mbEnd; public int mePrevData; public int next (TextCharPropIterator poTextCharPropIterator) { int eData = poTextCharPropIterator.next (mnCharIndex); mbEnd = eData == TextCharProp.INVALID; mnCharIndex = poTextCharPropIterator.getNextIndex(); return eData; } public int next (GraphemeBreakIterator oGraphemeIterator) { mnCharIndex = oGraphemeIterator.advanceGrapheme(); mbEnd = mnCharIndex == TextBreakIterator.DONE; return oGraphemeIterator.getPrevData(); } } //---------------------------------------------------------------------- // // TextGraphemeBreakIterator - Private implementation class // for finding grapheme cluster breaks // //---------------------------------------------------------------------- class GraphemeBreakIterator extends TextBreakIterator { private final TextCharPropIterator mpoTextCharPropIterator; private final IteratorData moData = new IteratorData(); private int mePrevData; static final boolean gbGraphemeBreak[][] = { /* Control CR Default Extend L LF LV LVT T V */ /* Control */ { true, true, true, false, true, true, true, true, true, true }, /* CR */ { true, true, true, false, true, false, true, true, true, true }, /* Default */ { true, true, true, false, true, true, true, true, true, true }, /* Extend */ { true, true, true, false, true, true, true, true, true, true }, /* L */ { true, true, true, false, false, true, false, false, true, false }, /* LF */ { true, true, true, false, true, true, true, true, true, true }, /* LV */ { true, true, true, false, true, true, true, true, false, false }, /* LVT */ { true, 
true, true, false, true, true, true, true, false, true }, /* T */ { true, true, true, false, true, true, true, true, false, true }, /* V */ { true, true, true, false, true, true, true, true, false, false } }; public GraphemeBreakIterator (TextCharPropIterator poTextCharPropIterator) { mpoTextCharPropIterator = poTextCharPropIterator; doFirst(); } public int getPrevData () { return mePrevData; } public int advanceGrapheme () { if (moData.mbEnd) { return DONE; } mePrevData = moData.mePrevData; for (; ; ) { int nTestIndex = moData.mnCharIndex; int eNextData = moData.next (mpoTextCharPropIterator); if (moData.mbEnd) { return nTestIndex; } int nPrevIndex = TextCharProp.graphemeToIndex (moData.mePrevData); int nNextIndex = TextCharProp.graphemeToIndex (eNextData); moData.mePrevData = eNextData; if (gbGraphemeBreak[nPrevIndex][nNextIndex]) { return nTestIndex; } } } public int first () { return doFirst(); } public int next () { return advanceGrapheme(); } private int doFirst () { int nIndex = mpoTextCharPropIterator.first(); moData.mnCharIndex = nIndex; moData.mePrevData = moData.next (mpoTextCharPropIterator); return nIndex; } } //---------------------------------------------------------------------- // // TextWordBreakIterator - Private implementation class for // finding word breaks // //---------------------------------------------------------------------- class WordBreakIterator extends TextBreakIterator { private final GraphemeBreakIterator moGraphemeIterator; private final IteratorData moData = new IteratorData(); private IteratorData moPrevData = new IteratorData(); private boolean mbPrev; private static final int WB_NO = 0; private static final int WB_YES = 1; private static final int WB_1ST = 2; private static final int WB_MID = 3; static final int gbWordBreak[][] = { /* ALetter Default ExtNL Format Ktkna MidLet MidNum Numeric */ /* ALetter */ { WB_NO, WB_YES, WB_NO, WB_1ST, WB_YES, WB_MID, WB_YES, WB_YES }, /* Default */ { WB_YES, WB_YES, WB_YES, WB_1ST, 
WB_YES, WB_YES, WB_YES, WB_YES }, /* ExtNL */ { WB_NO, WB_YES, WB_NO, WB_1ST, WB_NO, WB_YES, WB_YES, WB_NO }, /* Format */ { WB_YES, WB_YES, WB_YES, WB_1ST, WB_YES, WB_YES, WB_YES, WB_YES }, /* Ktkna */ { WB_YES, WB_YES, WB_NO, WB_1ST, WB_NO, WB_YES, WB_YES, WB_YES }, /* MidLet */ { WB_YES, WB_YES, WB_YES, WB_1ST, WB_YES, WB_YES, WB_YES, WB_YES }, /* MidNum */ { WB_YES, WB_YES, WB_YES, WB_1ST, WB_YES, WB_YES, WB_YES, WB_YES }, /* Numeric */ { WB_NO, WB_YES, WB_NO, WB_1ST, WB_YES, WB_YES, WB_MID, WB_YES } }; public WordBreakIterator (TextCharPropIterator poTextCharPropIterator) { moGraphemeIterator = new GraphemeBreakIterator (poTextCharPropIterator); mbPrev = false; doFirst(); } public int first () { return doFirst(); } public int next () { if (mbPrev) { if (moPrevData.mbEnd) { return DONE; } } else { if (moData.mbEnd) { return DONE; } } for (;;) { int nTestIndex = moData.mnCharIndex; int ePrevData; int eNextData; if (mbPrev) { ePrevData = moPrevData.mePrevData; eNextData = moData.mePrevData; mbPrev = false; } else { ePrevData = moData.mePrevData; eNextData = moData.next (moGraphemeIterator); } if (moData.mbEnd) { return nTestIndex; } int nPrevIndex = TextCharProp.wordToIndex (ePrevData); int nNextIndex = TextCharProp.wordToIndex (eNextData); switch (gbWordBreak[nPrevIndex][nNextIndex]) { case WB_NO: moData.mePrevData = eNextData; break; case WB_YES: return nTestIndex; case WB_1ST: break; case WB_MID: moPrevData = moData; moData.mePrevData = eNextData; eNextData = moData.next (moGraphemeIterator); if (moData.mbEnd || (TextCharProp.getWordClass (eNextData) != TextCharProp.getWordClass (moPrevData.mePrevData))) { return nTestIndex; } break; } } } private int doFirst () { int nIndex = moGraphemeIterator.first(); moData.mnCharIndex = nIndex; moData.mePrevData = moData.next (moGraphemeIterator); return nIndex; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy