com.groupbyinc.flux.common.apache.lucene.analysis.wikipedia.WikipediaTokenizer.class Maven / Gradle / Ivy
???? 4u Ncom/groupbyinc/flux/common/apache/lucene/analysis/wikipedia/WikipediaTokenizer ;com/groupbyinc/flux/common/apache/lucene/analysis/Tokenizer WikipediaTokenizer.java Ccom/groupbyinc/flux/common/apache/lucene/util/AttributeSource$State =com/groupbyinc/flux/common/apache/lucene/util/AttributeSource State
INTERNAL_LINK Ljava/lang/String; il
EXTERNAL_LINK el EXTERNAL_LINK_URL elu CITATION ci CATEGORY c BOLD b ITALICS i BOLD_ITALICS bi " HEADING h % SUB_HEADING sh ( ALPHANUM_ID I
APOSTROPHE_ID
ACRONYM_ID
COMPANY_ID EMAIL_ID HOST_ID NUM_ID CJ_ID INTERNAL_LINK_ID EXTERNAL_LINK_ID CITATION_ID
CATEGORY_ID BOLD_ID
ITALICS_ID
BOLD_ITALICS_ID
HEADING_ID SUB_HEADING_ID EXTERNAL_LINK_URL_ID TOKEN_TYPES [Ljava/lang/String; TOKENS_ONLY UNTOKENIZED_ONLY BOTH UNTOKENIZED_TOKEN_FLAG scanner TLcom/groupbyinc/flux/common/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl; tokenOutput untokenizedTypes Ljava/util/Set; #Ljava/util/Set; tokens Ljava/util/Iterator; [Ljava/util/Iterator; offsetAtt SLcom/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/OffsetAttribute; typeAtt QLcom/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/TypeAttribute;
posIncrAtt ^Lcom/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute; termAtt ULcom/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/CharTermAttribute; flagsAtt RLcom/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/FlagsAttribute; first Z ()V java/util/Collections l emptySet ()Ljava/util/Set; n o
m p (ILjava/util/Set;)V j r
s this PLcom/groupbyinc/flux/common/apache/lucene/analysis/wikipedia/WikipediaTokenizer; j k
w W + y X Y { [ \ } Qcom/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/OffsetAttribute addAttribute L(Ljava/lang/Class;)Lcom/groupbyinc/flux/common/apache/lucene/util/Attribute; ? ?
? ^ _ ? Ocom/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/TypeAttribute ? ` a ? \com/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute ? b c ? Scom/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/CharTermAttribute ? d e ? Pcom/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/FlagsAttribute ? f g ? Rcom/groupbyinc/flux/common/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl ? input Ljava/io/Reader; ? ? ? (Ljava/io/Reader;)V j ?
? ? U V ? init ? r
? S(Lcom/groupbyinc/flux/common/apache/lucene/util/AttributeFactory;ILjava/util/Set;)V C(Lcom/groupbyinc/flux/common/apache/lucene/util/AttributeFactory;)V j ?
? factory @Lcom/groupbyinc/flux/common/apache/lucene/util/AttributeFactory; "java/lang/IllegalArgumentException ? 9tokenOutput must be TOKENS_ONLY, UNTOKENIZED_ONLY or BOTH ? (Ljava/lang/String;)V j ?
? ? incrementToken ()Z java/io/IOException ? java/util/Iterator ? hasNext ? ? ? ? next ()Ljava/lang/Object; ? ? ? ? restoreState H(Lcom/groupbyinc/flux/common/apache/lucene/util/AttributeSource$State;)V ? ?
? clearAttributes ? k
? getNextToken ()I ? ?
? ? O P ? ?
java/util/Set ? contains (Ljava/lang/Object;)Z ? ? ? ? java/lang/String ?
setupToken ? k
? collapseTokens (I)V ? ?
? collapseAndSaveTokens (ILjava/lang/String;)V ? ?
? getPositionIncrement ? ?
? ? h i ? setPositionIncrement ? ? ? ? setType ? ? ? ? state ELcom/groupbyinc/flux/common/apache/lucene/util/AttributeSource$State; tokenType type posinc java/lang/StringBuilder ? j ?
? ? setText (Ljava/lang/StringBuilder;)I ? ?
? ? yychar ? ?
? ? java/util/ArrayList ?
? w setupSavedToken ? ?
? captureState G()Lcom/groupbyinc/flux/common/apache/lucene/util/AttributeSource$State;
java/util/List add ? getNumWikiTokensSeen ?
?
append (C)Ljava/lang/StringBuilder;
? toString ()Ljava/lang/String;
? trim
? setEmpty W()Lcom/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/CharTermAttribute; ? i(Ljava/lang/String;)Lcom/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/CharTermAttribute; ?
correctOffset (I)I
length" ?
?# setOffset (II)V%& ?' setFlags) ? ?* yylength, ?
?-
yypushback/ ?
?0 iterator ()Ljava/util/Iterator;234 currPos buffer Ljava/lang/StringBuilder; numAdded theStart lastPos
tmpTokType numSeen tmp WLjava/util/List; Ljava/util/List; s positionInc getText X(Lcom/groupbyinc/flux/common/apache/lucene/analysis/tokenattributes/CharTermAttribute;)VCD
?E ?# start closeI k
J yyresetL ?
?M resetO k
P
?P endS k
T finalOffset
X Z \ ^ ` b d f ?
ConstantValue Signature Code LocalVariableTable LineNumberTable LocalVariableTypeTable '(ILjava/util/Set;)V g(Lcom/groupbyinc/flux/common/apache/lucene/util/AttributeFactory;ILjava/util/Set;)V
StackMapTable
Exceptions
SourceFile InnerClasses 1 + i i i i i i i ! i # $ i &