All Downloads are FREE. Search and download functionalities are using the official Maven repository.

okapi-lib.1.44.0.source-code.rbbi.txt Maven / Gradle / Ivy

# Rule-builder options.
#   !!chain                 - rules may chain into one another.
#   !!quoted_literals_only  - literal characters in rules must be quoted or
#                             escaped; bare literals are rejected.
!!chain;
!!quoted_literals_only;

# ---------------------------------------------------------------------------
# Character classes.
# ---------------------------------------------------------------------------

# $Han is declared first because $Extend subtracts it.
$Han                = [:Han:];

# Classes taken directly from the Word_Break property.
$CR                 = [\p{Word_Break=CR}];
$LF                 = [\p{Word_Break=LF}];
$Newline            = [\p{Word_Break=Newline}];
$Extend             = [\p{Word_Break=Extend}-$Han];
$ZWJ                = [\p{Word_Break=ZWJ}];
$Regional_Indicator = [\p{Word_Break=Regional_Indicator}];
$Format             = [\p{Word_Break=Format}];
$Katakana           = [\p{Word_Break=Katakana}];
$Hebrew_Letter      = [\p{Word_Break=Hebrew_Letter}];
$ALetter            = [\p{Word_Break=ALetter}];
$Single_Quote       = [\p{Word_Break=Single_Quote}];
$Double_Quote       = [\p{Word_Break=Double_Quote}];
$MidNumLet          = [\p{Word_Break=MidNumLet}];
$MidLetter          = [\p{Word_Break=MidLetter}];
$MidNum             = [\p{Word_Break=MidNum}];
$Numeric            = [\p{Word_Break=Numeric}];
$ExtendNumLet       = [\p{Word_Break=ExtendNumLet}];
$WSegSpace          = [\p{Word_Break=WSegSpace}];

# Classes taken from other properties or explicit ranges.
$Extended_Pict      = [\p{Extended_Pictographic}];
$Hiragana           = [:Hiragana:];
$Ideographic        = [\p{Ideographic}];
$Control            = [\p{Grapheme_Cluster_Break=Control}];
$HangulSyllable     = [\uac00-\ud7a3];
$ComplexContext     = [:LineBreak=Complex_Context:];

# Composite classes built from the ones above.
$KanaKanji          = [$Han$Hiragana$Katakana];
$dictionaryCJK      = [$KanaKanji$HangulSyllable];
$dictionary         = [$ComplexContext$dictionaryCJK];

# ALetter without the CJK dictionary characters, plus the Complex_Context
# characters that are neither Extend nor Control.
$ALetterPlus        = [$ALetter-$dictionaryCJK[$ComplexContext-$Extend-$Control]];
# ---------------------------------------------------------------------------
# Word-break rules. Each rule describes a sequence that is kept together;
# a trailing {nnn} is the rule status value attached to the rule.
# In this section {100} is used on rules ending in Numeric, {200} on letter
# and Hangul rules, and {400} on Katakana / Hiragana / ideograph rules.
# ---------------------------------------------------------------------------

# CR LF is a single unit.
$CR $LF;

# ZWJ followed by a pictographic character.
$ZWJ $Extended_Pict;

# Two adjacent segment-space characters.
$WSegSpace $WSegSpace;

# Extend, Format and ZWJ characters are absorbed by what precedes them.
$ExFm = [$Extend$Format$ZWJ];

# A run of $ExFm on its own; the leading '^' marks the rule as not chained into.
^$ExFm+;
# Any character other than a line terminator or $ExFm, plus trailing $ExFm.
[^$CR$LF$Newline$ExFm] $ExFm*;

# Single characters of each word-like class, tagged with their status.
$Numeric        $ExFm* {100};
$ALetterPlus    $ExFm* {200};
$HangulSyllable        {200};
$Hebrew_Letter  $ExFm* {200};
$Katakana       $ExFm* {400};
$Hiragana       $ExFm* {400};
$Ideographic    $ExFm* {400};

# Letter followed by letter.
($ALetterPlus | $Hebrew_Letter) $ExFm* ($ALetterPlus | $Hebrew_Letter);

# Letter, one mid-word punctuation character, letter.
($ALetterPlus | $Hebrew_Letter) $ExFm* ($MidLetter | $MidNumLet | $Single_Quote) $ExFm* ($ALetterPlus | $Hebrew_Letter) {200};

# Hebrew letter followed by a single quote, or by a double quote between
# two Hebrew letters.
$Hebrew_Letter $ExFm* $Single_Quote {200};
$Hebrew_Letter $ExFm* $Double_Quote $ExFm* $Hebrew_Letter;

# Digit and letter sequences in any combination.
$Numeric $ExFm* $Numeric;
($ALetterPlus | $Hebrew_Letter) $ExFm* $Numeric;
$Numeric $ExFm* ($ALetterPlus | $Hebrew_Letter);

# Digit, one mid-number punctuation character, digit.
$Numeric $ExFm* ($MidNum | $MidNumLet | $Single_Quote) $ExFm* $Numeric;

# Katakana followed by Katakana.
$Katakana $ExFm* $Katakana {400};

# A word-like character followed by ExtendNumLet.
$ALetterPlus   $ExFm* $ExtendNumLet {200};
$Hebrew_Letter $ExFm* $ExtendNumLet {200};
$Numeric       $ExFm* $ExtendNumLet {100};
$Katakana      $ExFm* $ExtendNumLet {400};
$ExtendNumLet  $ExFm* $ExtendNumLet {200};

# ExtendNumLet followed by a word-like character.
$ExtendNumLet $ExFm* $ALetterPlus   {200};
$ExtendNumLet $ExFm* $Hebrew_Letter {200};
$ExtendNumLet $ExFm* $Numeric       {100};
$ExtendNumLet $ExFm* $Katakana      {400};

# A pair of regional indicators; '^' marks the rule as not chained into.
^$Regional_Indicator $ExFm* $Regional_Indicator;

# Adjacent Hangul syllables, and adjacent Han / Hiragana / Katakana characters.
$HangulSyllable $HangulSyllable {200};
$KanaKanji      $KanaKanji      {400};
# Hyphenated words (status 501): one or more letter/digit runs joined by a
# single $HYPHEN character, e.g. run-HYPHEN-run(-HYPHEN-run ...).
#
# $HYPHEN is the dash-punctuation category (Pd) minus an explicit exclusion
# list. The last excluded code point is U+10EAD; it is written \U00010EAD
# because \u consumes exactly four hex digits, so \u10ead would be read as
# U+10EA followed by the letter 'd' and U+10EAD would not be excluded.
$HYPHEN = [[\p{Pd}]-[\u1400\u2027\u2043\u2e1a\u2e40\u30a0\U00010EAD]];
$HYPHENATED_WORD = (($ALetterPlus | $Hebrew_Letter | $Numeric)+ $HYPHEN ($ALetterPlus | $Hebrew_Letter | $Numeric)+)+;
^$HYPHENATED_WORD {501};
# E-mail address (status 502): local part of ASCII letters, digits, '_', '-'
# and '.', then '@', then a name starting with a letter, then '.' and a
# lowercase ASCII suffix.
$EMAIL = [A-Za-z0-9_\-\.]+ \@ [A-Za-z] [A-Za-z0-9_]+ \. [a-z]+;
^$EMAIL {502};

# Markup tag (status 504): '<', one or more characters other than '>', '>'.
$MARKUP = \< [^\>]+ \>;
^$MARKUP {504};

# Emoticon (status 505): one "eyes" character followed by one or more
# "nose / mouth" characters.
$EMOTICON = [B8\:\;\{\[] [-=\/\{\}\)\(]+;
^$EMOTICON {505};

# Internet token (status 506), one of:
#   - four dot-separated digit groups,
#   - an optional label followed by two dot-separated labels,
#   - scheme "://" dotted host and one path segment.
# Only lowercase ASCII letters and digits are accepted in labels.
$INTERNET = ([0-9]+ \. [0-9]+ \. [0-9]+ \. [0-9]+)
          | ([a-z0-9]* \. [a-z0-9]+ \. [a-z0-9]+)
          | ([a-z]+ \: \/ \/ [a-z0-9]+ (\. [a-z0-9]+)+ (\/ [a-z0-9] [a-z0-9\.]+));
^$INTERNET {506};
# AM/PM marker in any letter case (AM, am, aM, Am, PM, pm, pM, Pm).
# Written with character sets rather than backslash-escaped letters: in a
# rule, "\a" is the escape for U+0007 (BEL), so the escaped spellings of the
# lowercase-a variants could never match the letter 'a'.
$AM_PM = ([AaPp][Mm]);

# Time of day (status 513), either
#   - 12-hour: hour 1-12 (optional leading 0), ':' or '.', minutes 00-59,
#     optional seconds, optional single space character, then $AM_PM; or
#   - 24-hour: hour 0-23 (optional leading 0 for single digits), ':' or '.',
#     minutes 00-59, optional seconds.
$TIME = (
      ( ([0]?[1-9] | [1][0-2]) (\: | \.) [0-5][0-9] ((\: | \.) [0-5][0-9])? ([:space:])? $AM_PM )
    | ( ([0]?[0-9] | [1][0-9] | [2][0-3]) (\: | \.) [0-5][0-9] ((\: | \.) [0-5][0-9])? )
);
^$TIME {513};
# Currency amount (status 514): a currency symbol (category Sc) followed by
# one of the amount shapes below. The last shape is optional as a whole, so
# a currency symbol on its own is also tagged 514.
#   1. 1-3 leading digits with optional ",ddd" groups and optional fraction
#   2. digits without grouping, optional fraction of up to two digits
#   3. a lone 0 with optional fraction of up to two digits
#   4. a fraction only: '.', one digit, optional second digit
# Shape 4 uses [0-9]? for the second digit; writing it as [0-9?] would put a
# literal '?' inside the set and accept text such as ".5?".
$CURRENCY_SYMBOL = [\p{Sc}];
$CURRENCY = $CURRENCY_SYMBOL (
      [1-9] [0-9]? [0-9]? (\, [0-9] [0-9] [0-9])* (\. [0-9]? [0-9])?
    | [1-9] [0-9]* (\. [0-9]? [0-9]?)?
    | [0] (\. [0-9]? [0-9]?)?
    | (\. [0-9] [0-9]?)?
);
^$CURRENCY {514};
# Date in month/day/year form with '/' separators and a four-digit year
# (status 515). Three month groups bound the accepted day numbers:
#   - months 1, 3, 5, 7, 8, 10, 12 : day up to 31
#   - months 4, 6, 9, 11           : day up to 30
#   - month 2                      : day up to 29
# Single-digit months may carry a leading 0.
#
# Digits are written as single-character sets ([1][1], [3][0], [2]) and not
# as backslash-escaped digits: "\1", "\2", "\3", "\0" are octal escapes for
# control characters, so the escaped spellings could never match "11", "30",
# "2" or "02".
$DATE = (
      ( (([0][13578]) | ([13578]) | ([1][02])) [\/] (([1-9]) | ([0-2][0-9]) | ([3][01])) )
    | ( (([0][469]) | ([469]) | ([1][1]))      [\/] (([1-9]) | ([0-2][0-9]) | ([3][0])) )
    | ( ([2] | [0][2])                         [\/] (([1-9]) | ([0-2][0-9])) )
)
[\/] ([0-9]) [0-9] [0-9] [0-9];
^$DATE {515};
# Abbreviation (status 516), either
#   - one or more pairs of uppercase letters, or
#   - an uppercase letter followed by one or more ".X" groups and an
#     optional trailing '.'.
# NOTE(review): the first alternative repeats a two-letter pair, so it only
# matches uppercase runs of even length - confirm this is intended.
$ABBREVIATION = ([\p{Lu}] [\p{Lu}])+ | ([\p{Lu}] ([.] [\p{Lu}])+ [.]?);
^$ABBREVIATION {516};

# Single-character fallback classes, each tagged with its own status.

# Separator characters (category Z), status 508.
$WHITESPACE = [\p{Z}];
^$WHITESPACE {508};

# Punctuation (category P), status 509.
$PUNCTUATION = [\p{P}];
^$PUNCTUATION {509};

# Extended_Pictographic characters, status 510.
$EMOJI = [\p{Extended_Pictographic}];
^$EMOJI {510};

# Symbols (category S) other than currency symbols (Sc), status 512.
$OTHER_SYMBOL = [[\p{S}]-[\p{Sc}]];
^$OTHER_SYMBOL {512};

# Catch-all: any single character, with no explicit status.
.;




© 2015 - 2025 Weber Informatics LLC | Privacy Policy