All Downloads are FREE. Search and download functionalities are using the official Maven repository.

core.common.segments.root.xml Maven / Gradle / Ivy

The newest version!
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<!--
Copyright © 1991-2015 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<ldml>
	<identity>
		<version number="$Revision: 12925 $"/>
		<language type="root"/>
	</identity>
	<segmentations>
		<segmentation type="GraphemeClusterBreak">
			<variables>
				<variable id="$CR">\p{Grapheme_Cluster_Break=CR}</variable>
				<variable id="$LF">\p{Grapheme_Cluster_Break=LF}</variable>
				<variable id="$Control">\p{Grapheme_Cluster_Break=Control}</variable>
				<variable id="$Extend">\p{Grapheme_Cluster_Break=Extend}</variable>
				<variable id="$ZWJ">\p{Grapheme_Cluster_Break=ZWJ}</variable>
				<variable id="$Regional_Indicator">\p{Grapheme_Cluster_Break=Regional_Indicator}</variable>
				<variable id="$Prepend">\p{Grapheme_Cluster_Break=Prepend}</variable>
				<variable id="$SpacingMark">\p{Grapheme_Cluster_Break=SpacingMark}</variable>
				<variable id="$L">\p{Grapheme_Cluster_Break=L}</variable>
				<variable id="$V">\p{Grapheme_Cluster_Break=V}</variable>
				<variable id="$T">\p{Grapheme_Cluster_Break=T}</variable>
				<variable id="$LV">\p{Grapheme_Cluster_Break=LV}</variable>
				<variable id="$LVT">\p{Grapheme_Cluster_Break=LVT}</variable>
				<variable id="$E_Base">[[\p{Grapheme_Cluster_Break = EB}] \U0001F3C2\U0001F3C7\U0001F3CC\U0001F46A-\U0001F46D\U0001F46F\U0001F574\U0001F6CC]</variable>
				<variable id="$E_Modifier">\p{Grapheme_Cluster_Break = EM}</variable>
				<variable id="$Extended_Pict">[\U0001F774-\U0001F77F\u2700-\u2701\u2703-\u2704\u270E\u2710-\u2711\u2765-\u2767\U0001F030-\U0001F093\U0001F094-\U0001F09F\U0001F10D-\U0001F10F\U0001F12F\U0001F16C-\U0001F16F\U0001F1AD-\U0001F1E5\U0001F203-\U0001F20F\U0001F23C-\U0001F23F\U0001F249-\U0001F24F\U0001F252-\U0001F2FF\U0001F7D5-\U0001F7FF\U0001F000-\U0001F003\U0001F005-\U0001F02B\U0001F02C-\U0001F02F\U0001F322-\U0001F323\U0001F394-\U0001F395\U0001F398\U0001F39C-\U0001F39D\U0001F3F1-\U0001F3F2\U0001F3F6\U0001F4FE\U0001F53E-\U0001F548\U0001F54F\U0001F568-\U0001F56E\U0001F571-\U0001F572\U0001F57B-\U0001F586\U0001F588-\U0001F589\U0001F58E-\U0001F58F\U0001F591-\U0001F594\U0001F597-\U0001F5A3\U0001F5A6-\U0001F5A7\U0001F5A9-\U0001F5B0\U0001F5B3-\U0001F5BB\U0001F5BD-\U0001F5C1\U0001F5C5-\U0001F5D0\U0001F5D4-\U0001F5DB\U0001F5DF-\U0001F5E0\U0001F5E2\U0001F5E4-\U0001F5E7\U0001F5E9-\U0001F5EE\U0001F5F0-\U0001F5F2\U0001F5F4-\U0001F5F9\u2605\u2607-\u260D\u260F-\u2610\u2612\u2616-\u2617\u2619-\u261C\u261E-\u261F\u2621\u2624-\u2625\u2627-\u2629\u262B-\u262D\u2630-\u2637\u263B-\u2647\u2654-\u265F\u2661-\u2662\u2664\u2667\u2669-\u267A\u267C-\u267E\u2680-\u2691\u2695\u2698\u269A\u269D-\u269F\u26A2-\u26A9\u26AC-\u26AF\u26B2-\u26BC\u26BF-\u26C3\u26C6-\u26C7\u26C9-\u26CD\u26D0\u26D2\u26D5-\u26E8\u26EB-\u26EF\u26F6\u26FB-\u26FC\u26FE-\u26FF\u2388\U0001FA00-\U0001FFFD\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F0AF-\U0001F0B0\U0001F0C0\U0001F0D0\U0001F0F6-\U0001F0FF\U0001F80C-\U0001F80F\U0001F848-\U0001F84F\U0001F85A-\U0001F85F\U0001F888-\U0001F88F\U0001F8AE-\U0001F8FF\U0001F900-\U0001F90F\U0001F91F\U0001F928-\U0001F92F\U0001F931-\U0001F932\U0001F93F\U0001F94C-\U0001F94F\U0001F95F-\U0001F97F\U0001F992-\U0001F9BF\U0001F9C1-\U0001F9FF\U0001F6C6-\U0001F6CA\U0001F6E6-\U0001F6E8\U0001F6EA\U0001F6F1-\U0001F6F2\U0001F6D3-\U0001F6DF\U0001F6ED-\U0001F6EF\U0001F6F7-\U0001F6FF]</variable>
				<variable id="$E_Base_GAZ">\p{Grapheme_Cluster_Break = EBG}</variable>
				<!-- Emoji, minus Regional_Indicator and the characters * # 0-9 © ® ™ 〰 〽. -->
				<!-- The asterisk is written as \u002a, as in the LineBreak and WordBreak definitions of $EmojiNRK, so it cannot be misread as a quantifier on the preceding property. -->
				<variable id="$EmojiNRK">[[\p{Emoji}] - [\p{Grapheme_Cluster_Break=Regional_Indicator}\u002a\u00230-9©®™〰〽]]</variable>
			</variables>
			<segmentRules>
				<!-- Grapheme cluster rules. As in the commented LineBreak rules of this file, × marks a position where no break is allowed and ÷ a position where a break is made. -->
				<!-- Rule 3: do not break between CR and LF. -->
				<rule id="3"> $CR × $LF </rule>
				<!-- Rules 4 and 5: otherwise break after and before Control, CR and LF. -->
				<rule id="4"> ( $Control | $CR | $LF ) ÷ </rule>
				<rule id="5"> ÷ ( $Control | $CR | $LF ) </rule>
				<!-- Rules 6 to 8: do not break inside the listed L / V / T / LV / LVT sequences. -->
				<rule id="6"> $L × ( $L | $V | $LV | $LVT ) </rule>
				<rule id="7"> ( $LV | $V ) × ( $V | $T ) </rule>
				<rule id="8"> ( $LVT | $T) × $T </rule>
				<!-- Rules 9 to 9.2: do not break before Extend, ZWJ or SpacingMark, nor after Prepend. -->
				<rule id="9"> × ($Extend | $ZWJ) </rule>
				<rule id="9.1"> × $SpacingMark </rule>
				<rule id="9.2"> $Prepend × </rule>
				<!-- Rule 10: do not break between E_Base or E_Base_GAZ (optionally followed by Extend characters) and E_Modifier. -->
				<rule id="10"> ($E_Base | $E_Base_GAZ) $Extend* × $E_Modifier </rule>
				<!-- Rule 11: do not break after a ZWJ that sits between two Extended_Pict / EmojiNRK characters. -->
				<rule id="11"> ($Extended_Pict | $EmojiNRK) $ZWJ × ($Extended_Pict | $EmojiNRK) </rule>
				<!-- Rules 12 and 13: Regional_Indicator pairs. Rule 12 breaks between a pair and a third Regional_Indicator. -->
				<!-- NOTE(review): both rules start with ^, and rule 13 contains neither × nor ÷, unlike the other rules in this block. Confirm the intended semantics with the consumer of this file. -->
				<rule id="12"> ^$Prepend* $Regional_Indicator $Regional_Indicator ÷ $Regional_Indicator </rule>
				<rule id="13"> ^$Prepend* $Regional_Indicator $Regional_Indicator </rule>
			</segmentRules>
		</segmentation>
		<segmentation type="LineBreak">
			<variables>
				<!-- Variables -->
				<variable id="$AI">\p{Line_Break=Ambiguous}</variable>
				<variable id="$AL">\p{Line_Break=Alphabetic}</variable>
				<variable id="$B2">\p{Line_Break=Break_Both}</variable>
				<variable id="$BA">\p{Line_Break=Break_After}</variable>
				<variable id="$BB">\p{Line_Break=Break_Before}</variable>
				<variable id="$BK">\p{Line_Break=Mandatory_Break}</variable>
				<variable id="$CB">\p{Line_Break=Contingent_Break}</variable>
				<variable id="$CL">\p{Line_Break=Close_Punctuation}</variable>
				<variable id="$CP">\p{Line_Break=Close_Parenthesis}</variable>
				<variable id="$CM">\p{Line_Break=Combining_Mark}</variable>
				<variable id="$CR">\p{Line_Break=Carriage_Return}</variable>
				<variable id="$EX">\p{Line_Break=Exclamation}</variable>
				<variable id="$GL">\p{Line_Break=Glue}</variable>
				<variable id="$H2">\p{Line_Break=H2}</variable>
				<variable id="$H3">\p{Line_Break=H3}</variable>
				<variable id="$HL">\p{Line_Break=Hebrew_Letter}</variable>
				<variable id="$HY">\p{Line_Break=Hyphen}</variable>
				<variable id="$ID">\p{Line_Break=Ideographic}</variable>
				<variable id="$IN">\p{Line_Break=Inseparable}</variable>
				<variable id="$IS">\p{Line_Break=Infix_Numeric}</variable>
				<variable id="$JL">\p{Line_Break=JL}</variable>
				<variable id="$JT">\p{Line_Break=JT}</variable>
				<variable id="$JV">\p{Line_Break=JV}</variable>
				<variable id="$LF">\p{Line_Break=Line_Feed}</variable>
				<variable id="$NL">\p{Line_Break=Next_Line}</variable>
				<variable id="$NS">\p{Line_Break=Nonstarter}</variable>
				<variable id="$NU">\p{Line_Break=Numeric}</variable>
				<variable id="$OP">\p{Line_Break=Open_Punctuation}</variable>
				<variable id="$PO">\p{Line_Break=Postfix_Numeric}</variable>
				<variable id="$PR">\p{Line_Break=Prefix_Numeric}</variable>
				<variable id="$QU">\p{Line_Break=Quotation}</variable>
				<variable id="$SA">\p{Line_Break=Complex_Context}</variable>
				<variable id="$SG">\p{Line_Break=Surrogate}</variable>
				<variable id="$SP">\p{Line_Break=Space}</variable>
				<variable id="$SY">\p{Line_Break=Break_Symbols}</variable>
				<variable id="$WJ">\p{Line_Break=Word_Joiner}</variable>
				<variable id="$XX">\p{Line_Break=Unknown}</variable>
				<variable id="$ZW">\p{Line_Break=ZWSpace}</variable>
				<variable id="$CJ">\p{Line_Break=Conditional_Japanese_Starter}</variable>
				<variable id="$RI">\p{Line_Break=Regional_Indicator}</variable>
				<variable id="$EB">[[\p{Line_Break=EB}]\U0001F3C2\U0001F3C7\U0001F3CC\U0001F46A-\U0001F46D\U0001F46F\U0001F574\U0001F6CC]</variable>
				<variable id="$EM">\p{Line_Break=EM}</variable>
				<variable id="$ZWJ">\p{Line_Break=ZWJ}</variable>
				<variable id="$EmojiNRK">[[\p{Emoji}] - [$RI \u002a\u00230-9©®™〰〽]]</variable>
				<variable id="$Extended_Pict">[\U0001F774-\U0001F77F\u2700-\u2701\u2703-\u2704\u270E\u2710-\u2711\u2765-\u2767\U0001F030-\U0001F093\U0001F094-\U0001F09F\U0001F10D-\U0001F10F\U0001F12F\U0001F16C-\U0001F16F\U0001F1AD-\U0001F1E5\U0001F203-\U0001F20F\U0001F23C-\U0001F23F\U0001F249-\U0001F24F\U0001F252-\U0001F2FF\U0001F7D5-\U0001F7FF\U0001F000-\U0001F003\U0001F005-\U0001F02B\U0001F02C-\U0001F02F\U0001F322-\U0001F323\U0001F394-\U0001F395\U0001F398\U0001F39C-\U0001F39D\U0001F3F1-\U0001F3F2\U0001F3F6\U0001F4FE\U0001F53E-\U0001F548\U0001F54F\U0001F568-\U0001F56E\U0001F571-\U0001F572\U0001F57B-\U0001F586\U0001F588-\U0001F589\U0001F58E-\U0001F58F\U0001F591-\U0001F594\U0001F597-\U0001F5A3\U0001F5A6-\U0001F5A7\U0001F5A9-\U0001F5B0\U0001F5B3-\U0001F5BB\U0001F5BD-\U0001F5C1\U0001F5C5-\U0001F5D0\U0001F5D4-\U0001F5DB\U0001F5DF-\U0001F5E0\U0001F5E2\U0001F5E4-\U0001F5E7\U0001F5E9-\U0001F5EE\U0001F5F0-\U0001F5F2\U0001F5F4-\U0001F5F9\u2605\u2607-\u260D\u260F-\u2610\u2612\u2616-\u2617\u2619-\u261C\u261E-\u261F\u2621\u2624-\u2625\u2627-\u2629\u262B-\u262D\u2630-\u2637\u263B-\u2647\u2654-\u265F\u2661-\u2662\u2664\u2667\u2669-\u267A\u267C-\u267E\u2680-\u2691\u2695\u2698\u269A\u269D-\u269F\u26A2-\u26A9\u26AC-\u26AF\u26B2-\u26BC\u26BF-\u26C3\u26C6-\u26C7\u26C9-\u26CD\u26D0\u26D2\u26D5-\u26E8\u26EB-\u26EF\u26F6\u26FB-\u26FC\u26FE-\u26FF\u2388\U0001FA00-\U0001FFFD\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F0AF-\U0001F0B0\U0001F0C0\U0001F0D0\U0001F0F6-\U0001F0FF\U0001F80C-\U0001F80F\U0001F848-\U0001F84F\U0001F85A-\U0001F85F\U0001F888-\U0001F88F\U0001F8AE-\U0001F8FF\U0001F900-\U0001F90F\U0001F91F\U0001F928-\U0001F92F\U0001F931-\U0001F932\U0001F93F\U0001F94C-\U0001F94F\U0001F95F-\U0001F97F\U0001F992-\U0001F9BF\U0001F9C1-\U0001F9FF\U0001F6C6-\U0001F6CA\U0001F6E6-\U0001F6E8\U0001F6EA\U0001F6F1-\U0001F6F2\U0001F6D3-\U0001F6DF\U0001F6ED-\U0001F6EF\U0001F6F7-\U0001F6FF]</variable>
				<!-- LB 1  Assign a line breaking class to each code point of the input. -->
				<!-- Resolve AI, CB, SA, SG, and XX into other line breaking classes depending on criteria outside the scope of this algorithm. -->
				<!-- NOTE: CB is ok to fall through, but must handle others here. -->
				<variable id="$AL">[$AL $AI $SG $XX [$SA-[[:Mn:][:Mc:]]]]</variable>
				<variable id="$NS">[$NS $CJ]</variable>
				<variable id="$CM">[$CM $ZWJ [$SA &amp; [[:Mn:][:Mc:]]]]</variable>
				<!-- WARNING: Fixes for Rule 9 -->
				<!-- Treat X CM* as if it were X. -->
				<!-- Where X is any line break class except SP, BK, CR, LF, NL or ZW. -->
				<variable id="$X">$CM*</variable>
				<variable id="$Spec1_">[$SP $BK $CR $LF $NL $ZW]</variable> <!-- $CANT_CM in ICU rules -->
				<variable id="$Spec2_">[^ $SP $BK $CR $LF $NL $ZW]</variable> <!-- $CAN_CM in ICU rules -->
				<variable id="$Spec3a_">[^ $SP $BA $HY $CM]</variable>
				<variable id="$Spec3b_">[^ $BA $HY $CM]</variable>
				<variable id="$Spec4_">[^ $NU $CM]</variable>
				<variable id="$AI">($AI $X)</variable>
				<variable id="$AL">($AL $X)</variable>
				<variable id="$B2">($B2 $X)</variable>
				<variable id="$BA">($BA $X)</variable>
				<variable id="$BB">($BB $X)</variable>
				<variable id="$CB">($CB $X)</variable>
				<variable id="$CL">($CL $X)</variable>
				<variable id="$CP">($CP $X)</variable>
				<variable id="$CM">($CM $X)</variable>
				<variable id="$GL">($GL $X)</variable>
				<variable id="$H2">($H2 $X)</variable>
				<variable id="$H3">($H3 $X)</variable>
				<variable id="$HL">($HL $X)</variable>
				<variable id="$HY">($HY $X)</variable>
				<variable id="$ID">($ID $X)</variable>
				<variable id="$IN">($IN $X)</variable>
				<variable id="$IS">($IS $X)</variable>
				<variable id="$JL">($JL $X)</variable>
				<variable id="$JT">($JT $X)</variable>
				<variable id="$JV">($JV $X)</variable>
				<variable id="$NS">($NS $X)</variable>
				<variable id="$NU">($NU $X)</variable>
				<variable id="$OP">($OP $X)</variable>
				<variable id="$PO">($PO $X)</variable>
				<variable id="$PR">($PR $X)</variable>
				<variable id="$QU">($QU $X)</variable>
				<variable id="$SA">($SA $X)</variable>
				<variable id="$SG">($SG $X)</variable>
				<variable id="$SY">($SY $X)</variable>
				<variable id="$WJ">($WJ $X)</variable>
				<variable id="$XX">($XX $X)</variable>
				<variable id="$RI">($RI $X)</variable>
				<variable id="$EB">($EB $X)</variable>
				<variable id="$EM">($EM $X)</variable>
				<variable id="$EmojiNRK">($EmojiNRK $X)</variable>
				<variable id="$Extended_Pict">($Extended_Pict $X)</variable>
				<!-- OUT OF ORDER ON PURPOSE -->
				<!-- LB 10  Treat any remaining combining mark as AL. -->
				<variable id="$AL">($AL | ^ $CM | (?&lt;=$Spec1_) $CM)</variable>
			</variables>
			<segmentRules>
				<!-- LB 4  Always break after hard line breaks (but never between CR and LF). -->
				<rule id="4"> $BK ÷ </rule>
				<!-- LB 5  Treat CR followed by LF, as well as CR, LF and NL as hard line breaks. -->
				<rule id="5.01"> $CR × $LF </rule>
				<rule id="5.02"> $CR ÷ </rule>
				<rule id="5.03"> $LF ÷ </rule>
				<rule id="5.04"> $NL ÷ </rule>
				<!-- LB 6  Do not break before hard line breaks. -->
				<rule id="6"> × ( $BK | $CR | $LF | $NL ) </rule>
				<!-- LB 7  Do not break before spaces or zero-width space. -->
				<rule id="7.01"> × $SP </rule>
				<rule id="7.02"> × $ZW </rule>
				<!-- LB 8  Break before any character following a zero-width space, even if one or more spaces intervene. -->
				<rule id="8"> $ZW $SP* ÷ </rule>
				<!-- LB 8a  Do not break between a zero width joiner and an ideograph, emoji base or emoji modifier. -->
				<rule id="8.1"> $ZWJ × ($ID | $Extended_Pict | $EmojiNRK) </rule>
				<!-- LB 9  Do not break a combining character sequence; treat it as if it has the LB class of the base character -->
				<!-- in all of the following rules. (Where X is any line break class except SP, BK, CR, LF, NL or ZW.) -->
				<rule id="9"> $Spec2_ × $CM </rule>
				<!-- WARNING: this is done by modifying the variable values for all but SP.... That is, $AL is really ($AL $CM*)! -->
				<!-- LB 11  Do not break before or after WORD JOINER and related characters. -->
				<rule id="11.01"> × $WJ </rule>
				<rule id="11.02"> $WJ × </rule>
				<!-- LB 12  Do not break after NBSP and related characters. -->
				<rule id="12.01"> $GL × </rule>
				<!-- LB 12a  Do not break before NBSP and related characters, except after spaces and hyphens. -->
				<rule id="12.11"> $Spec3a_ × $GL </rule>
				<rule id="12.12"> $Spec3b_ $CM+ × $GL </rule>
				<rule id="12.13"> ^ $CM+ × $GL </rule>
				<!-- LB 13  Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces. -->
				<!-- Using customization 7. -->
				<rule id="13.01"> × $EX </rule>
				<rule id="13.02"> $Spec4_ × ($CL | $CP | $IS | $SY) </rule>
				<rule id="13.03"> $Spec4_ $CM+ × ($CL | $CP | $IS | $SY) </rule>
				<rule id="13.04"> ^ $CM+ × ($CL | $CP | $IS | $SY) </rule>
				<!-- LB 14  Do not break after ‘[’, even after spaces. -->
				<rule id="14"> $OP $SP* × </rule>
				<!-- LB 15  Do not break within ‘"[’, even with intervening spaces. -->
				<rule id="15"> $QU $SP* × $OP </rule>
				<!-- LB 16  Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces. -->
				<rule id="16"> ($CL | $CP) $SP* × $NS </rule>
				<!-- LB 17  Do not break within ‘——’, even with intervening spaces. -->
				<rule id="17"> $B2 $SP* × $B2 </rule>
				<!-- LB 18  Break after spaces. -->
				<rule id="18"> $SP ÷ </rule>
				<!-- LB 19  Do not break before or after ‘"’. -->
				<rule id="19.01"> × $QU </rule>
				<rule id="19.02"> $QU × </rule>
				<!-- LB 20  Break before and after unresolved CB. -->
				<rule id="20.01"> ÷ $CB </rule>
				<rule id="20.02"> $CB ÷ </rule>
				<!-- LB 21  Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana and other non-starters, or after acute accents. -->
				<rule id="21.01"> × $BA </rule>
				<rule id="21.02"> × $HY </rule>
				<rule id="21.03"> × $NS </rule>
				<rule id="21.04"> $BB × </rule>
				<!-- LB 21a  Do not break after a Hebrew letter followed by a hyphen or break-after character. -->
				<rule id="21.1"> $HL ($HY | $BA) × </rule>
				<!-- LB 21b CLDR addition, do not break after $SY if followed by $HL (break before already prevented by 13.02 above) -->
				<rule id="21.2"> $SY × $HL </rule>
				<!-- LB 22  Do not break between two ellipses, or between letters or numbers and ellipsis. -->
				<rule id="22.01"> ($AL | $HL) × $IN </rule>
				<rule id="22.02"> ($ID | $EB | $EM) × $IN </rule>
				<rule id="22.03"> $IN × $IN </rule>
				<rule id="22.04"> $NU × $IN </rule>
				<rule id="22.05"> $EX × $IN </rule>
				<!-- LB 23  Do not break within ‘a9’, ‘3a’, or ‘H%’. -->
				<rule id="23.01"> ($AL | $HL) × $NU </rule>
				<rule id="23.02"> $NU × ($AL | $HL) </rule>
				<!-- LB 23a  Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes. -->
				<rule id="23.11"> $PR × ($ID | $EB | $EM) </rule>
				<rule id="23.12"> ($ID | $EB | $EM) × $PO </rule>
				<!-- LB 24  Do not break between prefix and letters or ideographs. -->
				<rule id="24.01"> ($PR | $PO) × ($AL | $HL) </rule>
				<rule id="24.02"> ($AL | $HL) × ($PR | $PO) </rule>
				<!-- Using customization 7 -->
				<!-- LB 25  Do not break between the following pairs of classes. -->
				<!-- LB 25-alternative: $PR? ( $OP | $HY )? $NU ($NU | $SY | $IS)* ($CL | $CP)? $PO? -->
				<!-- Insert × every place it could go. However, make sure that at least one thing is concrete, otherwise would cause $NU to not break before or after -->
				<rule id="25.01"> ($PR | $PO) × ( $OP | $HY )? $NU </rule>
				<rule id="25.02"> ( $OP | $HY ) × $NU </rule>
				<rule id="25.03"> $NU × ($NU | $SY | $IS) </rule>
				<rule id="25.04"> $NU ($NU | $SY | $IS)* × ($NU | $SY | $IS | $CL | $CP) </rule>
				<rule id="25.05"> $NU ($NU | $SY | $IS)* ($CL | $CP)? × ($PO | $PR) </rule>
				<!-- LB 26 Do not break a Korean syllable. -->
				<rule id="26.01"> $JL × ($JL | $JV | $H2 | $H3) </rule>
				<rule id="26.02"> ($JV | $H2) × ($JV | $JT) </rule>
				<rule id="26.03"> ($JT | $H3) × $JT </rule>
				<!-- LB 27 Treat a Korean Syllable Block the same as ID. -->
				<rule id="27.01"> ($JL | $JV | $JT | $H2 | $H3) × $IN </rule>
				<rule id="27.02"> ($JL | $JV | $JT | $H2 | $H3) × $PO </rule>
				<rule id="27.03"> $PR × ($JL | $JV | $JT | $H2 | $H3) </rule>
				<!-- LB 28  Do not break between alphabetics ("at"). -->
				<rule id="28"> ($AL | $HL) × ($AL | $HL) </rule>
				<!-- LB 29  Do not break between numeric punctuation and alphabetics ("e.g."). -->
				<rule id="29"> $IS × ($AL | $HL) </rule>
				<!-- LB 30  Do not break between letters, numbers or ordinary symbols and opening or closing punctuation. -->
				<rule id="30.01"> ($AL | $HL | $NU) × $OP </rule>
				<rule id="30.02"> $CP × ($AL | $HL | $NU) </rule>
				<!-- LB 30a  Do not break between Regional Indicators. -->
				<rule id="30.11"> $RI × $RI </rule>
				<!-- LB 30b  Do not break between an emoji base and an emoji modifier. -->
				<rule id="30.21"> $EB × $EM </rule>
			</segmentRules>
		</segmentation>
		<segmentation type="SentenceBreak">
			<variables>
				<variable id="$CR">\p{Sentence_Break=CR}</variable>
				<variable id="$LF">\p{Sentence_Break=LF}</variable>
				<variable id="$Extend">\p{Sentence_Break=Extend}</variable>
				<variable id="$Format">\p{Sentence_Break=Format}</variable>
				<variable id="$Sep">\p{Sentence_Break=Sep}</variable>
				<variable id="$Sp">\p{Sentence_Break=Sp}</variable>
				<variable id="$Lower">\p{Sentence_Break=Lower}</variable>
				<variable id="$Upper">\p{Sentence_Break=Upper}</variable>
				<variable id="$OLetter">\p{Sentence_Break=OLetter}</variable>
				<variable id="$Numeric">\p{Sentence_Break=Numeric}</variable>
				<variable id="$ATerm">\p{Sentence_Break=ATerm}</variable>
				<variable id="$STerm">\p{Sentence_Break=STerm}</variable>
				<variable id="$Close">\p{Sentence_Break=Close}</variable>
				<variable id="$SContinue">\p{Sentence_Break=SContinue}</variable>
				<variable id="$Any">.</variable>
				<!-- $NotPreLower_ (defined below) expresses the negation in rule 8; can't do this with normal regex, but works with UnicodeSet, which is all we need. -->
				<!-- WARNING: For Rule 5, now add format and extend ($FE*) to every class used by the rules except Sep, CR and LF -->
				<variable id="$FE">[$Format $Extend]</variable>
				<variable id="$NotPreLower_">[^ $OLetter $Upper $Lower $Sep $CR $LF $STerm $ATerm]</variable>
				<variable id="$Sp">($Sp $FE*)</variable>
				<variable id="$Lower">($Lower $FE*)</variable>
				<variable id="$Upper">($Upper $FE*)</variable>
				<variable id="$OLetter">($OLetter $FE*)</variable>
				<variable id="$Numeric">($Numeric $FE*)</variable>
				<variable id="$ATerm">($ATerm $FE*)</variable>
				<variable id="$STerm">($STerm $FE*)</variable>
				<variable id="$Close">($Close $FE*)</variable>
				<variable id="$SContinue">($SContinue $FE*)</variable>
			</variables>
			<segmentRules>
				<!-- Do not break within CRLF -->
				<rule id="3"> $CR × $LF </rule>
				<!-- Break after paragraph separators. -->
				<rule id="4"> ($Sep | $CR | $LF) ÷ </rule>
				<!-- Ignore Format and Extend characters, except when they appear at the beginning of a region of text. -->
				<!-- (See Section 6.2 Grapheme Cluster and Format Rules.) -->
				<!-- WARNING: Implemented as don't break before Format or Extend (rule 4 has already broken after Sep, CR and LF), -->
				<!-- AND by appending $FE* (Format and Extend) to the variable definitions used by the rules after this point! -->
				<rule id="5"> × [$Format $Extend] </rule>
				<!-- Do not break after ambiguous terminators like period, if immediately followed by a number or lowercase letter, -->
				<!-- is between uppercase letters, or if the first following letter (optionally after certain punctuation) is lowercase. -->
				<!-- For example, a period may be an abbreviation or numeric period, and not mark the end of a sentence. -->
				<!-- Rule 6: ATerm directly followed by Numeric. -->
				<rule id="6"> $ATerm × $Numeric </rule>
				<!-- Rule 7: ATerm preceded by Upper or Lower and followed by Upper. -->
				<rule id="7"> ($Upper | $Lower) $ATerm × $Upper </rule>
				<!-- Rule 8: ATerm (plus optional Close and Sp) followed by Lower, possibly after characters from $NotPreLower_. -->
				<rule id="8"> $ATerm $Close* $Sp* × $NotPreLower_* $Lower </rule>
				<!-- Rule 8.1: do not break before SContinue, STerm or ATerm that follows a terminator (optionally after Close and Sp). -->
				<rule id="8.1"> ($STerm | $ATerm) $Close* $Sp* × ($SContinue | $STerm | $ATerm) </rule>
				<!-- Break after sentence terminators, but include closing punctuation, trailing spaces, and (optionally) a paragraph separator. -->
				<!-- Rules 9 and 10 keep the trailing Close, Sp, Sep, CR and LF attached; rule 11 then makes the break. -->
				<rule id="9"> ( $STerm | $ATerm ) $Close* × ( $Close | $Sp | $Sep | $CR | $LF ) </rule>
				<!-- Note the fix to $Sp*, $Sep? -->
				<rule id="10"> ( $STerm | $ATerm ) $Close* $Sp* × ( $Sp | $Sep | $CR | $LF ) </rule>
				<rule id="11"> ( $STerm | $ATerm ) $Close* $Sp* ($Sep | $CR | $LF)? ÷ </rule>
				<!-- Otherwise, do not break -->
				<rule id="12"> × $Any </rule>
			</segmentRules>
			<suppressions type="standard">
				<!-- root suppression is empty. -->
			</suppressions>
		</segmentation>
		<segmentation type="WordBreak">
			<variables>
				<variable id="$CR">\p{Word_Break=CR}</variable>
				<variable id="$LF">\p{Word_Break=LF}</variable>
				<variable id="$Newline">\p{Word_Break=Newline}</variable>
				<variable id="$Extend">\p{Word_Break=Extend}</variable>
				<!-- Now normal variables -->
				<variable id="$ZWJ">\p{Word_Break=ZWJ}</variable>
				<variable id="$Format">\p{Word_Break=Format}</variable>
				<variable id="$Katakana">\p{Word_Break=Katakana}</variable>
				<variable id="$Hebrew_Letter">\p{Word_Break=Hebrew_Letter}</variable>
				<variable id="$ALetter">\p{Word_Break=ALetter}</variable>
				<variable id="$Single_Quote">\p{Word_Break=Single_Quote}</variable>
				<variable id="$Double_Quote">\p{Word_Break=Double_Quote}</variable>
				<variable id="$MidNumLet">\p{Word_Break=MidNumLet}</variable>
				<variable id="$MidLetter">\p{Word_Break=MidLetter}</variable>
				<variable id="$MidNum">\p{Word_Break=MidNum}</variable>
				<variable id="$Numeric">\p{Word_Break=Numeric}</variable>
				<variable id="$ExtendNumLet">\p{Word_Break=ExtendNumLet}</variable>
				<variable id="$Regional_Indicator">\p{Word_Break=Regional_Indicator}</variable>
				<variable id="$E_Base">[\p{Word_Break = EB}\U0001F3C2\U0001F3C7\U0001F3CC\U0001F46A-\U0001F46D\U0001F46F\U0001F574\U0001F6CC]</variable>
				<variable id="$E_Modifier">\p{Word_Break=EM}</variable>
				<variable id="$Extended_Pict">[\U0001F774-\U0001F77F\u2700-\u2701\u2703-\u2704\u270E\u2710-\u2711\u2765-\u2767\U0001F030-\U0001F093\U0001F094-\U0001F09F\U0001F10D-\U0001F10F\U0001F12F\U0001F16C-\U0001F16F\U0001F1AD-\U0001F1E5\U0001F203-\U0001F20F\U0001F23C-\U0001F23F\U0001F249-\U0001F24F\U0001F252-\U0001F2FF\U0001F7D5-\U0001F7FF\U0001F000-\U0001F003\U0001F005-\U0001F02B\U0001F02C-\U0001F02F\U0001F322-\U0001F323\U0001F394-\U0001F395\U0001F398\U0001F39C-\U0001F39D\U0001F3F1-\U0001F3F2\U0001F3F6\U0001F4FE\U0001F53E-\U0001F548\U0001F54F\U0001F568-\U0001F56E\U0001F571-\U0001F572\U0001F57B-\U0001F586\U0001F588-\U0001F589\U0001F58E-\U0001F58F\U0001F591-\U0001F594\U0001F597-\U0001F5A3\U0001F5A6-\U0001F5A7\U0001F5A9-\U0001F5B0\U0001F5B3-\U0001F5BB\U0001F5BD-\U0001F5C1\U0001F5C5-\U0001F5D0\U0001F5D4-\U0001F5DB\U0001F5DF-\U0001F5E0\U0001F5E2\U0001F5E4-\U0001F5E7\U0001F5E9-\U0001F5EE\U0001F5F0-\U0001F5F2\U0001F5F4-\U0001F5F9\u2605\u2607-\u260D\u260F-\u2610\u2612\u2616-\u2617\u2619-\u261C\u261E-\u261F\u2621\u2624-\u2625\u2627-\u2629\u262B-\u262D\u2630-\u2637\u263B-\u2647\u2654-\u265F\u2661-\u2662\u2664\u2667\u2669-\u267A\u267C-\u267E\u2680-\u2691\u2695\u2698\u269A\u269D-\u269F\u26A2-\u26A9\u26AC-\u26AF\u26B2-\u26BC\u26BF-\u26C3\u26C6-\u26C7\u26C9-\u26CD\u26D0\u26D2\u26D5-\u26E8\u26EB-\u26EF\u26F6\u26FB-\u26FC\u26FE-\u26FF\u2388\U0001FA00-\U0001FFFD\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F0AF-\U0001F0B0\U0001F0C0\U0001F0D0\U0001F0F6-\U0001F0FF\U0001F80C-\U0001F80F\U0001F848-\U0001F84F\U0001F85A-\U0001F85F\U0001F888-\U0001F88F\U0001F8AE-\U0001F8FF\U0001F900-\U0001F90F\U0001F91F\U0001F928-\U0001F92F\U0001F931-\U0001F932\U0001F93F\U0001F94C-\U0001F94F\U0001F95F-\U0001F97F\U0001F992-\U0001F9BF\U0001F9C1-\U0001F9FF\U0001F6C6-\U0001F6CA\U0001F6E6-\U0001F6E8\U0001F6EA\U0001F6F1-\U0001F6F2\U0001F6D3-\U0001F6DF\U0001F6ED-\U0001F6EF\U0001F6F7-\U0001F6FF]</variable>
				<variable id="$EBG">\p{Word_Break=EBG}</variable>
				<variable id="$EmojiNRK">[[\p{Emoji}] - [\p{Word_Break=Regional_Indicator}\u002a\u00230-9©®™〰〽]]</variable>
				<!-- WARNING: For Rule 4: Fixes for GC, Format -->
				<!-- Add format,extend,zwj to most everything -->
				<variable id="$FEZ">[$Format $Extend $ZWJ]</variable>
				<variable id="$NotBreak_">[^ $Newline $CR $LF ]</variable>
				<variable id="$Katakana">($Katakana $FEZ*)</variable>
				<variable id="$Hebrew_Letter">($Hebrew_Letter $FEZ*)</variable>
				<variable id="$ALetter">($ALetter $FEZ*)</variable>
				<variable id="$Single_Quote">($Single_Quote $FEZ*)</variable>
				<variable id="$Double_Quote">($Double_Quote $FEZ*)</variable>
				<variable id="$MidNumLet">($MidNumLet $FEZ*)</variable>
				<variable id="$MidLetter">($MidLetter $FEZ*)</variable>
				<variable id="$MidNum">($MidNum $FEZ*)</variable>
				<variable id="$Numeric">($Numeric $FEZ*)</variable>
				<variable id="$ExtendNumLet">($ExtendNumLet $FEZ*)</variable>
				<variable id="$Regional_Indicator">($Regional_Indicator $FEZ*)</variable>
				<!-- Composite classes used by the rules below. -->
				<!-- They are built with alternation rather than a [...] set: $ALetter, $Hebrew_Letter, $MidNumLet and $Single_Quote have already been redefined above as parenthesised sequences (X $FEZ*), which cannot be members of a set. -->
				<variable id="$AHLetter">($ALetter | $Hebrew_Letter)</variable>
				<variable id="$MidNumLetQ">($MidNumLet | $Single_Quote)</variable>
			</variables>
			<segmentRules>
				<!-- Rules 3 to 3.2: do not break within CRLF; otherwise break after and before Newline, CR and LF. -->
				<rule id="3"> $CR × $LF </rule>
				<rule id="3.1"> ($Newline | $CR | $LF) ÷ </rule>
				<rule id="3.2"> ÷ ($Newline | $CR | $LF) </rule>
				<!-- Rule 3.3: do not break between a ZWJ and a following Extended_Pict or EmojiNRK character. -->
				<rule id="3.3"> $ZWJ × ($Extended_Pict | $EmojiNRK) </rule>
				<!-- Ignore Format, Extend and ZWJ characters, except when they appear at the beginning of a region of text. -->
				<!-- (See Section 6.2 Grapheme Cluster and Format Rules.) -->
				<!-- WARNING: Implemented as don't break before Format, Extend or ZWJ (except after Newline, CR or LF), -->
				<!-- AND by appending $FEZ* to most of the variable definitions used by the rules after this point! -->
				<rule id="4"> $NotBreak_ × ($Extend | $Format | $ZWJ)+ </rule>
				<!-- Vanilla rules -->
				<!-- Rules 5 to 7: do not break between letters, nor on either side of a MidLetter, MidNumLet or Single_Quote that sits between letters. -->
				<rule id="5"> $AHLetter × $AHLetter </rule>
				<rule id="6"> $AHLetter × ($MidLetter | $MidNumLetQ) $AHLetter </rule>
				<!-- Rule 7 mirrors rule 6 (as rule 11 mirrors rule 12): same classes, with the no-break position after the mid character instead of before it. -->
				<rule id="7"> $AHLetter ($MidLetter | $MidNumLetQ) × $AHLetter </rule>
				<!-- Rules 7.1 to 7.3: Hebrew_Letter with Single_Quote and Double_Quote. -->
				<rule id="7.1"> $Hebrew_Letter × $Single_Quote </rule>
				<rule id="7.2"> $Hebrew_Letter × $Double_Quote $Hebrew_Letter </rule>
				<rule id="7.3"> $Hebrew_Letter $Double_Quote × $Hebrew_Letter </rule>
				<!-- Rules 8 to 10: do not break within runs of Numeric, or between letters and Numeric. -->
				<rule id="8"> $Numeric × $Numeric </rule>
				<rule id="9"> $AHLetter × $Numeric </rule>
				<rule id="10"> $Numeric × $AHLetter </rule>
				<!-- Rules 11 and 12: do not break on either side of a MidNum, MidNumLet or Single_Quote that sits between Numeric characters. -->
				<rule id="11"> $Numeric ($MidNum | $MidNumLetQ) × $Numeric </rule>
				<rule id="12"> $Numeric × ($MidNum | $MidNumLetQ) $Numeric </rule>
				<!-- Rule 13: do not break between Katakana. -->
				<rule id="13"> $Katakana × $Katakana </rule>
				<!-- Rules 13.1 and 13.2: do not break between ExtendNumLet and adjacent letters, Numeric, Katakana or ExtendNumLet. -->
				<rule id="13.1"> ($AHLetter | $Numeric | $Katakana | $ExtendNumLet) × $ExtendNumLet </rule>
				<rule id="13.2"> $ExtendNumLet × ($AHLetter | $Numeric | $Katakana) </rule>
				<!-- Rule 14: do not break between E_Base or EBG (optionally followed by Format, Extend or ZWJ) and E_Modifier. -->
				<rule id="14"> ($E_Base | $EBG) ($Format | $Extend | $ZWJ)* × $E_Modifier </rule>
				<!-- Rule 15: do not break between two Regional_Indicator characters. NOTE(review): the leading ^ is used only here and in GraphemeClusterBreak rules 12 and 13; confirm its meaning with the consumer of this file. -->
				<rule id="15"> ^$Regional_Indicator × $Regional_Indicator </rule>
			</segmentRules>
		</segmentation>
	</segmentations>
</ldml>




© 2015 - 2025 Weber Informatics LLC | Privacy Policy