All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.str.ToUpper Maven / Gradle / Ivy

There is a newer version: 12.5
Show newest version
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2023 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.str;

import net.sf.saxon.z.IntHashMap;
import net.sf.saxon.z.IntIterator;
import net.sf.saxon.z.IntRangeToIntMap;

/**
 * Class to perform uppercase conversion.
 * 

Note we could use the built-in Java function (the rules are compatible with the XPath rules), but * we would then need to convert the UnicodeString to a Java String; and using our own implementation * is the best approach on .NET.

*/ public class ToUpper { private final static IntRangeToIntMap ranges = new IntRangeToIntMap(750); private final static IntHashMap specials = new IntHashMap<>(100); private final static char[] latin = new char[256]; private static void range(int start, int end, int delta) { ranges.addEntry(start, end, delta); if (end < 256) { for (int i=start; i<=end; i++) { latin[i] = (char)(i+delta); } } } private static void single(int cp, int delta) { range(cp, cp, delta); } private static void special(int cp, String result) { specials.put(cp, result); } private static void init() { // Data generated using stylesheet make-uppercase-table in tools/unicode. // Note, the stylesheet relies on having a conformant implementation of fn:upper-case() to start with! for (int i=0; i<256; i++) { latin[i] = (char)i; } // START GENERATED CONTENT range(97, 122, -32); single(181, 743); special(223, "SS"); range(224, 246, -32); range(248, 254, -32); single(255, 121); single(257, -1); single(259, -1); single(261, -1); single(263, -1); single(265, -1); single(267, -1); single(269, -1); single(271, -1); single(273, -1); single(275, -1); single(277, -1); single(279, -1); single(281, -1); single(283, -1); single(285, -1); single(287, -1); single(289, -1); single(291, -1); single(293, -1); single(295, -1); single(297, -1); single(299, -1); single(301, -1); single(303, -1); single(305, -232); single(307, -1); single(309, -1); single(311, -1); single(314, -1); single(316, -1); single(318, -1); single(320, -1); single(322, -1); single(324, -1); single(326, -1); single(328, -1); special(329, "\u02BCN"); single(331, -1); single(333, -1); single(335, -1); single(337, -1); single(339, -1); single(341, -1); single(343, -1); single(345, -1); single(347, -1); single(349, -1); single(351, -1); single(353, -1); single(355, -1); single(357, -1); single(359, -1); single(361, -1); single(363, -1); single(365, -1); single(367, -1); single(369, -1); single(371, -1); single(373, -1); single(375, -1); single(378, -1); single(380, -1); single(382, -1); single(383, -300); single(384, 195); single(387, -1); single(389, -1); single(392, -1); single(396, -1); single(402, -1); single(405, 97); single(409, -1); single(410, 163); single(414, 130); single(417, -1); single(419, -1); single(421, -1); single(424, -1); single(429, -1); single(432, -1); single(436, -1); single(438, -1); single(441, -1); single(445, -1); single(447, 56); single(453, -1); single(454, -2); single(456, -1); single(457, -2); single(459, -1); single(460, -2); single(462, -1); single(464, -1); single(466, -1); single(468, -1); single(470, -1); single(472, -1); single(474, -1); single(476, -1); single(477, -79); single(479, -1); single(481, -1); single(483, -1); single(485, -1); single(487, -1); single(489, -1); single(491, -1); single(493, -1); single(495, -1); special(496, "J\u030C"); single(498, -1); single(499, -2); single(501, -1); single(505, -1); single(507, -1); single(509, -1); single(511, -1); single(513, -1); single(515, -1); single(517, -1); single(519, -1); single(521, -1); single(523, -1); single(525, -1); single(527, -1); single(529, -1); single(531, -1); single(533, -1); single(535, -1); single(537, -1); single(539, -1); single(541, -1); single(543, -1); single(547, -1); single(549, -1); single(551, -1); single(553, -1); single(555, -1); single(557, -1); single(559, -1); single(561, -1); single(563, -1); single(572, -1); range(575, 576, 10815); single(578, -1); single(583, -1); single(585, -1); single(587, -1); single(589, -1); single(591, -1); single(592, 10783); single(593, 10780); single(594, 10782); single(595, -210); single(596, -206); range(598, 599, -205); single(601, -202); single(603, -203); single(608, -205); single(611, -207); single(613, 42280); single(614, 42308); single(616, -209); single(617, -211); single(619, 10743); single(623, -211); single(625, 10749); single(626, -213); single(629, -214); single(637, 10727); single(640, -218); single(643, -218); single(648, -218); single(649, -69); range(650, 651, -217); single(652, -71); single(658, -219); single(837, 84); single(881, -1); single(883, -1); single(887, -1); range(891, 893, 130); special(912, "\u0399\u0308\u0301"); single(940, -38); range(941, 943, -37); special(944, "\u03A5\u0308\u0301"); range(945, 961, -32); single(962, -31); range(963, 971, -32); single(972, -64); range(973, 974, -63); single(976, -62); single(977, -57); single(981, -47); single(982, -54); single(983, -8); single(985, -1); single(987, -1); single(989, -1); single(991, -1); single(993, -1); single(995, -1); single(997, -1); single(999, -1); single(1001, -1); single(1003, -1); single(1005, -1); single(1007, -1); single(1008, -86); single(1009, -80); single(1010, 7); single(1013, -96); single(1016, -1); single(1019, -1); range(1072, 1103, -32); range(1104, 1119, -80); single(1121, -1); single(1123, -1); single(1125, -1); single(1127, -1); single(1129, -1); single(1131, -1); single(1133, -1); single(1135, -1); single(1137, -1); single(1139, -1); single(1141, -1); single(1143, -1); single(1145, -1); single(1147, -1); single(1149, -1); single(1151, -1); single(1153, -1); single(1163, -1); single(1165, -1); single(1167, -1); single(1169, -1); single(1171, -1); single(1173, -1); single(1175, -1); single(1177, -1); single(1179, -1); single(1181, -1); single(1183, -1); single(1185, -1); single(1187, -1); single(1189, -1); single(1191, -1); single(1193, -1); single(1195, -1); single(1197, -1); single(1199, -1); single(1201, -1); single(1203, -1); single(1205, -1); single(1207, -1); single(1209, -1); single(1211, -1); single(1213, -1); single(1215, -1); single(1218, -1); single(1220, -1); single(1222, -1); single(1224, -1); single(1226, -1); single(1228, -1); single(1230, -1); single(1231, -15); single(1233, -1); single(1235, -1); single(1237, -1); single(1239, -1); single(1241, -1); single(1243, -1); single(1245, -1); single(1247, -1); single(1249, -1); single(1251, -1); single(1253, -1); single(1255, -1); single(1257, -1); single(1259, -1); single(1261, -1); single(1263, -1); single(1265, -1); single(1267, -1); single(1269, -1); single(1271, -1); single(1273, -1); single(1275, -1); single(1277, -1); single(1279, -1); single(1281, -1); single(1283, -1); single(1285, -1); single(1287, -1); single(1289, -1); single(1291, -1); single(1293, -1); single(1295, -1); single(1297, -1); single(1299, -1); single(1301, -1); single(1303, -1); single(1305, -1); single(1307, -1); single(1309, -1); single(1311, -1); single(1313, -1); single(1315, -1); single(1317, -1); single(1319, -1); range(1377, 1414, -48); special(1415, "\u0535\u0552"); single(7545, 35332); single(7549, 3814); single(7681, -1); single(7683, -1); single(7685, -1); single(7687, -1); single(7689, -1); single(7691, -1); single(7693, -1); single(7695, -1); single(7697, -1); single(7699, -1); single(7701, -1); single(7703, -1); single(7705, -1); single(7707, -1); single(7709, -1); single(7711, -1); single(7713, -1); single(7715, -1); single(7717, -1); single(7719, -1); single(7721, -1); single(7723, -1); single(7725, -1); single(7727, -1); single(7729, -1); single(7731, -1); single(7733, -1); single(7735, -1); single(7737, -1); single(7739, -1); single(7741, -1); single(7743, -1); single(7745, -1); single(7747, -1); single(7749, -1); single(7751, -1); single(7753, -1); single(7755, -1); single(7757, -1); single(7759, -1); single(7761, -1); single(7763, -1); single(7765, -1); single(7767, -1); single(7769, -1); single(7771, -1); single(7773, -1); single(7775, -1); single(7777, -1); single(7779, -1); single(7781, -1); single(7783, -1); single(7785, -1); single(7787, -1); single(7789, -1); single(7791, -1); single(7793, -1); single(7795, -1); single(7797, -1); single(7799, -1); single(7801, -1); single(7803, -1); single(7805, -1); single(7807, -1); single(7809, -1); single(7811, -1); single(7813, -1); single(7815, -1); single(7817, -1); single(7819, -1); single(7821, -1); single(7823, -1); single(7825, -1); single(7827, -1); single(7829, -1); special(7830, "H\u0331"); special(7831, "T\u0308"); special(7832, "W\u030A"); special(7833, "Y\u030A"); special(7834, "A\u02BE"); single(7835, -59); single(7841, -1); single(7843, -1); single(7845, -1); single(7847, -1); single(7849, -1); single(7851, -1); single(7853, -1); single(7855, -1); single(7857, -1); single(7859, -1); single(7861, -1); single(7863, -1); single(7865, -1); single(7867, -1); single(7869, -1); single(7871, -1); single(7873, -1); single(7875, -1); single(7877, -1); single(7879, -1); single(7881, -1); single(7883, -1); single(7885, -1); single(7887, -1); single(7889, -1); single(7891, -1); single(7893, -1); single(7895, -1); single(7897, -1); single(7899, -1); single(7901, -1); single(7903, -1); single(7905, -1); single(7907, -1); single(7909, -1); single(7911, -1); single(7913, -1); single(7915, -1); single(7917, -1); single(7919, -1); single(7921, -1); single(7923, -1); single(7925, -1); single(7927, -1); single(7929, -1); single(7931, -1); single(7933, -1); single(7935, -1); range(7936, 7943, 8); range(7952, 7957, 8); range(7968, 7975, 8); range(7984, 7991, 8); range(8000, 8005, 8); special(8016, "\u03A5\u0313"); single(8017, 8); special(8018, "\u03A5\u0313\u0300"); single(8019, 8); special(8020, "\u03A5\u0313\u0301"); single(8021, 8); special(8022, "\u03A5\u0313\u0342"); single(8023, 8); range(8032, 8039, 8); range(8048, 8049, 74); range(8050, 8053, 86); range(8054, 8055, 100); range(8056, 8057, 128); range(8058, 8059, 112); range(8060, 8061, 126); special(8064, "\u1F08\u0399"); special(8065, "\u1F09\u0399"); special(8066, "\u1F0A\u0399"); special(8067, "\u1F0B\u0399"); special(8068, "\u1F0C\u0399"); special(8069, "\u1F0D\u0399"); special(8070, "\u1F0E\u0399"); special(8071, "\u1F0F\u0399"); special(8072, "\u1F08\u0399"); special(8073, "\u1F09\u0399"); special(8074, "\u1F0A\u0399"); special(8075, "\u1F0B\u0399"); special(8076, "\u1F0C\u0399"); special(8077, "\u1F0D\u0399"); special(8078, "\u1F0E\u0399"); special(8079, "\u1F0F\u0399"); special(8080, "\u1F28\u0399"); special(8081, "\u1F29\u0399"); special(8082, "\u1F2A\u0399"); special(8083, "\u1F2B\u0399"); special(8084, "\u1F2C\u0399"); special(8085, "\u1F2D\u0399"); special(8086, "\u1F2E\u0399"); special(8087, "\u1F2F\u0399"); special(8088, "\u1F28\u0399"); special(8089, "\u1F29\u0399"); special(8090, "\u1F2A\u0399"); special(8091, "\u1F2B\u0399"); special(8092, "\u1F2C\u0399"); special(8093, "\u1F2D\u0399"); special(8094, "\u1F2E\u0399"); special(8095, "\u1F2F\u0399"); special(8096, "\u1F68\u0399"); special(8097, "\u1F69\u0399"); special(8098, "\u1F6A\u0399"); special(8099, "\u1F6B\u0399"); special(8100, "\u1F6C\u0399"); special(8101, "\u1F6D\u0399"); special(8102, "\u1F6E\u0399"); special(8103, "\u1F6F\u0399"); special(8104, "\u1F68\u0399"); special(8105, "\u1F69\u0399"); special(8106, "\u1F6A\u0399"); special(8107, "\u1F6B\u0399"); special(8108, "\u1F6C\u0399"); special(8109, "\u1F6D\u0399"); special(8110, "\u1F6E\u0399"); special(8111, "\u1F6F\u0399"); range(8112, 8113, 8); special(8114, "\u1FBA\u0399"); special(8115, "\u0391\u0399"); special(8116, "\u0386\u0399"); special(8118, "\u0391\u0342"); special(8119, "\u0391\u0342\u0399"); special(8124, "\u0391\u0399"); single(8126, -7205); special(8130, "\u1FCA\u0399"); special(8131, "\u0397\u0399"); special(8132, "\u0389\u0399"); special(8134, "\u0397\u0342"); special(8135, "\u0397\u0342\u0399"); special(8140, "\u0397\u0399"); range(8144, 8145, 8); special(8146, "\u0399\u0308\u0300"); special(8147, "\u0399\u0308\u0301"); special(8150, "\u0399\u0342"); special(8151, "\u0399\u0308\u0342"); range(8160, 8161, 8); special(8162, "\u03A5\u0308\u0300"); special(8163, "\u03A5\u0308\u0301"); special(8164, "\u03A1\u0313"); single(8165, 7); special(8166, "\u03A5\u0342"); special(8167, "\u03A5\u0308\u0342"); special(8178, "\u1FFA\u0399"); special(8179, "\u03A9\u0399"); special(8180, "\u038F\u0399"); special(8182, "\u03A9\u0342"); special(8183, "\u03A9\u0342\u0399"); special(8188, "\u03A9\u0399"); single(8526, -28); range(8560, 8575, -16); single(8580, -1); range(9424, 9449, -26); range(11312, 11358, -48); single(11361, -1); single(11365, -10795); single(11366, -10792); single(11368, -1); single(11370, -1); single(11372, -1); single(11379, -1); single(11382, -1); single(11393, -1); single(11395, -1); single(11397, -1); single(11399, -1); single(11401, -1); single(11403, -1); single(11405, -1); single(11407, -1); single(11409, -1); single(11411, -1); single(11413, -1); single(11415, -1); single(11417, -1); single(11419, -1); single(11421, -1); single(11423, -1); single(11425, -1); single(11427, -1); single(11429, -1); single(11431, -1); single(11433, -1); single(11435, -1); single(11437, -1); single(11439, -1); single(11441, -1); single(11443, -1); single(11445, -1); single(11447, -1); single(11449, -1); single(11451, -1); single(11453, -1); single(11455, -1); single(11457, -1); single(11459, -1); single(11461, -1); single(11463, -1); single(11465, -1); single(11467, -1); single(11469, -1); single(11471, -1); single(11473, -1); single(11475, -1); single(11477, -1); single(11479, -1); single(11481, -1); single(11483, -1); single(11485, -1); single(11487, -1); single(11489, -1); single(11491, -1); single(11500, -1); single(11502, -1); single(11507, -1); range(11520, 11557, -7264); single(11559, -7264); single(11565, -7264); single(42561, -1); single(42563, -1); single(42565, -1); single(42567, -1); single(42569, -1); single(42571, -1); single(42573, -1); single(42575, -1); single(42577, -1); single(42579, -1); single(42581, -1); single(42583, -1); single(42585, -1); single(42587, -1); single(42589, -1); single(42591, -1); single(42593, -1); single(42595, -1); single(42597, -1); single(42599, -1); single(42601, -1); single(42603, -1); single(42605, -1); single(42625, -1); single(42627, -1); single(42629, -1); single(42631, -1); single(42633, -1); single(42635, -1); single(42637, -1); single(42639, -1); single(42641, -1); single(42643, -1); single(42645, -1); single(42647, -1); single(42787, -1); single(42789, -1); single(42791, -1); single(42793, -1); single(42795, -1); single(42797, -1); single(42799, -1); single(42803, -1); single(42805, -1); single(42807, -1); single(42809, -1); single(42811, -1); single(42813, -1); single(42815, -1); single(42817, -1); single(42819, -1); single(42821, -1); single(42823, -1); single(42825, -1); single(42827, -1); single(42829, -1); single(42831, -1); single(42833, -1); single(42835, -1); single(42837, -1); single(42839, -1); single(42841, -1); single(42843, -1); single(42845, -1); single(42847, -1); single(42849, -1); single(42851, -1); single(42853, -1); single(42855, -1); single(42857, -1); single(42859, -1); single(42861, -1); single(42863, -1); single(42874, -1); single(42876, -1); single(42879, -1); single(42881, -1); single(42883, -1); single(42885, -1); single(42887, -1); single(42892, -1); single(42897, -1); single(42899, -1); single(42913, -1); single(42915, -1); single(42917, -1); single(42919, -1); single(42921, -1); special(64256, "FF"); special(64257, "FI"); special(64258, "FL"); special(64259, "FFI"); special(64260, "FFL"); special(64261, "ST"); special(64262, "ST"); special(64275, "\u0544\u0546"); special(64276, "\u0544\u0535"); special(64277, "\u0544\u053B"); special(64278, "\u054E\u0546"); special(64279, "\u0544\u053D"); range(65345, 65370, -32); range(66600, 66639, -40); // END GENERATED CONTENT } static { init(); } public static UnicodeString toUpper(UnicodeString input) { UnicodeBuilder ub = new UnicodeBuilder((int)input.estimatedLength()); IntIterator iter = input.codePoints(); while (iter.hasNext()) { int cp = iter.next(); if (cp < 256 && cp != 223) { // Fast path for latin-1 characters other than eszet ub.append(latin[cp]); } else { String special = specials.get(cp); if (special != null) { ub.append(special); } else { int delta = ranges.get(cp); if (delta == Integer.MIN_VALUE) { ub.append(cp); } else { ub.append(cp + delta); } } } } return ub.toUnicodeString(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy