// Source: net.sf.saxon.str.ToUpper, from the Saxon-HE artifact (Maven / Gradle / Ivy).
// Saxon-HE: The XSLT and XQuery Processor.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2023 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.str;
import net.sf.saxon.z.IntHashMap;
import net.sf.saxon.z.IntIterator;
import net.sf.saxon.z.IntRangeToIntMap;
/**
* Class to perform uppercase conversion.
* Note we could use the built-in Java function (the rules are compatible with the XPath rules), but
* we would then need to convert the UnicodeString to a Java String; and using our own implementation
* is the best approach on .NET.
*/
public class ToUpper {
/**
 * One-to-one mappings: each entry covers an inclusive range of code points and holds the
 * signed delta that is added to a code point in that range to obtain its upper-case form.
 */
private static final IntRangeToIntMap ranges = new IntRangeToIntMap(750);
/**
 * One-to-many mappings, keyed by code point; the value is the upper-case replacement string
 * (for example 223 maps to "SS"). The explicit {@code <String>} type argument is required:
 * with a raw type, {@code specials.get(cp)} yields {@code Object} and cannot be assigned to
 * a {@code String}.
 */
private static final IntHashMap<String> specials = new IntHashMap<>(100);
/**
 * Fast-path table holding the upper-case character for each code point in the range 0 to 255.
 */
private static final char[] latin = new char[256];
/**
 * Registers a one-to-one case mapping for every code point in an inclusive range.
 *
 * @param start first code point of the range (inclusive)
 * @param end last code point of the range (inclusive)
 * @param delta signed amount added to a code point in the range to obtain its upper-case form
 */
private static void range(int start, int end, int delta) {
    ranges.addEntry(start, end, delta);
    // Mirror the part of the range that lies in 0..255 into the fast-path table. toUpper()
    // answers code points below 256 (other than 223) from 'latin' alone, so a range that
    // starts below 256 but ends above it must still have its low part cached here; clamping
    // the loop (rather than skipping it when end >= 256) guarantees that.
    int lastLatin = Math.min(end, latin.length - 1);
    for (int i = start; i <= lastLatin; i++) {
        latin[i] = (char) (i + delta);
    }
}
/**
 * Registers a one-to-one case mapping for one code point, expressed as a range whose two
 * endpoints coincide.
 *
 * @param codePoint the code point to be mapped
 * @param delta signed amount added to {@code codePoint} to obtain its upper-case form
 */
private static void single(int codePoint, int delta) {
    range(codePoint, codePoint, delta);
}
/**
 * Registers a mapping whose upper-case form is given as a string rather than as a delta
 * (for example a single character that expands to several).
 *
 * @param codePoint the code point to be mapped
 * @param replacement the string that replaces {@code codePoint} in the upper-cased output
 */
private static void special(int codePoint, String replacement) {
    specials.put(codePoint, replacement);
}
/**
 * Populates the static lookup tables used by {@link #toUpper}.
 * <p>First sets {@code latin} to the identity mapping, then registers every generated case
 * mapping. {@code range} and {@code single} entries carry a signed delta that is added to the
 * code point; {@code special} entries carry a replacement string (for example 223 maps to
 * "SS"). All numeric arguments are decimal code points.</p>
 */
private static void init() {
// Data generated using stylesheet make-uppercase-table in tools/unicode.
// Note, the stylesheet relies on having a conformant implementation of fn:upper-case() to start with!
// Start from the identity mapping so that characters in 0..255 with no registered mapping
// are returned unchanged by the fast path.
for (int i=0; i<256; i++) {
latin[i] = (char)i;
}
// START GENERATED CONTENT
range(97, 122, -32);
single(181, 743);
// 223 is the only entry below 256 that expands to more than one character.
special(223, "SS");
range(224, 246, -32);
range(248, 254, -32);
single(255, 121);
single(257, -1);
single(259, -1);
single(261, -1);
single(263, -1);
single(265, -1);
single(267, -1);
single(269, -1);
single(271, -1);
single(273, -1);
single(275, -1);
single(277, -1);
single(279, -1);
single(281, -1);
single(283, -1);
single(285, -1);
single(287, -1);
single(289, -1);
single(291, -1);
single(293, -1);
single(295, -1);
single(297, -1);
single(299, -1);
single(301, -1);
single(303, -1);
single(305, -232);
single(307, -1);
single(309, -1);
single(311, -1);
single(314, -1);
single(316, -1);
single(318, -1);
single(320, -1);
single(322, -1);
single(324, -1);
single(326, -1);
single(328, -1);
special(329, "\u02BCN");
single(331, -1);
single(333, -1);
single(335, -1);
single(337, -1);
single(339, -1);
single(341, -1);
single(343, -1);
single(345, -1);
single(347, -1);
single(349, -1);
single(351, -1);
single(353, -1);
single(355, -1);
single(357, -1);
single(359, -1);
single(361, -1);
single(363, -1);
single(365, -1);
single(367, -1);
single(369, -1);
single(371, -1);
single(373, -1);
single(375, -1);
single(378, -1);
single(380, -1);
single(382, -1);
single(383, -300);
single(384, 195);
single(387, -1);
single(389, -1);
single(392, -1);
single(396, -1);
single(402, -1);
single(405, 97);
single(409, -1);
single(410, 163);
single(414, 130);
single(417, -1);
single(419, -1);
single(421, -1);
single(424, -1);
single(429, -1);
single(432, -1);
single(436, -1);
single(438, -1);
single(441, -1);
single(445, -1);
single(447, 56);
single(453, -1);
single(454, -2);
single(456, -1);
single(457, -2);
single(459, -1);
single(460, -2);
single(462, -1);
single(464, -1);
single(466, -1);
single(468, -1);
single(470, -1);
single(472, -1);
single(474, -1);
single(476, -1);
single(477, -79);
single(479, -1);
single(481, -1);
single(483, -1);
single(485, -1);
single(487, -1);
single(489, -1);
single(491, -1);
single(493, -1);
single(495, -1);
special(496, "J\u030C");
single(498, -1);
single(499, -2);
single(501, -1);
single(505, -1);
single(507, -1);
single(509, -1);
single(511, -1);
single(513, -1);
single(515, -1);
single(517, -1);
single(519, -1);
single(521, -1);
single(523, -1);
single(525, -1);
single(527, -1);
single(529, -1);
single(531, -1);
single(533, -1);
single(535, -1);
single(537, -1);
single(539, -1);
single(541, -1);
single(543, -1);
single(547, -1);
single(549, -1);
single(551, -1);
single(553, -1);
single(555, -1);
single(557, -1);
single(559, -1);
single(561, -1);
single(563, -1);
single(572, -1);
range(575, 576, 10815);
single(578, -1);
single(583, -1);
single(585, -1);
single(587, -1);
single(589, -1);
single(591, -1);
single(592, 10783);
single(593, 10780);
single(594, 10782);
single(595, -210);
single(596, -206);
range(598, 599, -205);
single(601, -202);
single(603, -203);
single(608, -205);
single(611, -207);
single(613, 42280);
single(614, 42308);
single(616, -209);
single(617, -211);
single(619, 10743);
single(623, -211);
single(625, 10749);
single(626, -213);
single(629, -214);
single(637, 10727);
single(640, -218);
single(643, -218);
single(648, -218);
single(649, -69);
range(650, 651, -217);
single(652, -71);
single(658, -219);
single(837, 84);
single(881, -1);
single(883, -1);
single(887, -1);
range(891, 893, 130);
special(912, "\u0399\u0308\u0301");
single(940, -38);
range(941, 943, -37);
special(944, "\u03A5\u0308\u0301");
range(945, 961, -32);
single(962, -31);
range(963, 971, -32);
single(972, -64);
range(973, 974, -63);
single(976, -62);
single(977, -57);
single(981, -47);
single(982, -54);
single(983, -8);
single(985, -1);
single(987, -1);
single(989, -1);
single(991, -1);
single(993, -1);
single(995, -1);
single(997, -1);
single(999, -1);
single(1001, -1);
single(1003, -1);
single(1005, -1);
single(1007, -1);
single(1008, -86);
single(1009, -80);
single(1010, 7);
single(1013, -96);
single(1016, -1);
single(1019, -1);
range(1072, 1103, -32);
range(1104, 1119, -80);
single(1121, -1);
single(1123, -1);
single(1125, -1);
single(1127, -1);
single(1129, -1);
single(1131, -1);
single(1133, -1);
single(1135, -1);
single(1137, -1);
single(1139, -1);
single(1141, -1);
single(1143, -1);
single(1145, -1);
single(1147, -1);
single(1149, -1);
single(1151, -1);
single(1153, -1);
single(1163, -1);
single(1165, -1);
single(1167, -1);
single(1169, -1);
single(1171, -1);
single(1173, -1);
single(1175, -1);
single(1177, -1);
single(1179, -1);
single(1181, -1);
single(1183, -1);
single(1185, -1);
single(1187, -1);
single(1189, -1);
single(1191, -1);
single(1193, -1);
single(1195, -1);
single(1197, -1);
single(1199, -1);
single(1201, -1);
single(1203, -1);
single(1205, -1);
single(1207, -1);
single(1209, -1);
single(1211, -1);
single(1213, -1);
single(1215, -1);
single(1218, -1);
single(1220, -1);
single(1222, -1);
single(1224, -1);
single(1226, -1);
single(1228, -1);
single(1230, -1);
single(1231, -15);
single(1233, -1);
single(1235, -1);
single(1237, -1);
single(1239, -1);
single(1241, -1);
single(1243, -1);
single(1245, -1);
single(1247, -1);
single(1249, -1);
single(1251, -1);
single(1253, -1);
single(1255, -1);
single(1257, -1);
single(1259, -1);
single(1261, -1);
single(1263, -1);
single(1265, -1);
single(1267, -1);
single(1269, -1);
single(1271, -1);
single(1273, -1);
single(1275, -1);
single(1277, -1);
single(1279, -1);
single(1281, -1);
single(1283, -1);
single(1285, -1);
single(1287, -1);
single(1289, -1);
single(1291, -1);
single(1293, -1);
single(1295, -1);
single(1297, -1);
single(1299, -1);
single(1301, -1);
single(1303, -1);
single(1305, -1);
single(1307, -1);
single(1309, -1);
single(1311, -1);
single(1313, -1);
single(1315, -1);
single(1317, -1);
single(1319, -1);
range(1377, 1414, -48);
special(1415, "\u0535\u0552");
single(7545, 35332);
single(7549, 3814);
single(7681, -1);
single(7683, -1);
single(7685, -1);
single(7687, -1);
single(7689, -1);
single(7691, -1);
single(7693, -1);
single(7695, -1);
single(7697, -1);
single(7699, -1);
single(7701, -1);
single(7703, -1);
single(7705, -1);
single(7707, -1);
single(7709, -1);
single(7711, -1);
single(7713, -1);
single(7715, -1);
single(7717, -1);
single(7719, -1);
single(7721, -1);
single(7723, -1);
single(7725, -1);
single(7727, -1);
single(7729, -1);
single(7731, -1);
single(7733, -1);
single(7735, -1);
single(7737, -1);
single(7739, -1);
single(7741, -1);
single(7743, -1);
single(7745, -1);
single(7747, -1);
single(7749, -1);
single(7751, -1);
single(7753, -1);
single(7755, -1);
single(7757, -1);
single(7759, -1);
single(7761, -1);
single(7763, -1);
single(7765, -1);
single(7767, -1);
single(7769, -1);
single(7771, -1);
single(7773, -1);
single(7775, -1);
single(7777, -1);
single(7779, -1);
single(7781, -1);
single(7783, -1);
single(7785, -1);
single(7787, -1);
single(7789, -1);
single(7791, -1);
single(7793, -1);
single(7795, -1);
single(7797, -1);
single(7799, -1);
single(7801, -1);
single(7803, -1);
single(7805, -1);
single(7807, -1);
single(7809, -1);
single(7811, -1);
single(7813, -1);
single(7815, -1);
single(7817, -1);
single(7819, -1);
single(7821, -1);
single(7823, -1);
single(7825, -1);
single(7827, -1);
single(7829, -1);
special(7830, "H\u0331");
special(7831, "T\u0308");
special(7832, "W\u030A");
special(7833, "Y\u030A");
special(7834, "A\u02BE");
single(7835, -59);
single(7841, -1);
single(7843, -1);
single(7845, -1);
single(7847, -1);
single(7849, -1);
single(7851, -1);
single(7853, -1);
single(7855, -1);
single(7857, -1);
single(7859, -1);
single(7861, -1);
single(7863, -1);
single(7865, -1);
single(7867, -1);
single(7869, -1);
single(7871, -1);
single(7873, -1);
single(7875, -1);
single(7877, -1);
single(7879, -1);
single(7881, -1);
single(7883, -1);
single(7885, -1);
single(7887, -1);
single(7889, -1);
single(7891, -1);
single(7893, -1);
single(7895, -1);
single(7897, -1);
single(7899, -1);
single(7901, -1);
single(7903, -1);
single(7905, -1);
single(7907, -1);
single(7909, -1);
single(7911, -1);
single(7913, -1);
single(7915, -1);
single(7917, -1);
single(7919, -1);
single(7921, -1);
single(7923, -1);
single(7925, -1);
single(7927, -1);
single(7929, -1);
single(7931, -1);
single(7933, -1);
single(7935, -1);
range(7936, 7943, 8);
range(7952, 7957, 8);
range(7968, 7975, 8);
range(7984, 7991, 8);
range(8000, 8005, 8);
special(8016, "\u03A5\u0313");
single(8017, 8);
special(8018, "\u03A5\u0313\u0300");
single(8019, 8);
special(8020, "\u03A5\u0313\u0301");
single(8021, 8);
special(8022, "\u03A5\u0313\u0342");
single(8023, 8);
range(8032, 8039, 8);
range(8048, 8049, 74);
range(8050, 8053, 86);
range(8054, 8055, 100);
range(8056, 8057, 128);
range(8058, 8059, 112);
range(8060, 8061, 126);
special(8064, "\u1F08\u0399");
special(8065, "\u1F09\u0399");
special(8066, "\u1F0A\u0399");
special(8067, "\u1F0B\u0399");
special(8068, "\u1F0C\u0399");
special(8069, "\u1F0D\u0399");
special(8070, "\u1F0E\u0399");
special(8071, "\u1F0F\u0399");
special(8072, "\u1F08\u0399");
special(8073, "\u1F09\u0399");
special(8074, "\u1F0A\u0399");
special(8075, "\u1F0B\u0399");
special(8076, "\u1F0C\u0399");
special(8077, "\u1F0D\u0399");
special(8078, "\u1F0E\u0399");
special(8079, "\u1F0F\u0399");
special(8080, "\u1F28\u0399");
special(8081, "\u1F29\u0399");
special(8082, "\u1F2A\u0399");
special(8083, "\u1F2B\u0399");
special(8084, "\u1F2C\u0399");
special(8085, "\u1F2D\u0399");
special(8086, "\u1F2E\u0399");
special(8087, "\u1F2F\u0399");
special(8088, "\u1F28\u0399");
special(8089, "\u1F29\u0399");
special(8090, "\u1F2A\u0399");
special(8091, "\u1F2B\u0399");
special(8092, "\u1F2C\u0399");
special(8093, "\u1F2D\u0399");
special(8094, "\u1F2E\u0399");
special(8095, "\u1F2F\u0399");
special(8096, "\u1F68\u0399");
special(8097, "\u1F69\u0399");
special(8098, "\u1F6A\u0399");
special(8099, "\u1F6B\u0399");
special(8100, "\u1F6C\u0399");
special(8101, "\u1F6D\u0399");
special(8102, "\u1F6E\u0399");
special(8103, "\u1F6F\u0399");
special(8104, "\u1F68\u0399");
special(8105, "\u1F69\u0399");
special(8106, "\u1F6A\u0399");
special(8107, "\u1F6B\u0399");
special(8108, "\u1F6C\u0399");
special(8109, "\u1F6D\u0399");
special(8110, "\u1F6E\u0399");
special(8111, "\u1F6F\u0399");
range(8112, 8113, 8);
special(8114, "\u1FBA\u0399");
special(8115, "\u0391\u0399");
special(8116, "\u0386\u0399");
special(8118, "\u0391\u0342");
special(8119, "\u0391\u0342\u0399");
special(8124, "\u0391\u0399");
single(8126, -7205);
special(8130, "\u1FCA\u0399");
special(8131, "\u0397\u0399");
special(8132, "\u0389\u0399");
special(8134, "\u0397\u0342");
special(8135, "\u0397\u0342\u0399");
special(8140, "\u0397\u0399");
range(8144, 8145, 8);
special(8146, "\u0399\u0308\u0300");
special(8147, "\u0399\u0308\u0301");
special(8150, "\u0399\u0342");
special(8151, "\u0399\u0308\u0342");
range(8160, 8161, 8);
special(8162, "\u03A5\u0308\u0300");
special(8163, "\u03A5\u0308\u0301");
special(8164, "\u03A1\u0313");
single(8165, 7);
special(8166, "\u03A5\u0342");
special(8167, "\u03A5\u0308\u0342");
special(8178, "\u1FFA\u0399");
special(8179, "\u03A9\u0399");
special(8180, "\u038F\u0399");
special(8182, "\u03A9\u0342");
special(8183, "\u03A9\u0342\u0399");
special(8188, "\u03A9\u0399");
single(8526, -28);
range(8560, 8575, -16);
single(8580, -1);
range(9424, 9449, -26);
range(11312, 11358, -48);
single(11361, -1);
single(11365, -10795);
single(11366, -10792);
single(11368, -1);
single(11370, -1);
single(11372, -1);
single(11379, -1);
single(11382, -1);
single(11393, -1);
single(11395, -1);
single(11397, -1);
single(11399, -1);
single(11401, -1);
single(11403, -1);
single(11405, -1);
single(11407, -1);
single(11409, -1);
single(11411, -1);
single(11413, -1);
single(11415, -1);
single(11417, -1);
single(11419, -1);
single(11421, -1);
single(11423, -1);
single(11425, -1);
single(11427, -1);
single(11429, -1);
single(11431, -1);
single(11433, -1);
single(11435, -1);
single(11437, -1);
single(11439, -1);
single(11441, -1);
single(11443, -1);
single(11445, -1);
single(11447, -1);
single(11449, -1);
single(11451, -1);
single(11453, -1);
single(11455, -1);
single(11457, -1);
single(11459, -1);
single(11461, -1);
single(11463, -1);
single(11465, -1);
single(11467, -1);
single(11469, -1);
single(11471, -1);
single(11473, -1);
single(11475, -1);
single(11477, -1);
single(11479, -1);
single(11481, -1);
single(11483, -1);
single(11485, -1);
single(11487, -1);
single(11489, -1);
single(11491, -1);
single(11500, -1);
single(11502, -1);
single(11507, -1);
range(11520, 11557, -7264);
single(11559, -7264);
single(11565, -7264);
single(42561, -1);
single(42563, -1);
single(42565, -1);
single(42567, -1);
single(42569, -1);
single(42571, -1);
single(42573, -1);
single(42575, -1);
single(42577, -1);
single(42579, -1);
single(42581, -1);
single(42583, -1);
single(42585, -1);
single(42587, -1);
single(42589, -1);
single(42591, -1);
single(42593, -1);
single(42595, -1);
single(42597, -1);
single(42599, -1);
single(42601, -1);
single(42603, -1);
single(42605, -1);
single(42625, -1);
single(42627, -1);
single(42629, -1);
single(42631, -1);
single(42633, -1);
single(42635, -1);
single(42637, -1);
single(42639, -1);
single(42641, -1);
single(42643, -1);
single(42645, -1);
single(42647, -1);
single(42787, -1);
single(42789, -1);
single(42791, -1);
single(42793, -1);
single(42795, -1);
single(42797, -1);
single(42799, -1);
single(42803, -1);
single(42805, -1);
single(42807, -1);
single(42809, -1);
single(42811, -1);
single(42813, -1);
single(42815, -1);
single(42817, -1);
single(42819, -1);
single(42821, -1);
single(42823, -1);
single(42825, -1);
single(42827, -1);
single(42829, -1);
single(42831, -1);
single(42833, -1);
single(42835, -1);
single(42837, -1);
single(42839, -1);
single(42841, -1);
single(42843, -1);
single(42845, -1);
single(42847, -1);
single(42849, -1);
single(42851, -1);
single(42853, -1);
single(42855, -1);
single(42857, -1);
single(42859, -1);
single(42861, -1);
single(42863, -1);
single(42874, -1);
single(42876, -1);
single(42879, -1);
single(42881, -1);
single(42883, -1);
single(42885, -1);
single(42887, -1);
single(42892, -1);
single(42897, -1);
single(42899, -1);
single(42913, -1);
single(42915, -1);
single(42917, -1);
single(42919, -1);
single(42921, -1);
special(64256, "FF");
special(64257, "FI");
special(64258, "FL");
special(64259, "FFI");
special(64260, "FFL");
special(64261, "ST");
special(64262, "ST");
special(64275, "\u0544\u0546");
special(64276, "\u0544\u0535");
special(64277, "\u0544\u053B");
special(64278, "\u054E\u0546");
special(64279, "\u0544\u053D");
range(65345, 65370, -32);
range(66600, 66639, -40);
// END GENERATED CONTENT
}
// Build the mapping tables once, during class initialization, so that toUpper() can rely
// on them being fully populated.
static {
    init();
}
/**
 * Converts a string to upper case, one code point at a time.
 * <p>Code points below 256 other than 223 are answered from the {@code latin} table. Any
 * other code point is first looked up in {@code specials} (replacement strings) and then in
 * {@code ranges} (signed deltas); a code point found in neither is copied unchanged.</p>
 *
 * @param input the string to be converted
 * @return a new string holding the upper-case form of {@code input}
 */
public static UnicodeString toUpper(UnicodeString input) {
    UnicodeBuilder result = new UnicodeBuilder((int) input.estimatedLength());
    for (IntIterator codePoints = input.codePoints(); codePoints.hasNext(); ) {
        int ch = codePoints.next();

        // Fast path: Latin-1 characters, except 223 whose upper-case form is a string.
        if (ch < 256 && ch != 223) {
            result.append(latin[ch]);
            continue;
        }

        // Mappings registered with a replacement string take precedence over deltas.
        String expansion = specials.get(ch);
        if (expansion != null) {
            result.append(expansion);
            continue;
        }

        // Integer.MIN_VALUE is treated as "no mapping registered": keep the code point.
        int shift = ranges.get(ch);
        result.append(shift == Integer.MIN_VALUE ? ch : ch + shift);
    }
    return result.toUnicodeString();
}
}