// net.sf.saxon.str.ToLower Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of Saxon-HE Show documentation
// Show all versions of Saxon-HE Show documentation
// The XSLT and XQuery Processor
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2022 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.str;
import net.sf.saxon.z.IntHashMap;
import net.sf.saxon.z.IntIterator;
import net.sf.saxon.z.IntRangeToIntMap;
/**
* Class to perform lowercase conversion.
* Note we could use the built-in Java function (the rules are compatible with the XPath rules), but
* we would then need to convert the UnicodeString to a Java String.
*/
public class ToLower {
/**
 * Code-point ranges and the delta to add to each member to obtain its lowercase form.
 * Filled by {@code range()}; read by {@code toLower()} for code points of 256 and above.
 */
private static final IntRangeToIntMap ranges = new IntRangeToIntMap(650);

/**
 * Code points whose lowercase form is a replacement string rather than a single
 * code point at a fixed offset. Filled by {@code special()}.
 * The value type must be {@code String}: {@code toLower()} assigns the result of
 * {@code get()} directly to a {@code String}.
 */
private static final IntHashMap<String> specials = new IntHashMap<>(10); // Actually there's only one, but keep it general

/**
 * Direct lookup table for code points 0..255: {@code latin[cp]} is the lowercase form of {@code cp}.
 * Set to the identity mapping at the start of {@code init()} and then overwritten by {@code range()}.
 */
private static final char[] latin = new char[256];
/**
 * Register a lowercase mapping for every code point in an inclusive range.
 *
 * <p>The mapping is recorded in {@code ranges}. In addition, every code point of the range
 * that is below the size of the {@code latin} table is written into that table, because
 * {@code toLower()} resolves code points below 256 from {@code latin} only and never
 * consults {@code ranges} for them.</p>
 *
 * @param start first code point of the range (inclusive)
 * @param end   last code point of the range (inclusive)
 * @param delta value added to a code point in the range to obtain its lowercase form
 */
private static void range(int start, int end, int delta) {
    ranges.addEntry(start, end, delta);
    // Clamp to the table instead of requiring the whole range to fit: a range that
    // straddles the 255/256 boundary must still update its Latin-1 portion.
    final int lastLatin = Math.min(end, latin.length - 1);
    for (int cp = start; cp <= lastLatin; cp++) {
        latin[cp] = (char) (cp + delta);
    }
}
/**
 * Register a lowercase mapping for one code point, expressed as a range of length one.
 *
 * @param codePoint the code point to be mapped
 * @param offset    value added to {@code codePoint} to obtain its lowercase form
 */
private static void single(int codePoint, int offset) {
    // A single code point is simply the range [codePoint, codePoint].
    range(codePoint, codePoint, offset);
}
/**
 * Register a code point whose lowercase form is given as a replacement string
 * instead of a numeric offset.
 *
 * @param codePoint   the code point to be mapped
 * @param replacement the string that {@code toLower()} appends in place of {@code codePoint}
 */
private static void special(int codePoint, String replacement) {
    specials.put(codePoint, replacement);
}
/**
 * Populate the lookup tables used by {@code toLower()}.
 *
 * <p>First sets {@code latin} to the identity mapping for 0..255, then registers every
 * mapping in the generated list below: {@code range(start, end, delta)} for an inclusive
 * run of code points sharing one delta, {@code single(cp, delta)} for one code point, and
 * {@code special(cp, string)} for a code point replaced by a string. The only
 * {@code special} entry is code point 304 (U+0130), which maps to "i" followed by U+0307.</p>
 *
 * <p>The section between the START/END markers is generated data and should not be edited by hand.</p>
 */
private static void init() {
// Data generated using stylesheet make-uppercase-table in tools/unicode.
// Note, the stylesheet relies on having a conformant implementation of fn:upper-case() to start with!
// NOTE(review): the two lines above name the upper-case stylesheet and function although this
// is the lower-case table - presumably carried over from the upper-case counterpart; confirm
// which generator actually produced this data.
// Start from the identity mapping so that unmapped Latin-1 code points are returned unchanged.
for (int i=0; i<256; i++) {
latin[i] = (char)i;
}
// START GENERATED CONTENT
range(65, 90, 32);
range(192, 214, 32);
range(216, 222, 32);
single(256, 1);
single(258, 1);
single(260, 1);
single(262, 1);
single(264, 1);
single(266, 1);
single(268, 1);
single(270, 1);
single(272, 1);
single(274, 1);
single(276, 1);
single(278, 1);
single(280, 1);
single(282, 1);
single(284, 1);
single(286, 1);
single(288, 1);
single(290, 1);
single(292, 1);
single(294, 1);
single(296, 1);
single(298, 1);
single(300, 1);
single(302, 1);
special(304, "i\u0307");
single(306, 1);
single(308, 1);
single(310, 1);
single(313, 1);
single(315, 1);
single(317, 1);
single(319, 1);
single(321, 1);
single(323, 1);
single(325, 1);
single(327, 1);
single(330, 1);
single(332, 1);
single(334, 1);
single(336, 1);
single(338, 1);
single(340, 1);
single(342, 1);
single(344, 1);
single(346, 1);
single(348, 1);
single(350, 1);
single(352, 1);
single(354, 1);
single(356, 1);
single(358, 1);
single(360, 1);
single(362, 1);
single(364, 1);
single(366, 1);
single(368, 1);
single(370, 1);
single(372, 1);
single(374, 1);
single(376, -121);
single(377, 1);
single(379, 1);
single(381, 1);
single(385, 210);
single(386, 1);
single(388, 1);
single(390, 206);
single(391, 1);
range(393, 394, 205);
single(395, 1);
single(398, 79);
single(399, 202);
single(400, 203);
single(401, 1);
single(403, 205);
single(404, 207);
single(406, 211);
single(407, 209);
single(408, 1);
single(412, 211);
single(413, 213);
single(415, 214);
single(416, 1);
single(418, 1);
single(420, 1);
single(422, 218);
single(423, 1);
single(425, 218);
single(428, 1);
single(430, 218);
single(431, 1);
range(433, 434, 217);
single(435, 1);
single(437, 1);
single(439, 219);
single(440, 1);
single(444, 1);
single(452, 2);
single(453, 1);
single(455, 2);
single(456, 1);
single(458, 2);
single(459, 1);
single(461, 1);
single(463, 1);
single(465, 1);
single(467, 1);
single(469, 1);
single(471, 1);
single(473, 1);
single(475, 1);
single(478, 1);
single(480, 1);
single(482, 1);
single(484, 1);
single(486, 1);
single(488, 1);
single(490, 1);
single(492, 1);
single(494, 1);
single(497, 2);
single(498, 1);
single(500, 1);
single(502, -97);
single(503, -56);
single(504, 1);
single(506, 1);
single(508, 1);
single(510, 1);
single(512, 1);
single(514, 1);
single(516, 1);
single(518, 1);
single(520, 1);
single(522, 1);
single(524, 1);
single(526, 1);
single(528, 1);
single(530, 1);
single(532, 1);
single(534, 1);
single(536, 1);
single(538, 1);
single(540, 1);
single(542, 1);
single(544, -130);
single(546, 1);
single(548, 1);
single(550, 1);
single(552, 1);
single(554, 1);
single(556, 1);
single(558, 1);
single(560, 1);
single(562, 1);
single(570, 10795);
single(571, 1);
single(573, -163);
single(574, 10792);
single(577, 1);
single(579, -195);
single(580, 69);
single(581, 71);
single(582, 1);
single(584, 1);
single(586, 1);
single(588, 1);
single(590, 1);
single(880, 1);
single(882, 1);
single(886, 1);
single(902, 38);
range(904, 906, 37);
single(908, 64);
range(910, 911, 63);
range(913, 929, 32);
range(931, 939, 32);
single(975, 8);
single(984, 1);
single(986, 1);
single(988, 1);
single(990, 1);
single(992, 1);
single(994, 1);
single(996, 1);
single(998, 1);
single(1000, 1);
single(1002, 1);
single(1004, 1);
single(1006, 1);
single(1012, -60);
single(1015, 1);
single(1017, -7);
single(1018, 1);
range(1021, 1023, -130);
range(1024, 1039, 80);
range(1040, 1071, 32);
single(1120, 1);
single(1122, 1);
single(1124, 1);
single(1126, 1);
single(1128, 1);
single(1130, 1);
single(1132, 1);
single(1134, 1);
single(1136, 1);
single(1138, 1);
single(1140, 1);
single(1142, 1);
single(1144, 1);
single(1146, 1);
single(1148, 1);
single(1150, 1);
single(1152, 1);
single(1162, 1);
single(1164, 1);
single(1166, 1);
single(1168, 1);
single(1170, 1);
single(1172, 1);
single(1174, 1);
single(1176, 1);
single(1178, 1);
single(1180, 1);
single(1182, 1);
single(1184, 1);
single(1186, 1);
single(1188, 1);
single(1190, 1);
single(1192, 1);
single(1194, 1);
single(1196, 1);
single(1198, 1);
single(1200, 1);
single(1202, 1);
single(1204, 1);
single(1206, 1);
single(1208, 1);
single(1210, 1);
single(1212, 1);
single(1214, 1);
single(1216, 15);
single(1217, 1);
single(1219, 1);
single(1221, 1);
single(1223, 1);
single(1225, 1);
single(1227, 1);
single(1229, 1);
single(1232, 1);
single(1234, 1);
single(1236, 1);
single(1238, 1);
single(1240, 1);
single(1242, 1);
single(1244, 1);
single(1246, 1);
single(1248, 1);
single(1250, 1);
single(1252, 1);
single(1254, 1);
single(1256, 1);
single(1258, 1);
single(1260, 1);
single(1262, 1);
single(1264, 1);
single(1266, 1);
single(1268, 1);
single(1270, 1);
single(1272, 1);
single(1274, 1);
single(1276, 1);
single(1278, 1);
single(1280, 1);
single(1282, 1);
single(1284, 1);
single(1286, 1);
single(1288, 1);
single(1290, 1);
single(1292, 1);
single(1294, 1);
single(1296, 1);
single(1298, 1);
single(1300, 1);
single(1302, 1);
single(1304, 1);
single(1306, 1);
single(1308, 1);
single(1310, 1);
single(1312, 1);
single(1314, 1);
single(1316, 1);
single(1318, 1);
range(1329, 1366, 48);
range(4256, 4293, 7264);
single(4295, 7264);
single(4301, 7264);
single(7680, 1);
single(7682, 1);
single(7684, 1);
single(7686, 1);
single(7688, 1);
single(7690, 1);
single(7692, 1);
single(7694, 1);
single(7696, 1);
single(7698, 1);
single(7700, 1);
single(7702, 1);
single(7704, 1);
single(7706, 1);
single(7708, 1);
single(7710, 1);
single(7712, 1);
single(7714, 1);
single(7716, 1);
single(7718, 1);
single(7720, 1);
single(7722, 1);
single(7724, 1);
single(7726, 1);
single(7728, 1);
single(7730, 1);
single(7732, 1);
single(7734, 1);
single(7736, 1);
single(7738, 1);
single(7740, 1);
single(7742, 1);
single(7744, 1);
single(7746, 1);
single(7748, 1);
single(7750, 1);
single(7752, 1);
single(7754, 1);
single(7756, 1);
single(7758, 1);
single(7760, 1);
single(7762, 1);
single(7764, 1);
single(7766, 1);
single(7768, 1);
single(7770, 1);
single(7772, 1);
single(7774, 1);
single(7776, 1);
single(7778, 1);
single(7780, 1);
single(7782, 1);
single(7784, 1);
single(7786, 1);
single(7788, 1);
single(7790, 1);
single(7792, 1);
single(7794, 1);
single(7796, 1);
single(7798, 1);
single(7800, 1);
single(7802, 1);
single(7804, 1);
single(7806, 1);
single(7808, 1);
single(7810, 1);
single(7812, 1);
single(7814, 1);
single(7816, 1);
single(7818, 1);
single(7820, 1);
single(7822, 1);
single(7824, 1);
single(7826, 1);
single(7828, 1);
single(7838, -7615);
single(7840, 1);
single(7842, 1);
single(7844, 1);
single(7846, 1);
single(7848, 1);
single(7850, 1);
single(7852, 1);
single(7854, 1);
single(7856, 1);
single(7858, 1);
single(7860, 1);
single(7862, 1);
single(7864, 1);
single(7866, 1);
single(7868, 1);
single(7870, 1);
single(7872, 1);
single(7874, 1);
single(7876, 1);
single(7878, 1);
single(7880, 1);
single(7882, 1);
single(7884, 1);
single(7886, 1);
single(7888, 1);
single(7890, 1);
single(7892, 1);
single(7894, 1);
single(7896, 1);
single(7898, 1);
single(7900, 1);
single(7902, 1);
single(7904, 1);
single(7906, 1);
single(7908, 1);
single(7910, 1);
single(7912, 1);
single(7914, 1);
single(7916, 1);
single(7918, 1);
single(7920, 1);
single(7922, 1);
single(7924, 1);
single(7926, 1);
single(7928, 1);
single(7930, 1);
single(7932, 1);
single(7934, 1);
range(7944, 7951, -8);
range(7960, 7965, -8);
range(7976, 7983, -8);
range(7992, 7999, -8);
range(8008, 8013, -8);
single(8025, -8);
single(8027, -8);
single(8029, -8);
single(8031, -8);
range(8040, 8047, -8);
range(8072, 8079, -8);
range(8088, 8095, -8);
range(8104, 8111, -8);
range(8120, 8121, -8);
range(8122, 8123, -74);
single(8124, -9);
range(8136, 8139, -86);
single(8140, -9);
range(8152, 8153, -8);
range(8154, 8155, -100);
range(8168, 8169, -8);
range(8170, 8171, -112);
single(8172, -7);
range(8184, 8185, -128);
range(8186, 8187, -126);
single(8188, -9);
single(8486, -7517);
single(8490, -8383);
single(8491, -8262);
single(8498, 28);
range(8544, 8559, 16);
single(8579, 1);
range(9398, 9423, 26);
range(11264, 11310, 48);
single(11360, 1);
single(11362, -10743);
single(11363, -3814);
single(11364, -10727);
single(11367, 1);
single(11369, 1);
single(11371, 1);
single(11373, -10780);
single(11374, -10749);
single(11375, -10783);
single(11376, -10782);
single(11378, 1);
single(11381, 1);
range(11390, 11391, -10815);
single(11392, 1);
single(11394, 1);
single(11396, 1);
single(11398, 1);
single(11400, 1);
single(11402, 1);
single(11404, 1);
single(11406, 1);
single(11408, 1);
single(11410, 1);
single(11412, 1);
single(11414, 1);
single(11416, 1);
single(11418, 1);
single(11420, 1);
single(11422, 1);
single(11424, 1);
single(11426, 1);
single(11428, 1);
single(11430, 1);
single(11432, 1);
single(11434, 1);
single(11436, 1);
single(11438, 1);
single(11440, 1);
single(11442, 1);
single(11444, 1);
single(11446, 1);
single(11448, 1);
single(11450, 1);
single(11452, 1);
single(11454, 1);
single(11456, 1);
single(11458, 1);
single(11460, 1);
single(11462, 1);
single(11464, 1);
single(11466, 1);
single(11468, 1);
single(11470, 1);
single(11472, 1);
single(11474, 1);
single(11476, 1);
single(11478, 1);
single(11480, 1);
single(11482, 1);
single(11484, 1);
single(11486, 1);
single(11488, 1);
single(11490, 1);
single(11499, 1);
single(11501, 1);
single(11506, 1);
single(42560, 1);
single(42562, 1);
single(42564, 1);
single(42566, 1);
single(42568, 1);
single(42570, 1);
single(42572, 1);
single(42574, 1);
single(42576, 1);
single(42578, 1);
single(42580, 1);
single(42582, 1);
single(42584, 1);
single(42586, 1);
single(42588, 1);
single(42590, 1);
single(42592, 1);
single(42594, 1);
single(42596, 1);
single(42598, 1);
single(42600, 1);
single(42602, 1);
single(42604, 1);
single(42624, 1);
single(42626, 1);
single(42628, 1);
single(42630, 1);
single(42632, 1);
single(42634, 1);
single(42636, 1);
single(42638, 1);
single(42640, 1);
single(42642, 1);
single(42644, 1);
single(42646, 1);
single(42786, 1);
single(42788, 1);
single(42790, 1);
single(42792, 1);
single(42794, 1);
single(42796, 1);
single(42798, 1);
single(42802, 1);
single(42804, 1);
single(42806, 1);
single(42808, 1);
single(42810, 1);
single(42812, 1);
single(42814, 1);
single(42816, 1);
single(42818, 1);
single(42820, 1);
single(42822, 1);
single(42824, 1);
single(42826, 1);
single(42828, 1);
single(42830, 1);
single(42832, 1);
single(42834, 1);
single(42836, 1);
single(42838, 1);
single(42840, 1);
single(42842, 1);
single(42844, 1);
single(42846, 1);
single(42848, 1);
single(42850, 1);
single(42852, 1);
single(42854, 1);
single(42856, 1);
single(42858, 1);
single(42860, 1);
single(42862, 1);
single(42873, 1);
single(42875, 1);
single(42877, -35332);
single(42878, 1);
single(42880, 1);
single(42882, 1);
single(42884, 1);
single(42886, 1);
single(42891, 1);
single(42893, -42280);
single(42896, 1);
single(42898, 1);
single(42912, 1);
single(42914, 1);
single(42916, 1);
single(42918, 1);
single(42920, 1);
single(42922, -42308);
range(65313, 65338, 32);
range(66560, 66599, 40);
// END GENERATED CONTENT
}
// Fill the lookup tables when the class is initialised, so that toLower()
// can read them without any further setup.
static {
init();
}
/**
 * Convert a string to lower case, one code point at a time.
 *
 * <p>Each code point is resolved by the first rule that applies:</p>
 * <ol>
 *   <li>below 256: the entry in the {@code latin} table is appended;</li>
 *   <li>a replacement string registered in {@code specials}: that string is appended;</li>
 *   <li>a delta registered in {@code ranges}: the code point plus the delta is appended;</li>
 *   <li>otherwise the code point is appended unchanged.</li>
 * </ol>
 *
 * @param input the string to be converted
 * @return the string assembled from the converted code points
 */
public static UnicodeString toLower(UnicodeString input) {
    final UnicodeBuilder out = new UnicodeBuilder((int) input.estimatedLength());
    for (IntIterator codePoints = input.codePoints(); codePoints.hasNext(); ) {
        final int codePoint = codePoints.next();

        // Fast path for latin-1 characters: direct table lookup.
        if (codePoint < 256) {
            out.append(latin[codePoint]);
            continue;
        }

        // Code points that lowercase to a string rather than to a single code point.
        final String expansion = specials.get(codePoint);
        if (expansion != null) {
            out.append(expansion);
            continue;
        }

        // Integer.MIN_VALUE from the range map is treated as "no mapping": keep the code point.
        final int offset = ranges.get(codePoint);
        if (offset != Integer.MIN_VALUE) {
            out.append(codePoint + offset);
        } else {
            out.append(codePoint);
        }
    }
    return out.toUnicodeString();
}
}