All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.str.ToLower Maven / Gradle / Ivy

There is a newer version: 12.5
Show newest version
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2022 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.str;

import net.sf.saxon.z.IntHashMap;
import net.sf.saxon.z.IntIterator;
import net.sf.saxon.z.IntRangeToIntMap;

/**
 * Class to perform lowercase conversion.
 * 

Note we could use the built-in Java function (the rules are compatible with the XPath rules), but * we would then need to convert the UnicodeString to a Java String.

*/ public class ToLower { private final static IntRangeToIntMap ranges = new IntRangeToIntMap(650); private final static IntHashMap specials = new IntHashMap<>(10); // Actually there's only one, but keep it general private final static char[] latin = new char[256]; private static void range(int start, int end, int delta) { ranges.addEntry(start, end, delta); if (end < 256) { for (int i=start; i<=end; i++) { latin[i] = (char)(i+delta); } } } private static void single(int cp, int delta) { range(cp, cp, delta); } private static void special(int cp, String result) { specials.put(cp, result); } private static void init() { // Data generated using stylesheet make-uppercase-table in tools/unicode. // Note, the stylesheet relies on having a conformant implementation of fn:upper-case() to start with! for (int i=0; i<256; i++) { latin[i] = (char)i; } // START GENERATED CONTENT range(65, 90, 32); range(192, 214, 32); range(216, 222, 32); single(256, 1); single(258, 1); single(260, 1); single(262, 1); single(264, 1); single(266, 1); single(268, 1); single(270, 1); single(272, 1); single(274, 1); single(276, 1); single(278, 1); single(280, 1); single(282, 1); single(284, 1); single(286, 1); single(288, 1); single(290, 1); single(292, 1); single(294, 1); single(296, 1); single(298, 1); single(300, 1); single(302, 1); special(304, "i\u0307"); single(306, 1); single(308, 1); single(310, 1); single(313, 1); single(315, 1); single(317, 1); single(319, 1); single(321, 1); single(323, 1); single(325, 1); single(327, 1); single(330, 1); single(332, 1); single(334, 1); single(336, 1); single(338, 1); single(340, 1); single(342, 1); single(344, 1); single(346, 1); single(348, 1); single(350, 1); single(352, 1); single(354, 1); single(356, 1); single(358, 1); single(360, 1); single(362, 1); single(364, 1); single(366, 1); single(368, 1); single(370, 1); single(372, 1); single(374, 1); single(376, -121); single(377, 1); single(379, 1); single(381, 1); single(385, 210); single(386, 1); single(388, 1); single(390, 206); single(391, 1); range(393, 394, 205); single(395, 1); single(398, 79); single(399, 202); single(400, 203); single(401, 1); single(403, 205); single(404, 207); single(406, 211); single(407, 209); single(408, 1); single(412, 211); single(413, 213); single(415, 214); single(416, 1); single(418, 1); single(420, 1); single(422, 218); single(423, 1); single(425, 218); single(428, 1); single(430, 218); single(431, 1); range(433, 434, 217); single(435, 1); single(437, 1); single(439, 219); single(440, 1); single(444, 1); single(452, 2); single(453, 1); single(455, 2); single(456, 1); single(458, 2); single(459, 1); single(461, 1); single(463, 1); single(465, 1); single(467, 1); single(469, 1); single(471, 1); single(473, 1); single(475, 1); single(478, 1); single(480, 1); single(482, 1); single(484, 1); single(486, 1); single(488, 1); single(490, 1); single(492, 1); single(494, 1); single(497, 2); single(498, 1); single(500, 1); single(502, -97); single(503, -56); single(504, 1); single(506, 1); single(508, 1); single(510, 1); single(512, 1); single(514, 1); single(516, 1); single(518, 1); single(520, 1); single(522, 1); single(524, 1); single(526, 1); single(528, 1); single(530, 1); single(532, 1); single(534, 1); single(536, 1); single(538, 1); single(540, 1); single(542, 1); single(544, -130); single(546, 1); single(548, 1); single(550, 1); single(552, 1); single(554, 1); single(556, 1); single(558, 1); single(560, 1); single(562, 1); single(570, 10795); single(571, 1); single(573, -163); single(574, 10792); single(577, 1); single(579, -195); single(580, 69); single(581, 71); single(582, 1); single(584, 1); single(586, 1); single(588, 1); single(590, 1); single(880, 1); single(882, 1); single(886, 1); single(902, 38); range(904, 906, 37); single(908, 64); range(910, 911, 63); range(913, 929, 32); range(931, 939, 32); single(975, 8); single(984, 1); single(986, 1); single(988, 1); single(990, 1); single(992, 1); single(994, 1); single(996, 1); single(998, 1); single(1000, 1); single(1002, 1); single(1004, 1); single(1006, 1); single(1012, -60); single(1015, 1); single(1017, -7); single(1018, 1); range(1021, 1023, -130); range(1024, 1039, 80); range(1040, 1071, 32); single(1120, 1); single(1122, 1); single(1124, 1); single(1126, 1); single(1128, 1); single(1130, 1); single(1132, 1); single(1134, 1); single(1136, 1); single(1138, 1); single(1140, 1); single(1142, 1); single(1144, 1); single(1146, 1); single(1148, 1); single(1150, 1); single(1152, 1); single(1162, 1); single(1164, 1); single(1166, 1); single(1168, 1); single(1170, 1); single(1172, 1); single(1174, 1); single(1176, 1); single(1178, 1); single(1180, 1); single(1182, 1); single(1184, 1); single(1186, 1); single(1188, 1); single(1190, 1); single(1192, 1); single(1194, 1); single(1196, 1); single(1198, 1); single(1200, 1); single(1202, 1); single(1204, 1); single(1206, 1); single(1208, 1); single(1210, 1); single(1212, 1); single(1214, 1); single(1216, 15); single(1217, 1); single(1219, 1); single(1221, 1); single(1223, 1); single(1225, 1); single(1227, 1); single(1229, 1); single(1232, 1); single(1234, 1); single(1236, 1); single(1238, 1); single(1240, 1); single(1242, 1); single(1244, 1); single(1246, 1); single(1248, 1); single(1250, 1); single(1252, 1); single(1254, 1); single(1256, 1); single(1258, 1); single(1260, 1); single(1262, 1); single(1264, 1); single(1266, 1); single(1268, 1); single(1270, 1); single(1272, 1); single(1274, 1); single(1276, 1); single(1278, 1); single(1280, 1); single(1282, 1); single(1284, 1); single(1286, 1); single(1288, 1); single(1290, 1); single(1292, 1); single(1294, 1); single(1296, 1); single(1298, 1); single(1300, 1); single(1302, 1); single(1304, 1); single(1306, 1); single(1308, 1); single(1310, 1); single(1312, 1); single(1314, 1); single(1316, 1); single(1318, 1); range(1329, 1366, 48); range(4256, 4293, 7264); single(4295, 7264); single(4301, 7264); single(7680, 1); single(7682, 1); single(7684, 1); single(7686, 1); single(7688, 1); single(7690, 1); single(7692, 1); single(7694, 1); single(7696, 1); single(7698, 1); single(7700, 1); single(7702, 1); single(7704, 1); single(7706, 1); single(7708, 1); single(7710, 1); single(7712, 1); single(7714, 1); single(7716, 1); single(7718, 1); single(7720, 1); single(7722, 1); single(7724, 1); single(7726, 1); single(7728, 1); single(7730, 1); single(7732, 1); single(7734, 1); single(7736, 1); single(7738, 1); single(7740, 1); single(7742, 1); single(7744, 1); single(7746, 1); single(7748, 1); single(7750, 1); single(7752, 1); single(7754, 1); single(7756, 1); single(7758, 1); single(7760, 1); single(7762, 1); single(7764, 1); single(7766, 1); single(7768, 1); single(7770, 1); single(7772, 1); single(7774, 1); single(7776, 1); single(7778, 1); single(7780, 1); single(7782, 1); single(7784, 1); single(7786, 1); single(7788, 1); single(7790, 1); single(7792, 1); single(7794, 1); single(7796, 1); single(7798, 1); single(7800, 1); single(7802, 1); single(7804, 1); single(7806, 1); single(7808, 1); single(7810, 1); single(7812, 1); single(7814, 1); single(7816, 1); single(7818, 1); single(7820, 1); single(7822, 1); single(7824, 1); single(7826, 1); single(7828, 1); single(7838, -7615); single(7840, 1); single(7842, 1); single(7844, 1); single(7846, 1); single(7848, 1); single(7850, 1); single(7852, 1); single(7854, 1); single(7856, 1); single(7858, 1); single(7860, 1); single(7862, 1); single(7864, 1); single(7866, 1); single(7868, 1); single(7870, 1); single(7872, 1); single(7874, 1); single(7876, 1); single(7878, 1); single(7880, 1); single(7882, 1); single(7884, 1); single(7886, 1); single(7888, 1); single(7890, 1); single(7892, 1); single(7894, 1); single(7896, 1); single(7898, 1); single(7900, 1); single(7902, 1); single(7904, 1); single(7906, 1); single(7908, 1); single(7910, 1); single(7912, 1); single(7914, 1); single(7916, 1); single(7918, 1); single(7920, 1); single(7922, 1); single(7924, 1); single(7926, 1); single(7928, 1); single(7930, 1); single(7932, 1); single(7934, 1); range(7944, 7951, -8); range(7960, 7965, -8); range(7976, 7983, -8); range(7992, 7999, -8); range(8008, 8013, -8); single(8025, -8); single(8027, -8); single(8029, -8); single(8031, -8); range(8040, 8047, -8); range(8072, 8079, -8); range(8088, 8095, -8); range(8104, 8111, -8); range(8120, 8121, -8); range(8122, 8123, -74); single(8124, -9); range(8136, 8139, -86); single(8140, -9); range(8152, 8153, -8); range(8154, 8155, -100); range(8168, 8169, -8); range(8170, 8171, -112); single(8172, -7); range(8184, 8185, -128); range(8186, 8187, -126); single(8188, -9); single(8486, -7517); single(8490, -8383); single(8491, -8262); single(8498, 28); range(8544, 8559, 16); single(8579, 1); range(9398, 9423, 26); range(11264, 11310, 48); single(11360, 1); single(11362, -10743); single(11363, -3814); single(11364, -10727); single(11367, 1); single(11369, 1); single(11371, 1); single(11373, -10780); single(11374, -10749); single(11375, -10783); single(11376, -10782); single(11378, 1); single(11381, 1); range(11390, 11391, -10815); single(11392, 1); single(11394, 1); single(11396, 1); single(11398, 1); single(11400, 1); single(11402, 1); single(11404, 1); single(11406, 1); single(11408, 1); single(11410, 1); single(11412, 1); single(11414, 1); single(11416, 1); single(11418, 1); single(11420, 1); single(11422, 1); single(11424, 1); single(11426, 1); single(11428, 1); single(11430, 1); single(11432, 1); single(11434, 1); single(11436, 1); single(11438, 1); single(11440, 1); single(11442, 1); single(11444, 1); single(11446, 1); single(11448, 1); single(11450, 1); single(11452, 1); single(11454, 1); single(11456, 1); single(11458, 1); single(11460, 1); single(11462, 1); single(11464, 1); single(11466, 1); single(11468, 1); single(11470, 1); single(11472, 1); single(11474, 1); single(11476, 1); single(11478, 1); single(11480, 1); single(11482, 1); single(11484, 1); single(11486, 1); single(11488, 1); single(11490, 1); single(11499, 1); single(11501, 1); single(11506, 1); single(42560, 1); single(42562, 1); single(42564, 1); single(42566, 1); single(42568, 1); single(42570, 1); single(42572, 1); single(42574, 1); single(42576, 1); single(42578, 1); single(42580, 1); single(42582, 1); single(42584, 1); single(42586, 1); single(42588, 1); single(42590, 1); single(42592, 1); single(42594, 1); single(42596, 1); single(42598, 1); single(42600, 1); single(42602, 1); single(42604, 1); single(42624, 1); single(42626, 1); single(42628, 1); single(42630, 1); single(42632, 1); single(42634, 1); single(42636, 1); single(42638, 1); single(42640, 1); single(42642, 1); single(42644, 1); single(42646, 1); single(42786, 1); single(42788, 1); single(42790, 1); single(42792, 1); single(42794, 1); single(42796, 1); single(42798, 1); single(42802, 1); single(42804, 1); single(42806, 1); single(42808, 1); single(42810, 1); single(42812, 1); single(42814, 1); single(42816, 1); single(42818, 1); single(42820, 1); single(42822, 1); single(42824, 1); single(42826, 1); single(42828, 1); single(42830, 1); single(42832, 1); single(42834, 1); single(42836, 1); single(42838, 1); single(42840, 1); single(42842, 1); single(42844, 1); single(42846, 1); single(42848, 1); single(42850, 1); single(42852, 1); single(42854, 1); single(42856, 1); single(42858, 1); single(42860, 1); single(42862, 1); single(42873, 1); single(42875, 1); single(42877, -35332); single(42878, 1); single(42880, 1); single(42882, 1); single(42884, 1); single(42886, 1); single(42891, 1); single(42893, -42280); single(42896, 1); single(42898, 1); single(42912, 1); single(42914, 1); single(42916, 1); single(42918, 1); single(42920, 1); single(42922, -42308); range(65313, 65338, 32); range(66560, 66599, 40); // END GENERATED CONTENT } static { init(); } public static UnicodeString toLower(UnicodeString input) { UnicodeBuilder ub = new UnicodeBuilder((int)input.estimatedLength()); IntIterator iter = input.codePoints(); while (iter.hasNext()) { int cp = iter.next(); if (cp < 256) { // Fast path for latin-1 characters ub.append(latin[cp]); } else { String special = specials.get(cp); if (special != null) { ub.append(special); } else { int delta = ranges.get(cp); if (delta == Integer.MIN_VALUE) { ub.append(cp); } else { ub.append(cp + delta); } } } } return ub.toUnicodeString(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy