net.sf.saxon.regex.CaseVariants Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of Saxon-HE Show documentation
Show all versions of Saxon-HE Show documentation
The XSLT and XQuery Processor
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2023 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.regex;
import net.sf.saxon.Configuration;
import net.sf.saxon.Version;
import net.sf.saxon.lib.ParseOptions;
import net.sf.saxon.lib.Validation;
import net.sf.saxon.om.AxisInfo;
import net.sf.saxon.om.NamespaceUri;
import net.sf.saxon.om.NodeInfo;
import net.sf.saxon.pattern.NameTest;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.tree.iter.AxisIterator;
import net.sf.saxon.type.Type;
import net.sf.saxon.z.IntArraySet;
import net.sf.saxon.z.IntHashMap;
import net.sf.saxon.z.IntToIntHashMap;
import net.sf.saxon.z.IntToIntMap;
import javax.xml.transform.stream.StreamSource;
import java.io.InputStream;
import java.util.ArrayList;
/**
* This class holds data about the case-variants of Unicode characters. The data is automatically
* generated from the Unicode database.
*
* <p>The data lives in two static maps that are both {@code null} until {@code build()} has run.
* {@code build()} allocates the maps and reads the resource {@code casevariants.xml}, in which each
* {@code c} element carries a hexadecimal character code in its {@code n} attribute and a
* comma-separated list of hexadecimal case variants in its {@code v} attribute.</p>
*
* <p>NOTE(review): the text of this class appears to be truncated part-way through {@code build()}
* (see the incomplete {@code for} statement). The code that stores the parsed values into the maps,
* and any accessor methods, are not visible here - restore them from the original source before
* relying on this file.</p>
*/
public class CaseVariants {
// Use one hashmap for characters with a single case variant, another for characters with multiple
// case variants, to reduce the number of objects that need to be allocated
// Character code -> its single case variant. Null until build() has been called.
private static IntToIntMap monoVariants = null;
// Characters with more than one case variant. Declared as a raw IntHashMap, so the value type
// is not visible here - presumably an int[] of variants; TODO confirm against the code that
// populates it. Null until build() has been called.
private static IntHashMap polyVariants = null;
/**
* Allocate the two variant maps and read the case-variant data from the resource
* {@code casevariants.xml}.
*
* <p>The resource is located through {@code Version.platform.locateResource}, parsed into a
* document tree using a freshly created {@code Configuration}, and every descendant element
* named {@code c} (in no namespace) is visited.</p>
*
* @throws RuntimeException if the resource cannot be located, or if building the document tree
* fails with an {@code XPathException} (which is preserved as the cause)
* @throws NumberFormatException if an {@code n} attribute is not a valid hexadecimal integer
*/
static void build() {
// Constructor arguments are presumably initial capacities - TODO confirm.
monoVariants = new IntToIntHashMap(2500);
polyVariants = new IntHashMap<>(100);
// The second argument is a freshly created empty list that is not used again in this method.
InputStream in = Version.platform.locateResource("casevariants.xml", new ArrayList<>());
if (in == null) {
throw new RuntimeException("Unable to read casevariants.xml file");
}
// A private Configuration is created just for parsing this resource; its name pool is also
// used below to construct the NameTest.
Configuration config = new Configuration();
// Both schema validation and DTD validation are set to Validation.SKIP.
// withPleaseCloseAfterUse(true) presumably asks the parser to close the input stream once
// parsing has finished - TODO confirm against the ParseOptions documentation.
ParseOptions options = new ParseOptions()
.withSchemaValidationMode(Validation.SKIP)
.withDTDValidationMode(Validation.SKIP)
.withPleaseCloseAfterUse(true);
NodeInfo doc;
try {
// "casevariants.xml" is passed as the system identifier of the StreamSource.
doc = config.buildDocumentTree(new StreamSource(in, "casevariants.xml"), options).getRootNode();
} catch (XPathException e) {
throw new RuntimeException("Failed to build casevariants.xml", e);
}
// Select every descendant element of the document node named "c" in no namespace.
AxisIterator iter = doc.iterateAxis(AxisInfo.DESCENDANT,
new NameTest(Type.ELEMENT, NamespaceUri.NULL, "c", config.getNamePool()));
while (true) {
// iter.next() returning null marks the end of the sequence.
NodeInfo item = iter.next();
if (item == null) {
break;
}
// Attribute "n" holds the character code, written in hexadecimal.
String code = item.getAttributeValue(NamespaceUri.NULL, "n");
int icode = Integer.parseInt(code, 16);
// Attribute "v" holds the comma-separated list of case variants.
String variants = item.getAttributeValue(NamespaceUri.NULL, "v");
String[] vhex = variants.split(",");
// One int slot per entry of vhex.
int[] vint = new int[vhex.length];
// NOTE(review): the statement below is syntactically incomplete - the loop condition, the
// loop body, the remainder of build() and whatever class members followed it appear to have
// been lost. Presumably the loop parsed each vhex entry into vint and the result was stored in
// monoVariants or polyVariants; TODO restore from the original source.
for (int i=0; i{
// NOTE(review): the lines below look like the tail of an XQuery expression documenting how the
// data was generated from ucd.all.flat.xml. Its element constructors appear to be missing (the
// "then" branch is empty), so treat it as incomplete documentation.
// let $chars := doc('ucd.all.flat.xml')/ * / * /u:char[@suc!='#' or @slc!='#']
// for $c in $chars
// let $variants := ($chars[(@cp, @suc[.!='#']) = $c/(@cp, @suc[.!='#'])] |
// $chars[(@cp, @slc[.!='#']) = $c/(@cp, @slc[.!='#'])]) except $c
// return
// if (count($variants) gt 0) then
//
// else ()
//
// }
}