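// NOTE: the lines below are page text from the site this file was downloaded from; they are not part of the source.
// All downloads are free. Search and download functionality uses the official Maven repository.

// net.sf.saxon.regex.CaseVariants (Maven / Gradle / Ivy)

// There is a newer version: 12.5
// Show newest version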
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2023 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.regex;

import net.sf.saxon.Configuration;
import net.sf.saxon.Version;
import net.sf.saxon.lib.ParseOptions;
import net.sf.saxon.lib.Validation;
import net.sf.saxon.om.AxisInfo;
import net.sf.saxon.om.NamespaceUri;
import net.sf.saxon.om.NodeInfo;
import net.sf.saxon.pattern.NameTest;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.tree.iter.AxisIterator;
import net.sf.saxon.type.Type;
import net.sf.saxon.z.IntArraySet;
import net.sf.saxon.z.IntHashMap;
import net.sf.saxon.z.IntToIntHashMap;
import net.sf.saxon.z.IntToIntMap;

import javax.xml.transform.stream.StreamSource;
import java.io.InputStream;
import java.util.ArrayList;

/**
 * This class holds data about the case-variants of Unicode characters. The data is automatically
 * generated from the Unicode database.
 */
public class CaseVariants {

    // Use one hashmap for characters with a single case variant, another for characters with multiple
    // case variants, to reduce the number of objects that need to be allocated

    private static IntToIntMap monoVariants = null;
    private static IntHashMap polyVariants = null;


    static void build() {

        monoVariants = new IntToIntHashMap(2500);
        polyVariants = new IntHashMap<>(100);

        InputStream in = Version.platform.locateResource("casevariants.xml", new ArrayList<>());
        if (in == null) {
            throw new RuntimeException("Unable to read casevariants.xml file");
        }

        Configuration config = new Configuration();
        ParseOptions options = new ParseOptions()
                .withSchemaValidationMode(Validation.SKIP)
                .withDTDValidationMode(Validation.SKIP)
                .withPleaseCloseAfterUse(true);
        NodeInfo doc;
        try {
            doc = config.buildDocumentTree(new StreamSource(in, "casevariants.xml"), options).getRootNode();
        } catch (XPathException e) {
            throw new RuntimeException("Failed to build casevariants.xml", e);
        }

        AxisIterator iter = doc.iterateAxis(AxisInfo.DESCENDANT,
                                            new NameTest(Type.ELEMENT, NamespaceUri.NULL, "c", config.getNamePool()));
        while (true) {
            NodeInfo item = iter.next();
            if (item == null) {
                break;
            }
            String code = item.getAttributeValue(NamespaceUri.NULL, "n");
            int icode = Integer.parseInt(code, 16);
            String variants = item.getAttributeValue(NamespaceUri.NULL, "v");
            String[] vhex = variants.split(",");
            int[] vint = new int[vhex.length];
            for (int i=0; i{
//    let $chars := doc('ucd.all.flat.xml')/ * / * /u:char[@suc!='#' or @slc!='#']
//    for $c in $chars
//    let $variants := ($chars[(@cp, @suc[.!='#']) = $c/(@cp, @suc[.!='#'])] |
//                          $chars[(@cp, @slc[.!='#']) = $c/(@cp, @slc[.!='#'])]) except $c
//    return
//         if (count($variants) gt 0) then
//           
//         else ()
//
//    }

}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy (download-site footer; not part of the source)