net.sf.saxon.serialize.CharacterMap Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of Saxon-HE Show documentation
Show all versions of Saxon-HE Show documentation
The XSLT and XQuery Processor
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2022 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.serialize;
import net.sf.saxon.om.StructuredQName;
import net.sf.saxon.str.UnicodeBuilder;
import net.sf.saxon.str.UnicodeString;
import net.sf.saxon.str.WhitespaceString;
import net.sf.saxon.trace.ExpressionPresenter;
import net.sf.saxon.value.Whitespace;
import net.sf.saxon.z.IntHashMap;
import net.sf.saxon.z.IntIterator;
/**
* This class defines a character map, that is, a mapping from characters to strings used by the serializer
* when mapping individual characters in the output.
*/
public class CharacterMap {
private StructuredQName name;
private final IntHashMap charMap;
private int min = Integer.MAX_VALUE; // the lowest mapped character
private int max = 0; // the highest mapped character
private boolean mapsWhitespace = false;
/**
* Create a CharacterMap from a raw map of integers to strings
*
* @param name the name of the CharacterMap
* @param map the mapping of integer Unicode character codes to strings. This must not be subsequently changed.
*/
public CharacterMap(StructuredQName name, IntHashMap map) {
this.name = name;
this.charMap = map;
init();
}
/**
* Create a CharacterMap that combines a set of existing character maps.
*
* @param list the list of existing character maps. If the same character
* is mapped by more than one map in the list, the last mapping takes
* precedence
*/
public CharacterMap(Iterable list) {
charMap = new IntHashMap<>(64);
for (CharacterMap map : list) {
IntIterator keys = map.charMap.keyIterator();
while (keys.hasNext()) {
int next = keys.next();
charMap.put(next, map.charMap.get(next));
}
}
init();
}
private void init() {
IntIterator keys = charMap.keyIterator();
while (keys.hasNext()) {
int next = keys.next();
if (next < min) {
min = next;
}
if (next > max) {
max = next;
}
if (!mapsWhitespace && Whitespace.isWhite(next)) {
mapsWhitespace = true;
}
}
if (min > 0xD800) {
// if all the mapped characters are above the BMP, we need to check
// surrogates
min = 0xD800;
}
}
/**
* Get the name of the character map
* @return the name given in the stylesheet if this is an un-expanded character map, or null if
* this is an expanded map (a map combined with the character maps that it references)
*/
public StructuredQName getName() {
return name;
}
/**
* Get the contents of the character map
* @return the character map, as a map from integer codepoints to strings
*/
public IntHashMap getMap() {
return charMap;
}
/**
* Expand all the characters in a string using this character mapping
*
* @param in the input string to be mapped
* @param insertNulls true if null (0) characters are to be inserted before
* and after replacement characters. This is done to signal
* that output escaping of these characters is disabled. The flag is set to true when writing
* XML or HTML, but to false when writing TEXT.
* @return a string with all characters expanded
*/
/*@NotNull*/
public UnicodeString map(UnicodeString in, boolean insertNulls) {
if (!mapsWhitespace && in instanceof WhitespaceString) {
return in;
}
// First scan the string to see if there are any possible mapped
// characters; if not, don't bother creating the new buffer
boolean move = false;
IntIterator iter = in.codePoints();
while (iter.hasNext()) {
int c = iter.next();
if (c >= min && c <= max) {
move = true;
break;
}
}
if (!move) {
return in;
}
UnicodeBuilder buffer = new UnicodeBuilder();
iter = in.codePoints();
while (iter.hasNext()) {
int c = iter.next();
if (c >= min && c <= max) {
String rep = charMap.get(c);
if (rep == null) {
buffer.append(c);
} else {
if (insertNulls) {
buffer.append((char) 0);
buffer.append(rep);
buffer.append((char) 0);
} else {
buffer.append(rep);
}
}
} else {
buffer.append(c);
}
}
return buffer.toUnicodeString();
}
/**
* Write the character map to an export file
* @param out the output destination
*/
public void export(ExpressionPresenter out) {
out.startElement("charMap");
out.emitAttribute("name", name);
for (IntIterator iter = charMap.keyIterator(); iter.hasNext();) {
int c = iter.next();
String s = charMap.get(c);
out.startElement("m");
out.emitAttribute("c", c+"");
out.emitAttribute("s", s);
out.endElement();
}
out.endElement();
}
}