org.modeshape.jcr.xml.NodeImportXmlHandler Maven / Gradle / Ivy
/*
* ModeShape (http://www.modeshape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.modeshape.jcr.xml;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.concurrent.atomic.AtomicInteger;
import javax.jcr.RepositoryException;
import org.modeshape.common.annotation.NotThreadSafe;
import org.modeshape.common.collection.LinkedHashMultimap;
import org.modeshape.common.collection.Multimap;
import org.modeshape.common.text.NoOpEncoder;
import org.modeshape.common.text.TextDecoder;
import org.modeshape.common.text.XmlNameEncoder;
import org.modeshape.common.util.StringUtil;
import org.modeshape.jcr.ExecutionContext;
import org.modeshape.jcr.JcrI18n;
import org.modeshape.jcr.JcrLexicon;
import org.modeshape.jcr.api.JcrConstants;
import org.modeshape.jcr.value.Name;
import org.modeshape.jcr.value.NameFactory;
import org.modeshape.jcr.value.NamespaceRegistry;
import org.modeshape.jcr.value.Path;
import org.modeshape.jcr.value.PathFactory;
import org.modeshape.jcr.value.basic.LocalNamespaceRegistry;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.ext.DefaultHandler2;
/**
* A simplified version of the graph xml import handler (from ModeShape 2.x) which is used for importing initial content into
* workspaces.
*
* @author Randall Hauch
* @author Horia Chiorean ([email protected])
*/
@NotThreadSafe
public class NodeImportXmlHandler extends DefaultHandler2 {
/**
* The choices for how attributes that have no namespace prefix should be assigned a namespace.
*
* @author Randall Hauch
*/
public enum AttributeScoping {
/**
* The attribute's namespace is the default namespace
*/
USE_DEFAULT_NAMESPACE,
/**
* The attribute's namespace is the same namespace as the containing element
*/
INHERIT_ELEMENT_NAMESPACE
}
/**
* The default multi-valued delimiter string.
*/
protected static final String DEFAULT_MULTI_VALUE_SEPARATOR = ",";
/**
* Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16
* characters.
*/
private static final TextDecoder XML_DECODER = new XmlNameEncoder();
/**
* The default {@link AttributeScoping}.
*/
private static final AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = AttributeScoping.USE_DEFAULT_NAMESPACE;
/**
* The mandatory name of the xml root element
*/
private static final String ROOT_ELEMENT_NAME = JcrConstants.JCR_ROOT;
/**
* The name of the XML attribute which defines a custom multi value separator.
*/
private static final String MULTI_VALUE_SEPARATOR_ATTRIBUTE = "multi-value-separator";
/**
* The name of the XML attribute whose value should be used for the name of the node. For example, "jcr:name".
*/
private final String nameAttribute;
/**
* The name of the property that is to be set with the type of the XML element. For example, "jcr:primaryType".
*/
private final String typeAttribute;
/**
* The name of the XML attribute which represents the type of a property. By default, this is "type".
*/
private final String propertyTypeAttribute;
/**
* The value of the node type property, if the node's name is set with the {@link #nameAttribute}.
*/
private final String typeAttributeValue;
/**
* The reference to the {@link org.modeshape.jcr.value.NameFactory}
*/
private final NameFactory nameFactory;
/**
* The reference to the {@link org.modeshape.jcr.value.PathFactory}
*/
protected final PathFactory pathFactory;
/**
* The cached reference to the graph's namespace registry.
*/
private final NamespaceRegistry namespaceRegistry;
/**
* The attribute scoping indicating how the namespaces are resolved for elements
*/
private final AttributeScoping attributeScoping;
/**
* The stack of prefixes for each namespace, which is used to keep the {@link #namespaceRegistry local namespace registry} in
* sync with the namespaces in the XML document.
*/
private final Map> prefixStackByUri = new HashMap<>();
/**
* The import destination.
*/
private final NodeImportDestination destination;
/**
* Character buffer to aggregate nested character data
*/
private final StringBuilder characterDataBuffer = new StringBuilder();
/**
* The separator string used for determining multi-valued properties
*/
protected String multiValueSeparator;
private final Stack elementsStack = new Stack<>();
private final List parsedElements = new ArrayList<>();
private LinkedHashMap parsedElementsByPath;
private boolean validateRootElement;
/**
* Creates a new handler instance, using only an execution context and some default values.
*
* @param destination a non-null {@link NodeImportDestination}
*/
public NodeImportXmlHandler( NodeImportDestination destination ) {
this(destination, JcrConstants.JCR_NAME, JcrConstants.JCR_PRIMARY_TYPE, JcrConstants.NT_UNSTRUCTURED,
DEFAULT_MULTI_VALUE_SEPARATOR, DEFAULT_ATTRIBUTE_SCOPING);
}
/**
* Create a handler that parses an xml file.
*
* @param destination a non-null {@link org.modeshape.jcr.xml.NodeImportDestination} which is expected to provide a valid
* context and to handle the results of the import process.
* @param nameAttribute the name of the property whose value should be used for the names of the nodes (typically, this is
* "jcr:name" or something equivalent); or null if the XML element name should always be used as the node name
* @param typeAttribute the name of the property that should be set with the type of the XML element, or null if there is no
* such property
* @param typeAttributeValue the value of the type property that should be used if the node has no nameAttribute
,
* or null if the value should be set to the type of the XML element
* @param multiValueSeparator the string that should be used a separator for creating multi-valued properties.
* @param scoping defines how to choose the namespace of attributes that do not have a namespace prefix; if null, the
* {@link #DEFAULT_ATTRIBUTE_SCOPING} value is used @throws IllegalArgumentException if the destination reference is
* null
*/
public NodeImportXmlHandler( NodeImportDestination destination,
String nameAttribute,
String typeAttribute,
String typeAttributeValue,
String multiValueSeparator,
AttributeScoping scoping ) {
this.nameAttribute = nameAttribute;
this.typeAttribute = typeAttribute;
this.typeAttributeValue = typeAttributeValue;
this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING;
this.destination = destination;
this.multiValueSeparator = !StringUtil.isBlank(multiValueSeparator) ? multiValueSeparator : DEFAULT_MULTI_VALUE_SEPARATOR;
// Set up a local namespace registry that is kept in sync with the namespaces found in this XML document ...
ExecutionContext context = destination.getExecutionContext();
NamespaceRegistry namespaceRegistry = new LocalNamespaceRegistry(context.getNamespaceRegistry());
final ExecutionContext localContext = context.with(namespaceRegistry);
// Set up references to frequently-used objects in the context ...
this.nameFactory = localContext.getValueFactories().getNameFactory();
this.pathFactory = localContext.getValueFactories().getPathFactory();
this.namespaceRegistry = localContext.getNamespaceRegistry();
this.propertyTypeAttribute = createName(null, "type");
assert this.nameFactory != null;
assert this.namespaceRegistry != null;
}
/**
* {@inheritDoc}
*
* This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix
* to register the namespace if required. Note that because this class does not really use the namespace prefixes to create
* {@link Name} objects, no attempt is made to match the XML namespace prefixes.
*
*
* @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String)
*/
@Override
public void startPrefixMapping( String prefix,
String uri ) {
assert uri != null;
// Add the prefix to the stack ...
LinkedList prefixStack = this.prefixStackByUri.get(uri);
if (prefixStack == null) {
prefixStack = new LinkedList<>();
this.prefixStackByUri.put(uri, prefixStack);
}
prefixStack.addFirst(prefix);
// If the namespace is already registered, then we'll have to register it in the context's registry, too.
if (!namespaceRegistry.isRegisteredNamespaceUri(uri)) {
ExecutionContext destinationContext = destination.getExecutionContext();
// The namespace is not already registered (locally or in the context's registry), so we have to
// register it with the context's registry (which the local register then inherits).
NamespaceRegistry contextRegistry = destinationContext.getNamespaceRegistry();
if (contextRegistry.getNamespaceForPrefix(prefix) != null) {
// The prefix is already bound, so register and generate a unique prefix
destinationContext.getNamespaceRegistry().getPrefixForNamespaceUri(uri, true);
// Now register locally with the supplied prefix ...
namespaceRegistry.register(prefix, uri);
} else {
destinationContext.getNamespaceRegistry().register(prefix, uri);
}
} else {
// It is already registered, but re-register it locally using the supplied prefix ...
namespaceRegistry.register(prefix, uri);
}
}
@Override
public void endPrefixMapping( String prefix ) {
assert prefix != null;
// Get the current URI for this prefix ...
String uri = namespaceRegistry.getNamespaceForPrefix(prefix);
assert uri != null;
// Get the previous prefix from the stack ...
LinkedList prefixStack = this.prefixStackByUri.get(uri);
assert prefixStack != null;
assert !prefixStack.isEmpty();
String existingPrefix = prefixStack.removeFirst();
assert prefix.equals(existingPrefix);
// If there are no previous prefixes, then remove the mapping ...
if (prefixStack.isEmpty()) {
namespaceRegistry.unregister(uri);
prefixStackByUri.remove(uri);
} else {
String previous = prefixStack.getFirst();
namespaceRegistry.register(previous, uri);
}
}
@Override
public void startElement( String uri,
String localName,
String name,
Attributes attributes ) throws SAXException {
// the root element should only be validated and not taken into account
if (validateRootElement) {
if (!name.equalsIgnoreCase(ROOT_ELEMENT_NAME)) {
throw new SAXException(JcrI18n.errorDuringInitialImport.text("Root xml element must be " + ROOT_ELEMENT_NAME));
}
parseCustomSettings(attributes);
validateRootElement = false;
return;
}
assert localName != null;
String nodeName = null;
ImportElement parent = elementsStack.isEmpty() ? null : elementsStack.peek();
ImportElement element = new ImportElement(parent);
elementsStack.push(element);
String typePropertyValue = null;
// Convert each of the attributes to a property ...
for (int i = 0, len = attributes.getLength(); i != len; ++i) {
String attributeLocalName = attributes.getLocalName(i);
String attributeUri = attributes.getURI(i);
String attributeName = null;
if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) {
switch (this.attributeScoping) {
case INHERIT_ELEMENT_NAMESPACE:
attributeName = createName(uri, attributeLocalName);
break;
case USE_DEFAULT_NAMESPACE:
attributeName = createName(null, attributeLocalName);
break;
}
} else {
attributeName = createName(attributeUri, attributeLocalName);
}
assert attributeName != null;
// Check to see if this is an attribute that represents the node name (which may be null) ...
if (nodeName == null && attributeName.equalsIgnoreCase(nameAttribute)) {
nodeName = createName(null, attributes.getValue(i));
element.setName(nodeName);
continue;
}
if (typePropertyValue == null && attributeName.equalsIgnoreCase(typeAttribute)) {
typePropertyValue = createName(null, attributes.getValue(i)); // don't use a decoder
element.setType(typePropertyValue);
continue;
}
// Create a property for this attribute ...
element.addProperty(attributeName, attributes.getValue(i), org.modeshape.jcr.value.PropertyType.STRING);
}
// Create the default node name if no explicit name has been configured
if (nodeName == null) {
// No attribute defines the node name ...
nodeName = createName(uri, localName);
element.setName(nodeName);
}
// Create the default node type if no explicit type has been configured
if (typePropertyValue == null) {
if (typeAttributeValue != null) {
// there is a preconfigured type to use, so use that
element.setType(typeAttributeValue);
} else {
// there is no default value for the type, so use nt:unstructured
element.setType(JcrConstants.NT_UNSTRUCTURED);
}
}
}
private void parseCustomSettings( Attributes attributes ) {
// parse any custom attributes of the root element, which would indicate some custom settings
for (int i = 0; i < attributes.getLength(); i++) {
String attributeLocalName = attributes.getLocalName(i);
if (attributeLocalName.equalsIgnoreCase(MULTI_VALUE_SEPARATOR_ATTRIBUTE)) {
this.multiValueSeparator = attributes.getValue(i);
}
}
}
protected String createName( String uri,
String localName ) {
return !StringUtil.isBlank(uri) ? nameFactory.create(uri, localName, XML_DECODER).getString(NoOpEncoder.getInstance()) : nameFactory.create(localName,
XML_DECODER)
.getString(NoOpEncoder.getInstance());
}
@Override
public void endElement( String uri,
String localName,
String name ) {
if (name.equalsIgnoreCase(ROOT_ELEMENT_NAME)) {
return;
}
ImportElement entry = elementsStack.pop();
String s = characterDataBuffer.toString().trim();
if (s.length() > 0) {
// there is char data
if (entry.looksLikeProperty()) {
// This is just a child element that is really a property ...
entry.setPropertyOnParent(s);
} else {
// This is actually a child node that fits the JCR 'jcr:xmlcharacters' pattern ...
entry.addProperty(JcrLexicon.XMLCHARACTERS.toString(), s, org.modeshape.jcr.value.PropertyType.STRING);
parsedElements.add(entry);
}
} else {
parsedElements.add(entry);
}
characterDataBuffer.delete(0, characterDataBuffer.length());
}
@Override
public void characters( char[] ch,
int start,
int length ) {
if (validateRootElement) {
return;
}
// Have to add this to a buffer as one logical set of character data can cause this method to fire multiple times
characterDataBuffer.append(ch, start, length);
}
@SuppressWarnings( "unused" )
@Override
public void startDocument() throws SAXException {
this.validateRootElement = true;
}
@Override
public void endDocument() throws SAXException {
this.validateRootElement = false;
try {
this.destination.submit(getParsedElementByPath());
} catch (RepositoryException e) {
throw new SAXException(e);
}
this.parsedElements.clear();
}
private LinkedHashMap getParsedElementByPath() {
if (parsedElementsByPath == null) {
parsedElementsByPath = new LinkedHashMap<>();
int depthLevel = 1;
// convert the list of parsed elements to a map making sure that the parents come before the children and the children
// order is preserved
while (!parsedElements.isEmpty()) {
for (Iterator elementIterator = parsedElements.iterator(); elementIterator.hasNext();) {
ImportElement element = elementIterator.next();
if (element.getPath().size() == depthLevel) {
parsedElementsByPath.put(element.getPath(), element);
elementIterator.remove();
}
}
++depthLevel;
}
}
return parsedElementsByPath;
}
/**
* Element entries represent in-memory representations of the xml elements (either nodes or properties) encountered between a
* {@link NodeImportXmlHandler#startElement(String, String, String, Attributes)} and a
* {@link NodeImportXmlHandler#endElement(String, String, String)} event.
*/
@SuppressWarnings( "synthetic-access" )
public class ImportElement {
private final Multimap properties = LinkedHashMultimap.create();
private final Map propertyTypes = new HashMap<>();
private final List mixins = new ArrayList<>();
private final Map childSnsIndexes = new HashMap<>();
private String name;
private String type;
private Path path;
private ImportElement parent;
private String propertyType;
protected ImportElement( ImportElement parent ) {
this.parent = parent;
}
/**
* Returns whether this element entry looks (at this point) like a property element: either it has no properties or has
* just one {@link org.modeshape.jcr.xml.NodeImportXmlHandler#propertyTypeAttribute} type attribute.
*
* @return true if this looks like a property element, or false otherwise
*/
protected final boolean looksLikeProperty() {
return properties.size() == 0 || (properties.size() == 1 && properties.containsKey(propertyTypeAttribute));
}
private int getNextSnsForChildNamed( String childName ) {
AtomicInteger snsIndex = childSnsIndexes.get(childName);
if (snsIndex == null) {
childSnsIndexes.put(childName, new AtomicInteger(0));
}
return childSnsIndexes.get(childName).incrementAndGet();
}
protected void setName( String name ) {
this.name = name;
int snsIndex = 1;
if (parent != null) {
snsIndex = parent.getNextSnsForChildNamed(name);
this.path = pathFactory.create(parent.getPath(), name, snsIndex);
} else {
this.path = pathFactory.create("/" + name);
}
}
protected void setType( String type ) {
this.type = type;
}
protected void addProperty( String propertyName,
String propertyValue,
org.modeshape.jcr.value.PropertyType jcrPropertyType ) {
if (propertyName.equalsIgnoreCase(propertyTypeAttribute)) {
propertyType = propertyValue;
}
String[] values = propertyValue.split(multiValueSeparator);
for (String value : values) {
if (jcrPropertyType != org.modeshape.jcr.value.PropertyType.STRING) {
// for everything but strings we trim the value
value = value.trim();
}
if (propertyName.equals(JcrConstants.JCR_MIXIN_TYPES)) {
mixins.add(createName(null, value.trim()));
} else {
properties.put(propertyName, value);
}
}
propertyTypes.put(propertyName, jcrPropertyType);
}
protected void setPropertyOnParent( String value ) {
parent.addPropertyWithType(name, value, propertyType);
}
private void addPropertyWithType( String propertyName,
String propertyValue,
String propertyType ) {
if (propertyType == null) {
// there is no specified type, so add it as a simple string
propertyType = "string";
}
switch (propertyType.toLowerCase()) {
case "simplereference": {
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.SIMPLEREFERENCE);
break;
}
case "weakreference": {
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.WEAKREFERENCE);
break;
}
case "reference": {
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.REFERENCE);
break;
}
case "long": {
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.LONG);
break;
}
case "double": {
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.DOUBLE);
break;
}
case "date": {
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.DATE);
break;
}
case "decimal": {
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.DECIMAL);
break;
}
case "boolean": {
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.BOOLEAN);
break;
}
case "name": {
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.NAME);
break;
}
case "path": {
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.PATH);
break;
}
case "uri": {
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.URI);
break;
}
case "binary": {
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.BINARY);
break;
}
default: {
// by default we treat it a string
addProperty(propertyName, propertyValue, org.modeshape.jcr.value.PropertyType.STRING);
}
}
}
/**
* Returns the name of the import element, which should translate to the name of a jcr node.
*
* @return a non-null {@link String}
*/
public String getName() {
return name;
}
/**
* Returns the name of the import element, which should translate to the type of a jcr node.
*
* @return a non-null {@link String}
*/
public String getType() {
return type;
}
/**
* Returns the list of mixins of the import element, which should translate to the mixins of a jcr node.
*
* @return a non-null {@link List}
*/
public List getMixins() {
return mixins;
}
/**
* Returns the imported element's properties.
*
* @return a non-null {@link Multimap}
*/
public Multimap getProperties() {
return properties;
}
/**
* Returns the JCR property type of the property with the given name.
*
* @param propertyName a {@link String} the name of a property; never null
* @return either a {@link org.modeshape.jcr.value.PropertyType} representing the JCR type of the property or {@code null}
* if the no property with the given name was parsed.
*/
public org.modeshape.jcr.value.PropertyType getPropertyType( String propertyName ) {
return propertyTypes.get(propertyName);
}
/**
* Returns the path of this import element, which translates to the path of the jcr node.
*
* @return a non-null {@link Path}
*/
public Path getPath() {
return path;
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
sb.append("ImportElement");
sb.append("{name='").append(name).append('\'');
sb.append(", path=").append(path);
sb.append(", type='").append(type).append('\'');
if (!properties.isEmpty()) {
sb.append(", properties=").append(properties);
sb.append(", types=").append(propertyTypes);
}
if (!mixins.isEmpty()) {
sb.append(", mixins=").append(mixins);
}
sb.append('}');
return sb.toString();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy