
org.n52.youngs.transform.impl.YamlMappingConfiguration Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of main Show documentation
Show all versions of main Show documentation
A mapping platform to load CSW records into Elasticsearch
/*
* Copyright 2015-2016 52°North Initiative for Geospatial Open Source
* Software GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.n52.youngs.transform.impl;
import com.github.autermann.yaml.Yaml;
import com.github.autermann.yaml.YamlNode;
import com.github.autermann.yaml.nodes.YamlBooleanNode;
import com.github.autermann.yaml.nodes.YamlDecimalNode;
import com.github.autermann.yaml.nodes.YamlIntegralNode;
import com.github.autermann.yaml.nodes.YamlMapNode;
import com.github.autermann.yaml.nodes.YamlSeqNode;
import com.github.autermann.yaml.nodes.YamlTextNode;
import com.google.common.base.Joiner;
import com.google.common.base.MoreObjects;
import org.n52.youngs.transform.MappingEntry;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.Resources;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.xml.namespace.NamespaceContext;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.n52.youngs.exception.MappingError;
import org.n52.youngs.impl.XPathHelper;
import org.n52.youngs.transform.MappingConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
/**
* @author Daniel Nüst
*/
public class YamlMappingConfiguration extends NamespacedYamlConfiguration implements MappingConfiguration {
// Class-wide logger.
private static final Logger log = LoggerFactory.getLogger(YamlMappingConfiguration.class);
// Mapping entries created from the "mappings" node; init() sorts them by field name.
// NOTE(review): declared without type arguments here; usage in init() suggests the
// elements are MappingEntry instances - confirm and parameterize.
List entries = Lists.newArrayList();
// Value of the "xpathversion" key; checked against the XPath factory in init().
private String xpathVersion = DEFAULT_XPATH_VERSION;
// Value of the "version" key (DEFAULT_VERSION is used when the key is absent).
private int version;
// Value of the "name" key.
private String name = DEFAULT_NAME;
// Factory obtained from the XPathHelper in the constructor; source of all XPath instances.
private XPathFactory xpathFactory;
// Compiled "applicability_xpath" expression; stays empty when compilation fails.
private Optional applicabilityExpression = Optional.empty();
// Value of "index.type".
private String type = DEFAULT_TYPE;
// Value of "index.name".
private String index = DEFAULT_INDEX;
// Value of "index.create".
private boolean indexCreationEnabled = DEFAULT_INDEX_CREATION;
// Value of "index.dynamic_mapping" (DEFAULT_DYNAMIC_MAPPING is used when the key is absent).
private boolean dynamicMappingEnabled;
// Text value of "index.settings", present only when that key is set.
private Optional indexCreationRequest = Optional.empty();
// Helper passed to the constructor; supplies the XPath factory and the version check.
private final XPathHelper xpathHelper;
// Field name of the single entry marked as 'identifier'; set in init().
private String identifierField;
// Field name of the entry marked as 'location', if there is one.
private Optional locationField = Optional.empty();
/**
 * Creates a configuration from a resource that is looked up by name.
 *
 * The name is resolved with {@code Resources.getResource(fileName)}, the resulting
 * resource is opened as a stream and handed to the stream-based constructor.
 *
 * NOTE(review): the stream opened here is not closed by this constructor - confirm
 * whether the YAML loader closes it, otherwise it stays open.
 *
 * @param fileName name of the resource containing the YAML mapping
 * @param xpathHelper helper used to create and check the XPath factory
 * @throws IOException if the resource cannot be opened as a stream
 */
public YamlMappingConfiguration(String fileName, XPathHelper xpathHelper) throws IOException {
this(Resources.asByteSource(Resources.getResource(fileName)).openStream(), xpathHelper);
log.info("Created configuration from filename {}", fileName);
}
/**
 * Creates a configuration by parsing the YAML document read from the given stream.
 *
 * When the loader yields no root node, an error is logged and all settings keep the
 * values they were declared with; otherwise the namespace context is parsed from the
 * root node and both are passed on to {@code init}.
 *
 * @param input stream providing the YAML mapping
 * @param xpathHelper helper that supplies the XPath factory
 */
public YamlMappingConfiguration(InputStream input, XPathHelper xpathHelper) {
    this.xpathHelper = xpathHelper;
    this.xpathFactory = xpathHelper.newXPathFactory();

    final YamlNode root = new Yaml().load(input);
    if (root != null) {
        log.trace("Read configuration file with the root elements {}", Joiner.on(" ").join(root));
        init(root, parseNamespaceContext(root));
    } else {
        log.error("Could not load configuration from {}, nodes: {}", input, root);
    }

    log.info("Created configuration from stream {} with {} entries", input, entries.size());
}
/**
 * Reads all settings from the parsed YAML root node into this configuration.
 *
 * General settings ("name", "version", "xpathversion") and the optional "index" section
 * are read first, each falling back to its default when absent. Then the applicability
 * XPath is compiled and, if a "mappings" section exists, one entry per mapping is
 * created, validated and stored sorted by field name.
 *
 * @param configurationNodes root node of the YAML document
 * @param nsContext namespace context used for every XPath that is compiled
 * @throws MappingError if the XPath factory does not support the configured version, if
 *         not exactly one mapping is marked as 'identifier', if more than one mapping is
 *         marked as 'location', or if a mapping entry cannot be created
 */
private void init(YamlNode configurationNodes, NamespaceContext nsContext) {
    // general settings
    this.name = configurationNodes.path("name").asTextValue(DEFAULT_NAME);
    this.version = configurationNodes.path("version").asIntValue(DEFAULT_VERSION);
    this.xpathVersion = configurationNodes.path("xpathversion").asTextValue(DEFAULT_XPATH_VERSION);

    // index settings
    if (configurationNodes.hasNotNull("index")) {
        YamlNode indexField = configurationNodes.get("index");
        this.index = indexField.path("name").asTextValue(DEFAULT_INDEX);
        this.indexCreationEnabled = indexField.path("create").asBooleanValue(DEFAULT_INDEX_CREATION);
        this.dynamicMappingEnabled = indexField.path("dynamic_mapping").asBooleanValue(DEFAULT_DYNAMIC_MAPPING);
        this.type = indexField.path("type").asTextValue(DEFAULT_TYPE);
        if (indexField.hasNotNull("settings")) {
            this.indexCreationRequest = Optional.of(indexField.get("settings").asTextValue());
        }
    }

    if (!this.xpathHelper.isVersionSupported(xpathFactory, xpathVersion)) {
        throw new MappingError("Provided factory {} does not support version {}", xpathFactory, xpathVersion);
    }
    log.debug("Using XPathFactory {}", xpathFactory);

    // applicability expression; a compile failure is logged and leaves the field empty
    String applicabilityXPathString = configurationNodes
            .path("applicability_xpath").asTextValue(DEFAULT_APPLICABILITY_PATH);
    XPath path = newXPath(nsContext);
    try {
        applicabilityExpression = Optional.of(path.compile(applicabilityXPathString));
    } catch (XPathExpressionException e) {
        log.error("Could not compile applicability xpath, will always evalute to true", e);
    }

    if (!configurationNodes.hasNotNull("mappings")) {
        return;
    }

    YamlMapNode mappingsNode = configurationNodes.path("mappings").asMap();
    // Typed local view of the list stored in the field, so that the stream operations
    // below are type-checked against MappingEntry.
    List<MappingEntry> parsedEntries = Lists.newArrayList();
    this.entries = parsedEntries;
    for (Entry<YamlNode, YamlNode> entry : mappingsNode.entries()) { // plain loop so MappingError propagates
        MappingEntry e = createEntry(entry.getKey().asTextValue(), entry.getValue(), nsContext);
        log.trace("Created entry: {}", e);
        parsedEntries.add(e);
    }

    // ensure exactly one field is identifier
    long idCount = parsedEntries.stream().filter(MappingEntry::isIdentifier).count();
    if (idCount > 1) {
        List<String> entriesWithId = parsedEntries.stream().filter(MappingEntry::isIdentifier)
                .map(MappingEntry::getFieldName).collect(Collectors.toList());
        log.error("Found more than one entries marked as 'identifier': {}", Arrays.toString(entriesWithId.toArray()));
        throw new MappingError("More than one field are marked as 'identifier'. Found {}: {}", idCount,
                Arrays.toString(entriesWithId.toArray()));
    }
    Optional<MappingEntry> identifier = parsedEntries.stream().filter(MappingEntry::isIdentifier).findFirst();
    if (!identifier.isPresent()) {
        throw new MappingError("No field is marked as 'identifier', exactly one must be.");
    }
    this.identifierField = identifier.get().getFieldName();
    log.trace("Found identifier field '{}'", this.identifierField);

    // ensure not more than one field is location
    long locationCount = parsedEntries.stream().filter(MappingEntry::isLocation).count();
    if (locationCount > 1) {
        // report the fields marked as location (not the identifier fields) and their count
        List<String> entriesWithLocation = parsedEntries.stream().filter(MappingEntry::isLocation)
                .map(MappingEntry::getFieldName).collect(Collectors.toList());
        log.error("Found more than one entries marked as 'location': {}", Arrays.toString(entriesWithLocation.toArray()));
        throw new MappingError("More than one field are marked as 'location'. Found {}: {}", locationCount,
                Arrays.toString(entriesWithLocation.toArray()));
    }
    Optional<MappingEntry> location = parsedEntries.stream().filter(MappingEntry::isLocation).findFirst();
    if (location.isPresent()) {
        String locationFieldName = location.get().getFieldName();
        this.locationField = Optional.of(locationFieldName);
        log.trace("Found location field '{}'", locationFieldName);
    } else {
        log.warn("No field is marked as 'location'.");
    }

    // sort list by field name
    Collections.sort(parsedEntries, (me1, me2) -> me1.getFieldName().compareTo(me2.getFieldName()));
}
/**
 * Creates the mapping entry for one field of the "mappings" section.
 *
 * The node must be a map node. Its "xpath" property is compiled into the entry's
 * expression; the flags "identifier", "location" and "raw_xml" default to false. The
 * optional properties "coordinates" (together with "coordinates_type"), "replacements",
 * "split" and "output_properties" are applied to the entry when present.
 *
 * @param id field name of the mapping
 * @param node YAML node holding the mapping definition
 * @param nsContext namespace context used for every XPath that is compiled
 * @return the configured entry
 * @throws MappingError if the node is not a map node, if the "xpath" property is missing
 *         or cannot be compiled, or if "coordinates" is given without "coordinates_type"
 *         or without a "points" element
 */
private MappingEntry createEntry(String id, YamlNode node, NamespaceContext nsContext) throws MappingError {
    log.trace("Parsing mapping '{}'", id);
    if (!(node instanceof YamlMapNode)) {
        throw new MappingError("The provided node class %s is not supported in the mapping '%s': %s",
                node.getClass().toString(), id, node.toString());
    }

    YamlMapNode mapNode = (YamlMapNode) node;
    Map<String, Object> indexProperties = createIndexProperties(id, node);
    boolean isIdentifier = mapNode.path("identifier").asBooleanValue(false);
    boolean isLocation = mapNode.path("location").asBooleanValue(false);
    boolean isXml = mapNode.path("raw_xml").asBooleanValue(false);

    String expression = mapNode.path("xpath").asTextValue();
    if (expression == null) {
        // XPath.compile(null) would fail with a bare NullPointerException that does not name the field
        log.error("Missing property 'xpath' in field {}", id);
        throw new MappingError("Missing property 'xpath' in field %s", id);
    }

    XPath xPath = newXPath(nsContext);
    try {
        XPathExpression compiledExpression = xPath.compile(expression);
        MappingEntryImpl entry = new MappingEntryImpl(id,
                compiledExpression,
                indexProperties,
                isIdentifier,
                isLocation,
                isXml);
        log.trace("Starting new entry: {}", entry);

        // geo types
        if (mapNode.hasNotNull("coordinates")) {
            String coordsType = mapNode.path("coordinates_type").asTextValue();
            boolean points = mapNode.path("coordinates").has("points");
            if (coordsType == null || !points) {
                log.error("Missing properties for field {} for coordinates type: coordinates_type = {}, coordinates.points contained = {}",
                        entry.getFieldName(), coordsType, points);
                throw new MappingError("Missing properties in field %s for coordinates type: coordinates_type = %s, coordinates.points contained = %s",
                        entry.getFieldName(), coordsType, points);
            }

            YamlSeqNode pointsNode = (YamlSeqNode) mapNode.path("coordinates").path("points");
            List<XPathExpression[]> pointExpressions = compilePointExpressions(id, pointsNode, nsContext);
            log.trace("Created {} points for {}", pointExpressions.size(), id);
            entry.setCoordinatesXPaths(pointExpressions).setCoordinatesType(coordsType);
        }

        if (mapNode.hasNotNull("replacements")) {
            Map<String, String> replacements = parseStringPairs(
                    (YamlSeqNode) mapNode.path("replacements"), "replace", "with");
            log.trace("Parsed replacements: {}", Arrays.toString(replacements.entrySet().toArray()));
            entry.setReplacements(replacements);
        }

        if (mapNode.hasNotNull("split")) {
            String split = mapNode.path("split").asTextValue();
            log.trace("Parsed split: {}", split);
            entry.setSplit(split);
        }

        // for raw types
        if (mapNode.hasNotNull("output_properties")) {
            Map<String, String> op = parseStringPairs(
                    (YamlSeqNode) mapNode.path("output_properties"), "name", "value");
            log.trace("Parsed outputProperties: {}", Arrays.toString(op.entrySet().toArray()));
            entry.setOutputProperties(op);
        }

        return entry;
    } catch (XPathExpressionException e) {
        log.error("Could not create XPath for provided expression '{}' in field {}", expression, id, e);
        throw new MappingError(e, "Could not create XPath for provided expression '%s' in field %s",
                expression, id);
    }
}

/**
 * Compiles the "lat"/"lon" expressions of every map node in the given sequence into a
 * two-element array {lat, lon}. Elements that are not map nodes are ignored; a point
 * whose expressions do not compile is logged as a warning and left out.
 */
private List<XPathExpression[]> compilePointExpressions(String id, YamlSeqNode pointsNode,
        NamespaceContext nsContext) {
    return pointsNode.value().stream()
            .filter(n -> n instanceof YamlMapNode)
            .map(n -> (YamlMapNode) n)
            .map(mn -> {
                String expressionStringLat = mn.path("lat").asTextValue();
                String expressionStringLon = mn.path("lon").asTextValue();
                try {
                    XPathExpression compiledLat = newXPath(nsContext).compile(expressionStringLat);
                    XPathExpression compiledLon = newXPath(nsContext).compile(expressionStringLon);
                    return new XPathExpression[]{compiledLat, compiledLon};
                } catch (XPathExpressionException e) {
                    log.warn("Error creating xpath '{}' or '{}' for point in field {}: {}",
                            expressionStringLat, expressionStringLon, id, e.getMessage());
                    return null;
                }
            })
            .filter(Objects::nonNull)
            .collect(Collectors.toList());
}

/**
 * Collects the text values found under keyName and valueName of every map node in the
 * given sequence into a map. Elements that are not map nodes are ignored; for repeated
 * keys the last element wins.
 */
private Map<String, String> parseStringPairs(YamlSeqNode pairsNode, String keyName, String valueName) {
    // Filled with put() rather than Collectors.toMap, which rejects duplicate keys and null values.
    Map<String, String> pairs = Maps.newHashMap();
    for (YamlNode item : pairsNode.value()) {
        if (item instanceof YamlMapNode) {
            YamlMapNode pairNode = (YamlMapNode) item;
            pairs.put(pairNode.path(keyName).asTextValue(), pairNode.path(valueName).asTextValue());
        }
    }
    return pairs;
}
/**
 * Obtains a new XPath from this configuration's factory and sets the given namespace
 * context on it.
 *
 * @param nsContext namespace context to set on the new XPath
 * @return the new XPath instance
 */
private XPath newXPath(NamespaceContext nsContext) {
    final XPath created = this.xpathFactory.newXPath();
    created.setNamespaceContext(nsContext);
    return created;
}
private Map createIndexProperties(String id, YamlNode node) {
Map props = Maps.newHashMap();
if (node.hasNotNull("properties")) {
final YamlMapNode valueMap = node.path("properties").asMap();
Map indexProperties = valueMap.entries().stream().collect(Collectors.toMap(Entry::getKey, Entry::getValue));
indexProperties.forEach((YamlNode k, YamlNode v) -> {
log.trace("Adding property {} = {}, type: {}", k, v, v.getClass());
String key = k.asTextValue();
Optional
© 2015 - 2025 Weber Informatics LLC | Privacy Policy