All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.metafacture.xml.XmlFilenameWriter Maven / Gradle / Ivy

/*
 * Copyright 2013 Pascal Christoph (hbz)
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.metafacture.xml;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.file.Paths;
import java.util.function.Function;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.MetafactureException;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultStreamPipe;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;

/**
 * A sink, writing an xml file. The filename is constructed from the xpath given
 * via setProperty().
 *
 * @author Pascal Christoph
 * @author Christoph Böhme
 */
@Description("Writes the xml into the filesystem. The filename is constructed from the xpath given as 'property'.\n"
		+ " Variables are\n" + "- 'target' (determining the output directory)\n"
		+ "- 'property' (the element in the XML entity. Constitutes the main part of the file's name.)\n"
		+ "- 'startIndex' ( a subfolder will be extracted out of the filename. This marks the index' beginning )\n"
		+ "- 'stopIndex' ( a subfolder will be extracted out of the filename. This marks the index' end )\n")
@In(StreamReceiver.class)
@Out(Void.class)
@FluxCommand("write-xml-files")
public final class XmlFilenameWriter
		extends DefaultStreamPipe>
		implements FilenameExtractor {

	private static final Logger LOG = LoggerFactory.getLogger(
			XmlFilenameWriter.class);

	private final XPath xPath = XPathFactory.newInstance().newXPath();
	private final FilenameUtil filenameUtil = new FilenameUtil();

	private String compression;

	/**
	 * Default constructor
	 */
	public XmlFilenameWriter() {
		setFileSuffix(".xml");
	}

	/**
	 * Sets the compression. Default is no compression.
	 *
	 * @param compression
	 *            The compression. At the moment only 'bz2' is possible.
	 */
	public void setCompression(final String compression) {
		this.compression = compression;
	}

	@Override
	public void setEncoding(final String encoding) {
		filenameUtil.encoding = encoding;
	}

	@Override
	public String getEncoding() {
		return filenameUtil.encoding;
	}

	@Override
	public void setEndIndex(final int endIndex) {
		filenameUtil.endIndex = endIndex;
	}

	@Override
	public void setFileSuffix(final String fileSuffix) {
		filenameUtil.fileSuffix = fileSuffix;

	}

	@Override
	public void setProperty(final String property) {
		filenameUtil.property = property;
	}

	@Override
	public void setStartIndex(final int startIndex) {
		filenameUtil.startIndex = startIndex;
	}

	@Override
	public void setTarget(final String target) {
		filenameUtil.target = target;
	}

	@Override
	public void literal(final String str, final String xml) {
		final String identifier = extractIdentifier(xml);
		if (identifier == null) {
			return;
		}
		final File file = buildTargetFileName(identifier);
		filenameUtil.ensurePathExists(file);
		if (compression == null) {
			writeXml(xml, file);
		} else if ("bz2".equals(compression)) {
			final File compressedFile = new File(file.getPath() + ".bz2");
			writeXml(xml, compressedFile, this::createBZip2Compressor);
		}
	}

	private String extractIdentifier(String xml) {
		final String identifier;
		try {
			identifier = xPath.evaluate(this.filenameUtil.property,
					new InputSource(new StringReader(xml)));
		} catch (XPathExpressionException e) {
			throw new MetafactureException(e);
		}
		if (identifier == null || identifier.length() < filenameUtil.endIndex) {
			LOG.info("No identifier found, skip writing");
			LOG.debug("the xml: {}", xml);
			return null;
		}
		return identifier;
	}

	private File buildTargetFileName(String identifier) {
		final String directory = identifier.substring(filenameUtil.startIndex,
				filenameUtil.endIndex);
		return Paths.get(filenameUtil.target)
				.resolve(directory)
				.resolve(identifier + filenameUtil.fileSuffix)
				.toFile();
	}

	private void writeXml(String xml, File file) {
		writeXml(xml, file, Function.identity());
	}

	private void writeXml(String xml, File file,
			Function compressorFactory) {
		try (
				OutputStream fileStream = new FileOutputStream(file);
				OutputStream compressedStream = compressorFactory.apply(fileStream);
				Writer writer = new OutputStreamWriter(compressedStream,
						filenameUtil.encoding);
		) {
			writer.write(xml);
		} catch (IOException | UncheckedIOException e) {
			throw new MetafactureException(e);
		}
	}

	private OutputStream createBZip2Compressor(OutputStream stream) {
		try {
			return new BZip2CompressorOutputStream(stream);
		} catch (IOException e) {
			throw new UncheckedIOException(e);
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy