All downloads are free. Search and download functionality uses the official Maven repository.

stream.io.JSONBlockWriter Maven / Gradle / Ivy

The newest version!
/*
 *  streams library
 *
 *  Copyright (C) 2011-2014 by Christian Bockermann, Hendrik Blom
 * 
 *  streams is a library, API and runtime environment for processing high
 *  volume data streams. It is composed of three submodules "stream-api",
 *  "stream-core" and "stream-runtime".
 *
 *  The streams library (and its submodules) is free software: you can 
 *  redistribute it and/or modify it under the terms of the 
 *  GNU Affero General Public License as published by the Free Software 
 *  Foundation, either version 3 of the License, or (at your option) any 
 *  later version.
 *
 *  The streams library (and its submodules) is distributed in the hope
 *  that it will be useful, but WITHOUT ANY WARRANTY; without even the implied 
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package stream.io;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.text.SimpleDateFormat;
import java.util.Date;

import net.minidev.json.JSONObject;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import stream.AbstractProcessor;
import stream.Data;
import stream.annotations.Parameter;
import stream.data.DataFactory;
import stream.util.ByteSize;

import javax.xml.bind.DatatypeConverter;

/**
 * Processor that appends every processed data item as one line of JSON to a
 * "block" file below a base directory.
 * 
 * <p>
 * The sub-directory of a block is derived from the item's timestamp (read from
 * {@link #getTimeKey()}, falling back to the system time) formatted with
 * {@link #getPattern()}. A new block file is started whenever the target
 * sub-directory changes or the current block would reach
 * {@link #getBlockSize()}. Values of type {@code byte[]} are written as Base64
 * strings. Items are passed through unchanged.
 * </p>
 * 
 * <p>
 * The writer keeps mutable state (current stream, counters, date formatter)
 * without any synchronization.
 * </p>
 * 
 * @author chris
 * 
 */
public class JSONBlockWriter extends AbstractProcessor {

	static Logger log = LoggerFactory.getLogger(JSONBlockWriter.class);

	/** Charset used for the JSON lines, independent of the platform default. */
	private static final String ENCODING = "UTF-8";

	ByteSize blockSize = new ByteSize(64 * ByteSize.MB);
	File directory;

	/** The block file most recently opened (may be set even if opening failed). */
	File currentBlock = null;
	/** Stream of the currently open block, or null if no block is open. */
	FileOutputStream out;

	/** Number of bytes written to the currently open block. */
	long bytesWritten = 0L;
	/** Number of blocks opened in the current target directory; used as block id. */
	int blocksCreated = 0;
	String pattern = "yyyy/MM/dd/HH00";
	SimpleDateFormat fmt = new SimpleDateFormat(pattern);

	String timeKey = "timestamp";
	String blockFormat = "block-${blockId}.json";

	/**
	 * Serializes the item to JSON and appends it to the block file selected by
	 * the item's timestamp. Does nothing if no base directory is configured.
	 * I/O errors are logged and do not interrupt processing.
	 * 
	 * @param input
	 *            the item to write
	 * @return the unmodified input item
	 * @see stream.Processor#process(stream.Data)
	 */
	@Override
	public Data process(Data input) {

		if (directory == null) {
			return input;
		}

		long time = resolveTime(input);

		try {
			// Serialize the copy that carries the Base64-encoded byte arrays,
			// not the original item.
			Data encoded = encodeBinaryValues(input);
			byte[] json = (JSONObject.toJSONString(encoded) + "\n")
					.getBytes(ENCODING);

			File parent = new File(directory.getAbsoluteFile(),
					fmt.format(new Date(time)));

			if (currentBlock != null && !isInside(currentBlock, parent)) {
				log.info("Change of target path detected!");
				closeCurrentBlock();
				// block ids restart at 0 in every target directory
				blocksCreated = 0;
			}

			if (out != null
					&& bytesWritten + json.length >= blockSize.getBytes()) {
				closeCurrentBlock();
				log.info("Block {} full!", currentBlock);
			}

			if (out == null) {
				openBlock(parent);
			}

			out.write(json);
			bytesWritten += json.length;
		} catch (IOException e) {
			log.error("Failed to write item to block {}", currentBlock, e);
		}
		return input;
	}

	/**
	 * Closes the currently open block (if any) when the processor is finished,
	 * so the file handle is not leaked.
	 */
	@Override
	public void finish() throws Exception {
		try {
			closeCurrentBlock();
		} finally {
			super.finish();
		}
	}

	/**
	 * Determines the timestamp of an item: the value stored under the time key
	 * parsed as a long, or the current system time if the value is missing or
	 * not parseable.
	 */
	private long resolveTime(Data input) {
		Serializable raw = input.get(timeKey);
		if (raw != null) {
			try {
				return Long.parseLong(raw.toString());
			} catch (NumberFormatException e) {
				log.debug("Value '{}' for key '{}' is not a valid timestamp, using system time.",
						raw, timeKey);
			}
		}
		return System.currentTimeMillis();
	}

	/**
	 * Creates a copy of the item in which every byte[] value is replaced by its
	 * Base64 string representation. Null values are left untouched.
	 */
	private Data encodeBinaryValues(Data input) {
		Data copy = DataFactory.create(input);
		for (String key : copy.keySet()) {
			Serializable val = copy.get(key);
			// instanceof is false for null, so null values are skipped safely
			if (val instanceof byte[]) {
				copy.put(key,
						DatatypeConverter.printBase64Binary((byte[]) val));
			}
		}
		return copy;
	}

	/**
	 * Checks whether the file is located below the given directory. The
	 * directory path is terminated with a separator before comparing, so that
	 * e.g. ".../1" is not mistaken for a parent of ".../10/block-0.json".
	 */
	private static boolean isInside(File file, File dir) throws IOException {
		String dirPath = dir.getCanonicalPath();
		if (!dirPath.endsWith(File.separator)) {
			dirPath = dirPath + File.separator;
		}
		return file.getCanonicalPath().startsWith(dirPath);
	}

	/**
	 * Opens the next block file in the given directory, creating the directory
	 * if necessary, and resets the byte counter.
	 */
	private void openBlock(File parent) throws IOException {
		currentBlock = new File(parent, blockFormat.replace("${blockId}",
				String.valueOf(blocksCreated)));
		log.info("Opening new block at {}", currentBlock);

		if (!parent.isDirectory()) {
			parent.mkdirs();
			if (!parent.isDirectory()) {
				// opening the stream below will fail with an IOException
				log.error("Failed to create directory {}", parent);
			}
		}

		out = new FileOutputStream(currentBlock);
		bytesWritten = 0L;
		blocksCreated++;
	}

	/**
	 * Flushes and closes the open block stream, if there is one. The stream
	 * reference is cleared even if closing fails, so a broken stream is never
	 * reused.
	 */
	private void closeCurrentBlock() throws IOException {
		FileOutputStream closing = out;
		out = null;
		if (closing == null) {
			return;
		}
		try {
			closing.flush();
		} finally {
			closing.close();
		}
	}

	/**
	 * @return the blockSize
	 */
	public ByteSize getBlockSize() {
		return blockSize;
	}

	/**
	 * @param blockSize
	 *            the blockSize to set
	 */
	@Parameter(description = "The size of the blocks that are to be written. Default is 64M.")
	public void setBlockSize(ByteSize blockSize) {
		this.blockSize = blockSize;
	}

	/**
	 * @return the directory
	 */
	public File getDirectory() {
		return directory;
	}

	/**
	 * @param directory
	 *            the directory to set
	 */
	@Parameter(description = "The base directory in which the file are to be stored.", required = true)
	public void setDirectory(File directory) {
		this.directory = directory;
	}

	/**
	 * @return the pattern
	 */
	public String getPattern() {
		return pattern;
	}

	/**
	 * Sets the date-time pattern and rebuilds the formatter used to derive the
	 * sub-directory of each block, so that the new pattern takes effect.
	 * 
	 * @param pattern
	 *            the pattern to set
	 * @throws NullPointerException
	 *             if the pattern is null
	 * @throws IllegalArgumentException
	 *             if the pattern is not a valid date-time pattern
	 */
	@Parameter(description = "A data-time pattern that is used to create subdirectories where to store blocks (within the base directory). The default pattern is `yyyy/MM/dd/HH00`.")
	public void setPattern(String pattern) {
		// build the formatter first so an invalid pattern leaves the state untouched
		this.fmt = new SimpleDateFormat(pattern);
		this.pattern = pattern;
	}

	/**
	 * @return the timeKey
	 */
	public String getTimeKey() {
		return timeKey;
	}

	/**
	 * @param timeKey
	 *            the timeKey to set
	 */
	@Parameter(description = "The key that is used for determining the current time. If not specified, the default key `timestamp` is used. If a data item does not contain a timestamp, the current system time is used.")
	public void setTimeKey(String timeKey) {
		this.timeKey = timeKey;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy