All Downloads are FREE. Search and download functionalities are using the official Maven repository.

stream.io.LineStream Maven / Gradle / Ivy

There is a newer version: 0.9.10
Show newest version
/*
 *  streams library
 *
 *  Copyright (C) 2011-2012 by Christian Bockermann, Hendrik Blom
 * 
 *  streams is a library, API and runtime environment for processing high
 *  volume data streams. It is composed of three submodules "stream-api",
 *  "stream-core" and "stream-runtime".
 *
 *  The streams library (and its submodules) is free software: you can 
 *  redistribute it and/or modify it under the terms of the 
 *  GNU Affero General Public License as published by the Free Software 
 *  Foundation, either version 3 of the License, or (at your option) any 
 *  later version.
 *
 *  The stream.ai library (and its submodules) is distributed in the hope
 *  that it will be useful, but WITHOUT ANY WARRANTY; without even the implied 
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package stream.io;

import java.io.InputStream;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import stream.Data;
import stream.annotations.Description;
import stream.annotations.Parameter;
import stream.data.DataFactory;
import stream.util.parser.Parser;
import stream.util.parser.ParserGenerator;

/**
 * This is a very simple stream that just reads from a URL line-by-line. The
 * content of the line is stored in the attribute determined by the
 * getKey() method of this instance. By default the key
 * LINE is used.
 * 
 * It also supports the specification of a simple format string that can be used
 * to create a generic parser to populate additional fields of the data item
 * read from the stream.
 * 
 * The parser format is:
 * 
 * 
 *       %{IP} [%{DATE}] "%{URL}"
 * 
* * This will create a parser that is able to read line in the format * *
 *       127.0.0.1 [2012/03/14 12:03:48 +0100] "http://example.com/index.html"
 * 
* * The outcoming data item will have the attribute IP set to * "127.0.0.1" and the DATE attribute set to * "2012/03/14 12:03:48 +0100". The URL attribute will be set to * http://example.com/index.html. * * * @author Christian Bockermann <[email protected]> * */ @Description(group = "Data Stream.Sources") public class LineStream extends AbstractLineStream { static Logger log = LoggerFactory.getLogger(LineStream.class); String key = "LINE"; String format = null; Parser> parser = null; public LineStream(SourceURL url) throws Exception { super(url); } public LineStream(InputStream in) throws Exception { super(in); } /** * @return the key */ public String getKey() { return key; } /** * @param key * the key to set */ @Parameter(required = false, defaultValue = "LINE", description = "") public void setKey(String key) { this.key = key; } /** * @return the format */ public String getFormat() { return format; } /** * @param format * the format to set */ @Parameter(required = false, description = "The format how to parse each line. Elements like %(KEY) will be detected and automatically populated in the resulting items.") public void setFormat(String format) { this.format = format; try { ParserGenerator pg = new ParserGenerator(format); parser = pg.newParser(); } catch (Exception e) { throw new RuntimeException("Failed to create parser for format: " + e.getMessage()); } } /** * @see stream.io.AbstractStream#readItem(stream.Data) */ @Override public synchronized Data readNext() throws Exception { if (this.limit != null && this.limit > 0 && this.count > this.limit) return null; Data instance = DataFactory.create(); String line = readLine(); if (line == null) return null; instance.put(key, line); if (parser != null) { Map map = parser.parse(line); for (String key : map.keySet()) { instance.put(key, map.get(key)); } } return instance; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy