All downloads are free. The search and download features use the official Maven repository.

org.kitesdk.morphline.stdio.ReadLineBuilder Maven / Gradle / Ivy

/*
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.morphline.stdio;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.Collections;

import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.CommandBuilder;
import org.kitesdk.morphline.api.MorphlineCompilationException;
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;

import com.typesafe.config.Config;

/**
 * Command that emits one record per line in the input stream of the first attachment.
 * <p>
 * Configuration parameters read by this command:
 * <ul>
 * <li><code>charset</code> - character encoding of the input; defaults to <code>null</code>, in
 * which case the charset is resolved per record via <code>detectCharset</code>.</li>
 * <li><code>ignoreFirstLine</code> - whether to skip the first line of each stream (for example a
 * header line); defaults to <code>false</code>.</li>
 * <li><code>commentPrefix</code> - a string of at most one character; lines starting with it are
 * skipped. Defaults to the empty string, which disables comment filtering.</li>
 * </ul>
 */
public final class ReadLineBuilder implements CommandBuilder {

  /** Returns the single name ("readLine") under which this command is registered. */
  @Override
  public Collection<String> getNames() {
    return Collections.singletonList("readLine");
  }

  /** Creates a new {@code readLine} command instance from the given configuration. */
  @Override
  public Command build(Config config, Command parent, Command child, MorphlineContext context) {
    return new ReadLine(this, config, parent, child, context);
  }
  
  
  ///////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  ///////////////////////////////////////////////////////////////////////////////
  private static final class ReadLine extends AbstractParser {

    /** Configured charset; may be null, in which case detectCharset() decides per record. */
    private final Charset charset;
    /** Whether the first line of every input stream is discarded. */
    private final boolean ignoreFirstLine;
    /** Single-character comment marker, or null if comment filtering is disabled. */
    private final String commentPrefix;
  
    /**
     * Reads and validates the command configuration.
     *
     * @throws MorphlineCompilationException if <code>commentPrefix</code> is longer than one
     *         character
     */
    public ReadLine(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
      super(builder, config, parent, child, context);
      this.charset = getConfigs().getCharset(config, "charset", null);
      this.ignoreFirstLine = getConfigs().getBoolean(config, "ignoreFirstLine", false);
      String cprefix = getConfigs().getString(config, "commentPrefix", "");
      if (cprefix.length() > 1) {
        throw new MorphlineCompilationException("commentPrefix must be at most one character long: " + cprefix, config);
      }
      // Normalize "no prefix configured" to null so the per-line check is a cheap null test.
      this.commentPrefix = (cprefix.length() > 0 ? cprefix : null);
      // NOTE(review): presumably rejects unrecognized config parameters - confirm in the superclass.
      validateArguments();
    }
  
    /**
     * Splits the given stream into lines and passes one output record per retained line to the
     * child command. Each output record is a copy of the input record with attachments and any
     * existing {@link Fields#MESSAGE} values removed, and the line stored in
     * {@link Fields#MESSAGE}.
     * <p>
     * Skipped lines: the first line (if <code>ignoreFirstLine</code> is set), empty lines, and
     * lines starting with the configured comment prefix.
     *
     * @param inputRecord the record that carried the attachment; used as template for the output
     * @param stream the attachment body to read lines from
     * @return <code>false</code> as soon as the child command returns <code>false</code> for a
     *         record (remaining lines are not read); <code>true</code> otherwise
     * @throws IOException if reading from the stream fails
     */
    @Override
    protected boolean doProcess(Record inputRecord, InputStream stream) throws IOException {
      // Build the per-line template once: everything from the input except the raw payload.
      Record template = inputRecord.copy();
      removeAttachments(template);
      template.removeAll(Fields.MESSAGE);

      Charset detectedCharset = detectCharset(inputRecord, charset);  
      Reader reader = new InputStreamReader(stream, detectedCharset);
      // NOTE(review): the reader is not closed here; presumably the caller owns and closes the
      // stream - confirm against AbstractParser.
      BufferedReader lineReader = new BufferedReader(reader, getBufferSize(stream));

      // One-shot flag: true only until the first line has been consumed and discarded.
      boolean skipNextLine = ignoreFirstLine;
      String line;

      while ((line = lineReader.readLine()) != null) {
        if (skipNextLine) {
          skipNextLine = false;
          continue; // ignore first line
        }
        if (line.isEmpty()) {
          continue; // ignore empty lines
        }
        if (commentPrefix != null && line.startsWith(commentPrefix)) {
          continue; // ignore comments
        }
        Record outputRecord = template.copy();
        outputRecord.put(Fields.MESSAGE, line);
        incrementNumRecords();
        
        // pass record to next command in chain:
        if (!getChild().process(outputRecord)) {
          return false;
        }
      }
      return true;        
    }
      
  }
  
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy