All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gr.ekt.bteio.loaders.CSVDataLoader Maven / Gradle / Ivy

/**
 * Copyright (c) 2007-2013, National Documentation Centre (EKT, www.ekt.gr)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *     Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *
 *     Neither the name of the National Documentation Centre nor the
 *     names of its contributors may be used to endorse or promote
 *     products derived from this software without specific prior written
 *     permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package gr.ekt.bteio.loaders;

import gr.ekt.bte.core.DataLoadingSpec;
import gr.ekt.bte.core.RecordSet;
import gr.ekt.bte.core.StringValue;
import gr.ekt.bte.dataloader.FileDataLoader;
import gr.ekt.bte.exceptions.EmptySourceException;
import gr.ekt.bte.exceptions.MalformedSourceException;
import gr.ekt.bte.record.MapRecord;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Map;

import org.apache.log4j.Logger;

import au.com.bytecode.opencsv.CSVReader;

public class CSVDataLoader extends FileDataLoader {
    private static Logger logger_ = Logger.getLogger(CSVDataLoader.class);
    private CSVReader reader_;
    private Map field_map_;
    private int skip_lines_ = 0;
    private char separator_ = ',';
    private char quote_char_ = '"';
    private String value_separator_ = null;

    public CSVDataLoader() {
        super();
        reader_ = null;
        field_map_ = null;
    }

    public CSVDataLoader(String filename, Map fields) {
        super(filename);
        field_map_ = fields;
    }

    public CSVDataLoader(String filename, Map fields, char separator, char quote_char, int skip_lines, String value_separator) {
        super(filename);
        field_map_ = fields;
        separator_ = separator;
        quote_char_ = quote_char;
        skip_lines_ = skip_lines;
        value_separator_ = value_separator;
    }

    @Override
    public RecordSet getRecords() throws MalformedSourceException, EmptySourceException {
        openReader();
        if (reader_ == null) {
            throw new EmptySourceException("Input file is not open");
        }
        RecordSet rs = null;
        try {
            String [] next_line;
            rs = new RecordSet();
            while((next_line = reader_.readNext()) != null) {
                MapRecord rec = new MapRecord();
                for(Map.Entry en : field_map_.entrySet()) {
                    int i = en.getKey();
                    if (next_line.length <= i) {
                        throw new MalformedSourceException("The requested column " + i + " does not exist");
                    }
                    if (value_separator_ != null) {
                        String values[] = next_line[i].split(value_separator_);
                        for(int j = 0; j < values.length; j++) {
                            rec.addValue(field_map_.get(i), new StringValue(values[j]));
                        }
                    }
                    else {
                        rec.addValue(field_map_.get(i), new StringValue(next_line[i]));
                    }
                }
                rs.addRecord(rec);
            }
        } catch(IOException e) {
            logger_.info(e.getMessage());
            throw new MalformedSourceException("");
        }

        return rs;
    }

    @Override
    public RecordSet getRecords(DataLoadingSpec spec) throws MalformedSourceException {
        //Not using the DataLoadingSpec for the moment
        if (spec.getOffset() > 0) {
            return new RecordSet();
        }
        return getRecords();
    }

    @Override
    protected void finalize() throws Throwable {
        reader_.close();
    }

    private void openReader() throws EmptySourceException {
        try {
            reader_ = new CSVReader(new FileReader(filename), separator_, quote_char_, skip_lines_);
        } catch (FileNotFoundException e) {
            throw new EmptySourceException("File " + filename + " not found");
        }
    }

    @Override
    public void setFilename(String filename) {
        this.filename = filename;
        try {
            openReader();
        } catch (EmptySourceException e) {
            logger_.info("Could not open file " + filename);
            reader_ = null;
        }
    }

    /**
     * @return the fields_
     */
    public Map getFieldMap() {
        return field_map_;
    }

    /**
     * @param fields_ the fields_ to set
     */
    public void setFieldMap(Map fields_) {
        this.field_map_ = fields_;
    }

    /**
     * @return the skip_lines_
     */
    public int getSkipLines() {
        return skip_lines_;
    }

    /**
     * @param skip_lines_ the skip_lines_ to set
     */
    public void setSkipLines(int skip_lines_) {
        this.skip_lines_ = skip_lines_;
    }

    /**
     * @return the separator_
     */
    public char getSeparator() {
        return separator_;
    }

    /**
     * @param separator_ the separator_ to set
     */
    public void setSeparator(char separator_) {
        this.separator_ = separator_;
    }

    /**
     * @return the quote_char_
     */
    public char getQuoteChar() {
        return quote_char_;
    }

    /**
     * @param quote_char_ the quote_char_ to set
     */
    public void setQuoteChar(char quote_char_) {
        this.quote_char_ = quote_char_;
    }

    /**
     * @return the value_separator_
     */
    public String getValueSeparator() {
        return value_separator_;
    }

    /**
     * @param value_separator_ the value_separator_ to set
     */
    public void setValueSeparator(String value_separator_) {
        this.value_separator_ = value_separator_;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy