All downloads are free. Search and download functionality uses the official Maven repository.

org.milyn.flatfile.regex.RegexParser Maven / Gradle / Ivy

The newest version!
/*
 * Milyn - Copyright (C) 2006 - 2010
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License (version 2.1) as published by the Free Software
 * Foundation.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License for more details:
 * http://www.gnu.org/licenses/lgpl.txt
 */

package org.milyn.flatfile.regex;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.milyn.flatfile.variablefield.VariableFieldRecordParser;
import org.xml.sax.InputSource;

/**
 * Regex record parser.
 *
 * If there are no groups defined in the regexPattern this parser will use the
 * pattern to split the record into fields. If groups are defined, it will
 * extract the record field data from the groups defined in the pattern.
 * 
 * @author [email protected]
 */
public class RegexParser extends VariableFieldRecordParser {

    /** Buffered wrapper around the character stream supplied via {@link #setDataSource(InputSource)}. */
    private BufferedReader reader;

    /** Scratch buffer that receives the text of one record at a time; cleared before every read. */
    private StringBuilder readerBuffer;

    /** Number of capturing groups in the factory's regex pattern; zero selects split mode. */
    private int groupCount;

    /**
     * Binds this parser to its input and prepares the per-record state.
     *
     * The character stream of the supplied source is wrapped in a
     * {@link BufferedReader}, a fresh record buffer is allocated, and the
     * number of capturing groups in the factory's regex pattern is cached.
     *
     * @param source Input source; must expose a character stream.
     * @throws IllegalStateException If the source has no character stream.
     */
    public void setDataSource(InputSource source) {
        Reader characterStream = source.getCharacterStream();

        if (characterStream == null) {
            throw new IllegalStateException(
                    "Invalid InputSource type supplied to RegexParser.  Must contain a Reader instance.");
        }

        this.reader = new BufferedReader(characterStream);
        this.readerBuffer = new StringBuilder();
        // The group count is a property of the pattern alone, so an empty
        // input is enough to obtain it from a Matcher.
        this.groupCount = getFactory().getRegexPattern().matcher("").groupCount();
    }

    /**
     * Reads the next record and breaks it into field values.
     *
     * The factory's <code>readRecord</code> fills the record buffer. How the
     * record is then turned into fields depends on the pattern:
     * <ul>
     * <li>Pattern with capturing groups, record matches the whole pattern:
     * one field per group, in group order, skipping groups that did not
     * participate in the match (<code>null</code> group value).</li>
     * <li>Pattern with capturing groups, record does not match: a single
     * field holding the full record text.</li>
     * <li>Pattern without capturing groups: the record is split on the
     * pattern and the resulting pieces are the fields.</li>
     * </ul>
     *
     * @return The field values of the next record, or <code>null</code> if
     *         the factory left the record buffer empty (presumably end of
     *         input - confirm against <code>readRecord</code>).
     * @throws IOException If reading the record fails.
     * @throws IllegalStateException If no data source has been set.
     */
    @Override
    public List nextRecordFieldValues() throws IOException {
        if (reader == null) {
            // Fail with a clear message instead of a NullPointerException on the buffer below.
            throw new IllegalStateException(
                    "No data source set on RegexParser.  setDataSource must be called before reading records.");
        }

        // The factory is accessed through getFactory() directly (as in
        // setDataSource) so that no undeclared type name is needed here.
        Pattern pattern = getFactory().getRegexPattern();

        readerBuffer.setLength(0);
        getFactory().readRecord(reader, readerBuffer, (getRecordCount() + 1));

        if (readerBuffer.length() == 0) {
            return null;
        }

        String recordString = readerBuffer.toString();

        if (groupCount <= 0) {
            // No groups defined: the pattern acts as the field delimiter.
            return Arrays.asList(pattern.split(recordString));
        }

        List<String> fields = new ArrayList<String>();
        Matcher matcher = pattern.matcher(recordString);

        if (matcher.matches()) {
            // Group 0 is the whole match; the fields are groups 1..groupCount.
            for (int group = 1; group <= matcher.groupCount(); group++) {
                String fieldValue = matcher.group(group);
                if (fieldValue != null) {
                    fields.add(fieldValue);
                }
            }
        } else {
            // Add the full record text as the only field value
            fields.add(recordString);
        }

        return fields;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy