All downloads are free. The search and download functionality uses the official Maven repository.

org.milyn.flatfile.regex.RegexParser Maven / Gradle / Ivy

The newest version!
/*
 * Milyn - Copyright (C) 2006 - 2010
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License (version 2.1) as published by the Free Software
 * Foundation.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License for more details:
 * http://www.gnu.org/licenses/lgpl.txt
 */

package org.milyn.flatfile.regex;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.milyn.flatfile.variablefield.VariableFieldRecordParser;
import org.xml.sax.InputSource;

/**
 * Regex record parser.
 * 

* If there are no groups defined in the regexPattern this parser will use the * pattern to split the record into fields. If groups are defined, it will * extract the record field data from the groups defined in the pattern. * * @author [email protected] */ public class RegexParser extends VariableFieldRecordParser { private BufferedReader reader; private StringBuilder readerBuffer; private int groupCount; public void setDataSource(InputSource source) { Reader reader = source.getCharacterStream(); if (reader == null) { throw new IllegalStateException( "Invalid InputSource type supplied to RegexParser. Must contain a Reader instance."); } this.reader = new BufferedReader(reader); this.readerBuffer = new StringBuilder(); this.groupCount = getFactory().getRegexPattern().matcher("").groupCount(); } @Override public List nextRecordFieldValues() throws IOException { T factory = getFactory(); Pattern pattern = factory.getRegexPattern(); readerBuffer.setLength(0); factory.readRecord(reader, readerBuffer, (getRecordCount() + 1)); if (readerBuffer.length() == 0) { return null; } if (groupCount > 0) { String recordString = readerBuffer.toString(); List fields = new ArrayList(); Matcher matcher = pattern.matcher(recordString); if (matcher.matches()) { for (int i = 0; i < matcher.groupCount(); i++) { String fieldValue = matcher.group(i + 1); if (fieldValue != null) { fields.add(fieldValue); } } } else { // Add the full record text as the only field value fields.add(recordString); } return fields; } else { return Arrays.asList(pattern.split(readerBuffer.toString())); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy