com.twitter.elephantbird.pig.load.LzoRegexLoader Maven / Gradle / Ivy
package com.twitter.elephantbird.pig.load;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* LzoRegexLoader extends LzoBaseRegexLoader, allowing regular expressions to be passed by argument through pig latin
* via a line like:
* A = LOAD 'test.txt' USING com.twitter.elephantbird.pig.load.LzoRegexLoader('(\\d+)::(\\w+)\|\|(\\w+)');
* which would parse lines like
* 1::one||i 2::two||ii 3::three||iii
* into arrays like
* {1, "one", "i"}, {2, "two", "ii"}, {3, "three", "iii"}
*/
public class LzoRegexLoader extends LzoBaseRegexLoader {
  private static final Logger LOG = LoggerFactory.getLogger(LzoRegexLoader.class);

  /** Compiled form of the regex given to the constructor; assigned exactly once. */
  private final Pattern pattern_;

  /**
   * Builds a loader around the regular expression passed in as a string.
   *
   * <p>Before compilation, every pair of consecutive backslashes in the argument is
   * collapsed into a single backslash, so an argument containing {@code \\d+} is
   * compiled as {@code \d+}. NOTE(review): presumably this undoes the extra level of
   * escaping applied when the regex is written as a string literal in a Pig script --
   * confirm against how Pig hands constructor arguments to loaders.
   *
   * @param pattern the regex, exactly as received from the caller; must not be null.
   * @throws NullPointerException if {@code pattern} is null.
   * @throws java.util.regex.PatternSyntaxException if the unescaped regex is not valid.
   */
  public LzoRegexLoader(String pattern) {
    // Fail fast with a descriptive message rather than an anonymous NPE from String.replace.
    if (pattern == null) {
      throw new NullPointerException("pattern must not be null");
    }
    // Log the argument as received (before unescaping); parameterized form avoids
    // building the message string when INFO is disabled.
    LOG.info("LzoRegexLoader with regex = {}", pattern);

    // String.replace is literal (not regex-based): the Java literal "\\\\" is two
    // backslash characters, and "\\" is one.
    final String unescaped = pattern.replace("\\\\", "\\");
    pattern_ = Pattern.compile(unescaped);
  }

  /**
   * Supplies the pattern compiled in the constructor, fulfilling the abstract part of
   * the parent loader.
   *
   * @return the compiled regex; the same instance on every call.
   */
  @Override
  public Pattern getPattern() {
    return pattern_;
  }
}