All downloads are FREE. Search and download functionality uses the official Maven repository.

edu.isi.nlp.files.MultitableLoader Maven / Gradle / Ivy

The newest version!
package edu.isi.nlp.files;

import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.io.CharSource;
import com.google.common.io.LineProcessor;
import edu.isi.nlp.IsiNlpImmutable;
import edu.isi.nlp.collections.ImmutableListMultitable;
import edu.isi.nlp.collections.ImmutableMultitable;
import edu.isi.nlp.collections.ImmutableSetMultitable;
import edu.isi.nlp.collections.ListMultitable;
import edu.isi.nlp.collections.Multitable;
import edu.isi.nlp.collections.SetMultitable;
import edu.isi.nlp.symbols.Symbol;
import edu.isi.nlp.symbols.SymbolUtils;
import java.io.IOException;
import java.util.List;

/**
 * Loads a {@link Multitable} from a {@link CharSource}.
 *
 * 

The input is required to fall into three fields when split by {@link #fieldSplitter()} * (defaults to tab-separated). The first of these is expected to be the row key, the second the * column key, and the third the value. Multiple values may appear in one value field if the * optional {@link #valueListSplitter()} is specified to split them with. * *

Comment lines are currently not skipped, but this could be added as an option in the future. * * @author Ryan Gabbard */ @org.immutables.value.Value.Immutable @IsiNlpImmutable public abstract class MultitableLoader { public abstract Optional valueListSplitter(); @org.immutables.value.Value.Default public Splitter fieldSplitter() { return Splitter.on("\t"); } public abstract Function rowInterpreter(); public abstract Function columnInterpreter(); public abstract Function valueInterpreter(); /** * Loads a {@link ListMultitable} from the specified source according to the configuration of this * loader. */ public final ImmutableListMultitable loadToListMultitable(CharSource source) throws IOException { final ImmutableListMultitable.Builder ret = ImmutableListMultitable.builder(); loadToMultitable(source, ret); return ret.build(); } /** * Loads a {@link SetMultitable} from the specified source according to the configuration of this * loader. */ public final ImmutableSetMultitable loadToSetMultitable(CharSource source) throws IOException { final ImmutableSetMultitable.Builder ret = ImmutableSetMultitable.builder(); loadToMultitable(source, ret); return ret.build(); } public static MultitableLoader.Builder builder() { return new MultitableLoader.Builder<>(); } public static MultitableLoader.Builder builderForStrings() { return new MultitableLoader.Builder() .rowInterpreter(Functions.identity()) .columnInterpreter(Functions.identity()) .valueInterpreter(Functions.identity()); } public static MultitableLoader.Builder builderForSymbols() { return new MultitableLoader.Builder() .rowInterpreter(SymbolUtils.symbolizeFunction()) .columnInterpreter(SymbolUtils.symbolizeFunction()) .valueInterpreter(SymbolUtils.symbolizeFunction()); } private void loadToMultitable( final CharSource source, final ImmutableMultitable.Builder ret) throws IOException { source.readLines( new LineProcessor() { @Override public boolean processLine(final String line) throws IOException { final List fields 
= fieldSplitter().splitToList(line); if (fields.size() == 3) { final R rowKey = interpret(fields.get(0), rowInterpreter(), "row key", line); final C columnKey = interpret(fields.get(1), columnInterpreter(), "column key", line); if (valueListSplitter().isPresent()) { for (final String value : valueListSplitter().get().split(fields.get(2))) { ret.put(rowKey, columnKey, interpret(value, valueInterpreter(), "value", line)); } } else { ret.put( rowKey, columnKey, interpret(fields.get(2), valueInterpreter(), "value", line)); } } else { throw new IOException("Cannot parse lines as multitable entries:\n" + line); } // we never stop procesisng lines early return true; } @Override public Void getResult() { return null; } }); } private T interpret( String field, Function interpreter, String fieldName, String line) throws IOException { try { return interpreter.apply(field); } catch (Exception e) { throw new IOException( "While parsing multitable line\n" + line + "\n failed to interpret " + fieldName + " " + field + " using " + interpreter); } } public static class Builder extends ImmutableMultitableLoader.Builder { public Builder splitValuesOnCommas() { valueListSplitter(Splitter.on(",")); return this; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy