
ch.ethz.sn.visone3.io.csv.CsvNodeListSource
/*
 * This file is part of netroles.
 *
 * netroles is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * netroles is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with netroles.  If not, see <https://www.gnu.org/licenses/>.
 */

package ch.ethz.sn.visone3.io.csv;

import ch.ethz.sn.visone3.io.Source;
import ch.ethz.sn.visone3.io.SourceFormat;
import ch.ethz.sn.visone3.io.impl.IdMapper;
import ch.ethz.sn.visone3.io.impl.RangedList;
import ch.ethz.sn.visone3.lang.ConstMapping;
import ch.ethz.sn.visone3.lang.Mapping;
import ch.ethz.sn.visone3.lang.PrimitiveList;
import ch.ethz.sn.visone3.progress.ProgressProvider;
import ch.ethz.sn.visone3.progress.ProgressSource;

import com.univocity.parsers.csv.CsvParser;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.stream.IntStream;

/**
 * CSV node list source. All the CSV settings (line separator, delimiter, quotes) are
 * auto-discovered (thanks to univocity). The semantics of the columns have to be supplied by
 * calling {@link #monad(String, Range)} and {@link #noderange(String, Range)}.
 * Columns without a range specification are not read.
 *
 * <p>
 * Example:
 *
 * <pre>
 * try (final InputStream in = new ByteArrayInputStream(data.getBytes())) {
 *   final CsvNodeListSource source = new CsvNodeListSource(in, true);
 *
 *   // configure what to read
 *   source.monad(NODE, Source.Range.INT);
 *   source.noderange(VALUE, Source.Range.INT);
 *
 *   // read
 *   source.parse();
 * }
 * </pre>
 */
public class CsvNodeListSource implements SourceFormat, Source<String> {
  private static final Logger LOG = LoggerFactory.getLogger(CsvNodeListSource.class);

  private final InputStream in;
  private final boolean header;
  private final Map<String, Range<?>> name2range;
  private String nameNode;
  private Range<?> rangeNodeId;
  private Map<String, Mapping<?>> monadic;
  private IdMapper<String> ids = IdMapper.continous(String.class);

  /**
   * Constructs the source.
   *
   * @param in     the stream to read from.
   * @param header true if the CSV data contains a header line, otherwise false.
   */
  public CsvNodeListSource(final InputStream in, final boolean header) {
    this.in = in;
    this.header = header;
    name2range = new HashMap<>();
  }

  @Override
  public boolean isAutoconfig() {
    return false;
  }

  @Override
  public void mergeNodes(final ConstMapping<String> ids) {
    this.ids = IdMapper.fixed(ids); // TODO hand in mapper?
  }

  @Override
  public void monad(final String varName, final Range<?> range) {
    nameNode = varName;
    rangeNodeId = range;
  }

  @Override
  public void noderange(final String varName, final Range<?> range) {
    name2range.put(varName, range);
  }

  @Override
  public SourceFormat parse() throws IOException {
    final CsvParser parser = new CsvParser(UnivocitySettings.SETTINGS);
    parser.beginParsing(in);

    // parse header
    String[] row = parser.parseNext();
    final ArrayList<String> header = new ArrayList<>();
    if (this.header) {
      Collections.addAll(header, row);
      row = parser.parseNext();
    } else {
      IntStream.range(0, row.length).mapToObj(String::valueOf).forEach(header::add);
    }

    // find the node id column
    final int nodeCol = header.indexOf(nameNode);
    if (nodeCol < 0) {
      throw new IOException("could not find node column\n"
          + "known: " + String.join(", ", header) + "\n"
          + "searching: " + nameNode);
    }
    if (name2range.remove(nameNode) != null) {
      LOG.warn("removed node range");
    }

    // create ranges and lists
    final RangedList[] rangedMappings = new RangedList[row.length];
    for (int i = 0; i < rangedMappings.length; i++) {
      final Range<?> range = name2range.get(header.get(i));
      if (range != null) {
        rangedMappings[i] = new RangedList<>(range);
      }
    }

    // read
    try (ProgressSource p = ProgressProvider.getMonitor().newSource()) {
      p.updateProgress("read node csv");
      int ignored = 0;
      do {
        final int v = ids.map(row[nodeCol]);
        if (v >= 0) {
          // add monadic attributes
          p.updateProgress(v);
          for (int i = 0; i < row.length; i++) {
            if (rangedMappings[i] != null) {
              if (v >= rangedMappings[i].getList().size()) {
                rangedMappings[i].getList().setSize(null, v + 1);
              }
              rangedMappings[i].setListAt(v, row[i]);
            }
          }
        } else {
          ignored++;
        }
      } while ((row = parser.parseNext()) != null);
      LOG.info("{} ids read", ids.size());
      LOG.info("{} rows ignored (merging or empty)", ignored);

      // fill the monadic mappings map
      p.updateProgress(1, 3);
      monadic = new LinkedHashMap<>();
      final int maxId = ids.getMapping().values().stream()
          .mapToInt(Integer::intValue).max().orElse(-1) + 1;
      for (int i = 0; i < rangedMappings.length; i++) {
        if (rangedMappings[i] != null) {
          final PrimitiveList mapping = rangedMappings[i].getList();
          mapping.setSize(null, maxId);
          monadic.put(header.get(i), mapping);
        }
      }

      // save the original node ids
      final RangedList nodeIds = new RangedList<>(rangeNodeId, maxId);
      for (final Map.Entry<String, Integer> e : ids.entrySet()) {
        nodeIds.setListAt(e.getValue().intValue(), e.getKey());
      }
      monadic.put("id", nodeIds.getList());
    }
    return this;
  }

  @Override
  public Map<String, Mapping<?>> monadic() {
    return Collections.unmodifiableMap(monadic);
  }

  @Override
  public Map<String, Integer> nodeIds() {
    return ids.getMapping();
  }

  @Override
  public void close() throws IOException {
    in.close();
  }
}
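
For reference, here is a minimal, self-contained usage sketch of this source reading node attributes from an in-memory CSV. It follows the class Javadoc above: Source.Range.INT is the only range constant assumed (as used in that example), the Map<String, Mapping<?>> return type of monadic() matches the listing above, and the class name CsvNodeListExample, the column names "node" and "age", and the sample data are hypothetical, chosen purely for illustration.

import ch.ethz.sn.visone3.io.Source;
import ch.ethz.sn.visone3.io.csv.CsvNodeListSource;
import ch.ethz.sn.visone3.lang.Mapping;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Map;

public class CsvNodeListExample {
  public static void main(final String[] args) throws Exception {
    // hypothetical node list: integer node ids plus one integer attribute column
    final String data = "node,age\n1,34\n2,27\n3,41\n";

    try (InputStream in = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8))) {
      final CsvNodeListSource source = new CsvNodeListSource(in, true);

      // declare the node id column and the attribute columns to read;
      // columns without a range specification are skipped
      source.monad("node", Source.Range.INT);
      source.noderange("age", Source.Range.INT);
      source.parse();

      // "age" attribute plus the synthesized "id" mapping of the original node ids
      final Map<String, Mapping<?>> attributes = source.monadic();
      System.out.println(attributes.keySet());

      // original node id -> internal node index
      System.out.println(source.nodeIds());
    }
  }
}

Note that parse() drops any range registered for the node id column itself and always appends an "id" mapping holding the original identifiers, so the attribute map contains one entry per configured column plus "id".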



