
ch.ethz.sn.visone3.io.csv.CsvNodeListSource
/*
 * This file is part of netroles.
 *
 * netroles is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * netroles is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with netroles.  If not, see <https://www.gnu.org/licenses/>.
 */

package ch.ethz.sn.visone3.io.csv;

import ch.ethz.sn.visone3.io.Source;
import ch.ethz.sn.visone3.io.SourceFormat;
import ch.ethz.sn.visone3.io.impl.IdMapper;
import ch.ethz.sn.visone3.io.impl.RangedList;
import ch.ethz.sn.visone3.lang.ConstMapping;
import ch.ethz.sn.visone3.lang.Mapping;
import ch.ethz.sn.visone3.lang.PrimitiveList;
import ch.ethz.sn.visone3.progress.ProgressProvider;
import ch.ethz.sn.visone3.progress.ProgressSource;

import com.univocity.parsers.csv.CsvParser;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.stream.IntStream;

/**
 * CSV node list source. All the CSV settings (line separator, delimiter, quotes) are
 * auto-discovered (thanks to univocity). The semantics of the columns have to be supplied by
 * calling {@link #monad(String, Range)} and {@link #noderange(String, Range)}.
 * Columns without a range specification are not read.
 *
 * <p>
 * Example:
 *
 * <pre>
 * try (final InputStream in = new ByteArrayInputStream(data.getBytes())) {
 *   final CsvNodeListSource source = new CsvNodeListSource(in, true);
 *
 *   // configure what to read
 *   source.monad(NODE, Source.Range.INT);
 *   source.noderange(VALUE, Source.Range.INT);
 *
 *   // read
 *   source.parse();
 * }
 * </pre>
 */
public class CsvNodeListSource implements SourceFormat, Source<String> {
  private static final Logger LOG = LoggerFactory.getLogger(CsvNodeListSource.class);

  private final InputStream in;
  private final boolean header;
  private final Map<String, Range<?>> name2range;
  private String nameNode;
  private Range<?> rangeNodeId;
  private Map<String, Mapping<?>> monadic;
  private IdMapper<String> ids = IdMapper.continous(String.class);

  /**
   * Constructs the source.
   *
   * @param in     the stream to read from.
   * @param header true if the CSV data contains a header line, otherwise false.
   */
  public CsvNodeListSource(final InputStream in, final boolean header) {
    this.in = in;
    this.header = header;
    name2range = new HashMap<>();
  }

  @Override
  public boolean isAutoconfig() {
    return false;
  }

  @Override
  public void mergeNodes(final ConstMapping<String> ids) {
    this.ids = IdMapper.fixed(ids); // TODO hand in mapper?
  }

  @Override
  public void monad(final String varName, final Range<?> range) {
    nameNode = varName;
    rangeNodeId = range;
  }

  @Override
  public void noderange(final String varName, final Range<?> range) {
    name2range.put(varName, range);
  }

  @Override
  public SourceFormat parse() throws IOException {
    final CsvParser parser = new CsvParser(UnivocitySettings.SETTINGS);
    parser.beginParsing(in);

    // parse header
    String[] row = parser.parseNext();
    final ArrayList<String> header = new ArrayList<>();
    if (this.header) {
      Collections.addAll(header, row);
      row = parser.parseNext();
    } else {
      IntStream.range(0, row.length).mapToObj(String::valueOf).forEach(header::add);
    }

    // find the node id column
    final int nodeCol = header.indexOf(nameNode);
    if (nodeCol < 0) {
      throw new IOException("could not find node column\n"
          + "known: " + String.join(", ", header) + "\n"
          + "searching: " + nameNode);
    }
    if (name2range.remove(nameNode) != null) {
      LOG.warn("removed node range");
    }

    // create ranges and lists
    final RangedList[] rangedMappings = new RangedList[row.length];
    for (int i = 0; i < rangedMappings.length; i++) {
      final Range<?> range = name2range.get(header.get(i));
      if (range != null) {
        rangedMappings[i] = new RangedList<>(range);
      }
    }

    // read
    try (ProgressSource p = ProgressProvider.getMonitor().newSource()) {
      p.updateProgress("read node csv");
      int ignored = 0;
      do {
        final int v = ids.map(row[nodeCol]);
        if (v >= 0) {
          // add monadic attributes
          p.updateProgress(v);
          for (int i = 0; i < row.length; i++) {
            if (rangedMappings[i] != null) {
              if (v >= rangedMappings[i].getList().size()) {
                rangedMappings[i].getList().setSize(null, v + 1);
              }
              rangedMappings[i].setListAt(v, row[i]);
            }
          }
        } else {
          ignored++;
        }
      } while ((row = parser.parseNext()) != null);
      LOG.info("{} ids read", ids.size());
      LOG.info("{} rows ignored (merging or empty)", ignored);

      // fill the monadic mappings map
      p.updateProgress(1, 3);
      monadic = new LinkedHashMap<>();
      final int maxId = ids.getMapping().values().stream()
          .mapToInt(Integer::intValue).max().orElse(-1) + 1;
      for (int i = 0; i < rangedMappings.length; i++) {
        if (rangedMappings[i] != null) {
          final PrimitiveList mapping = rangedMappings[i].getList();
          mapping.setSize(null, maxId);
          monadic.put(header.get(i), mapping);
        }
      }

      // save the original node ids
      final RangedList nodeIds = new RangedList<>(rangeNodeId, maxId);
      for (final Map.Entry<String, Integer> e : ids.entrySet()) {
        nodeIds.setListAt(e.getValue().intValue(), e.getKey());
      }
      monadic.put("id", nodeIds.getList());
    }
    return this;
  }

  @Override
  public Map<String, Mapping<?>> monadic() {
    return Collections.unmodifiableMap(monadic);
  }

  @Override
  public Map<String, Integer> nodeIds() {
    return ids.getMapping();
  }

  @Override
  public void close() throws IOException {
    in.close();
  }
}
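
For reference, here is a minimal, self-contained usage sketch of this source reading node attributes from an in-memory CSV. It follows the class Javadoc above: Source.Range.INT is the only range constant assumed (as used in that example), the Map<String, Mapping<?>> return type of monadic() matches the listing above, and the class name CsvNodeListExample, the column names "node" and "age", and the sample data are hypothetical, chosen purely for illustration.

import ch.ethz.sn.visone3.io.Source;
import ch.ethz.sn.visone3.io.csv.CsvNodeListSource;
import ch.ethz.sn.visone3.lang.Mapping;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Map;

public class CsvNodeListExample {
  public static void main(final String[] args) throws Exception {
    // hypothetical node list: integer node ids plus one integer attribute column
    final String data = "node,age\n1,34\n2,27\n3,41\n";

    try (InputStream in = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8))) {
      final CsvNodeListSource source = new CsvNodeListSource(in, true);

      // declare the node id column and the attribute columns to read;
      // columns without a range specification are skipped
      source.monad("node", Source.Range.INT);
      source.noderange("age", Source.Range.INT);
      source.parse();

      // "age" attribute plus the synthesized "id" mapping of the original node ids
      final Map<String, Mapping<?>> attributes = source.monadic();
      System.out.println(attributes.keySet());

      // original node id -> internal node index
      System.out.println(source.nodeIds());
    }
  }
}

Note that parse() drops any range registered for the node id column itself and always appends an "id" mapping holding the original identifiers, so the attribute map contains one entry per configured column plus "id".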



