All downloads are free. The search and download functionality uses the official Maven repository.

com.simiacryptus.util.TableOutput Maven / Gradle / Ivy

There is a newer version: 2.1.0
Show newest version
/*
 * Copyright (c) 2018 by Andrew Charneski.
 *
 * The author licenses this file to you under the
 * Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance
 * with the License.  You may obtain a copy
 * of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.simiacryptus.util;

import com.simiacryptus.util.data.DoubleStatistics;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeSet;
import java.util.stream.Collectors;

/**
 * The type Table output.
 */
public class TableOutput {
  
  /**
   * The Rows.
   */
  public final List> rows = new ArrayList<>();
  /**
   * The Schema.
   */
  public final Map> schema = new LinkedHashMap<>();
  
  /**
   * Create table output.
   *
   * @param rows the rows
   * @return the table output
   */
  @javax.annotation.Nonnull
  public static com.simiacryptus.util.TableOutput create(@javax.annotation.Nonnull final Map... rows) {
    @javax.annotation.Nonnull final com.simiacryptus.util.TableOutput table = new com.simiacryptus.util.TableOutput();
    Arrays.stream(rows).forEach(table::putRow);
    return table;
    
  }
  
  /**
   * Calc number stats table output.
   *
   * @return the table output
   */
  @javax.annotation.Nonnull
  public com.simiacryptus.util.TableOutput calcNumberStats() {
    @javax.annotation.Nonnull final com.simiacryptus.util.TableOutput tableOutput = new com.simiacryptus.util.TableOutput();
    schema.entrySet().stream().filter(x -> Number.class.isAssignableFrom(x.getValue())).map(col -> {
      final CharSequence key = col.getKey();
      final DoubleStatistics stats = rows.stream().filter(x -> x.containsKey(key)).map(x -> (Number) x.get(key)).collect(DoubleStatistics.NUMBERS);
      @javax.annotation.Nonnull final LinkedHashMap row = new LinkedHashMap<>();
      row.put("field", key);
      row.put("sum", stats.getSum());
      row.put("avg", stats.getAverage());
      row.put("stddev", stats.getStandardDeviation());
      row.put("nulls", rows.size() - stats.getCount());
      return row;
    }).sorted(Comparator.comparing(x -> x.get("field").toString()))
      .forEach(row -> tableOutput.putRow(row));
    return tableOutput;
  }
  
  /**
   * Clear.
   */
  public void clear() {
    schema.clear();
    rows.clear();
  }
  
  /**
   * Put row.
   *
   * @param properties the properties
   */
  public void putRow(@javax.annotation.Nonnull final Map properties) {
    for (@javax.annotation.Nonnull final Entry prop : properties.entrySet()) {
      final CharSequence propKey = prop.getKey();
      final Class propClass = prop.getValue().getClass();
      if (!propClass.equals(schema.getOrDefault(propKey, propClass))) {
        throw new RuntimeException("Schema mismatch for " + propKey);
      }
      schema.putIfAbsent(propKey, propClass);
    }
    rows.add(new HashMap<>(properties));
  }
  
  /**
   * To csv string.
   *
   * @param sortCols the sort cols
   * @return the string
   */
  public CharSequence toCSV(final boolean sortCols) {
    try (@javax.annotation.Nonnull ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
      try (@javax.annotation.Nonnull PrintStream printStream = new PrintStream(buffer)) {
        @javax.annotation.Nonnull final Collection keys = sortCols ? new TreeSet(schema.keySet()) : schema.keySet();
        final String formatString = keys.stream()
          .map(k -> {
            switch (schema.get(k).getSimpleName()) {
              case "String":
                return "%-" + rows.stream().mapToInt(x -> x.getOrDefault(k, "").toString().length()).max().getAsInt() + "s";
              case "Integer":
                return "%6d";
              case "Double":
                return "%.4f";
              default:
                return "%s";
            }
          }).collect(Collectors.joining(","));
        printStream.println(keys.stream().collect(Collectors.joining(",")).trim());
        for (@javax.annotation.Nonnull final Map row : rows) {
          printStream.println(String.format(formatString, keys.stream().map(k -> row.get(k)).toArray()));
        }
      }
      return buffer.toString();
    } catch (@javax.annotation.Nonnull final IOException e) {
      throw new RuntimeException(e);
    }
  }
  
  /**
   * To html table string.
   *
   * @return the string
   */
  public String toHtmlTable() {
    return toHtmlTable(false);
  }
  
  /**
   * To html table string.
   *
   * @param sortCols the sort cols
   * @return the string
   */
  public String toHtmlTable(final boolean sortCols) {
    try (@javax.annotation.Nonnull ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
      try (@javax.annotation.Nonnull PrintStream printStream = new PrintStream(buffer)) {
        @javax.annotation.Nonnull final Collection keys = sortCols ? new TreeSet(schema.keySet()) : schema.keySet();
        final String formatString = keys.stream()
          .map(k -> {
            switch (schema.get(k).getSimpleName()) {
              case "String":
                return "%-" + rows.stream().mapToInt(x -> x.getOrDefault(k, "").toString().length()).max().getAsInt() + "s";
              case "Integer":
                return "%6d";
              case "Double":
                return "%.4f";
              default:
                return "%s";
            }
          }).map(s -> "" + s + "").collect(Collectors.joining(""));
        printStream.print("");
        printStream.print("");
        printStream.println(keys.stream().map(s -> "").collect(Collectors.joining("")).trim());
        printStream.print("");
        for (@javax.annotation.Nonnull final Map row : rows) {
          printStream.print("");
          printStream.println(String.format(formatString, keys.stream().map(k -> row.get(k)).toArray()));
          printStream.print("");
        }
        printStream.print("
" + s + "
"); } return buffer.toString(); } catch (@javax.annotation.Nonnull final IOException e) { throw new RuntimeException(e); } } /** * To text table string. * * @return the string */ public String toMarkdownTable() { try (@javax.annotation.Nonnull ByteArrayOutputStream buffer = new ByteArrayOutputStream()) { try (@javax.annotation.Nonnull PrintStream printStream = new PrintStream(buffer)) { final String formatString = schema.entrySet().stream() .map(e -> { switch (e.getValue().getSimpleName()) { case "String": return "%-" + rows.stream().mapToInt(x -> x.getOrDefault(e.getKey(), "").toString().length()).max().getAsInt() + "s"; case "Integer": return "%6d"; case "Double": return "%.4f"; default: return "%s"; } }).collect(Collectors.joining(" | ")); printStream.println(schema.entrySet().stream().map(x -> x.getKey()).collect(Collectors.joining(" | ")).trim()); printStream.println(schema.entrySet().stream().map(x -> x.getKey()).map(x -> { @javax.annotation.Nonnull final char[] t = new char[x.length()]; Arrays.fill(t, '-'); return new String(t); }).collect(Collectors.joining(" | ")).trim()); for (@javax.annotation.Nonnull final Map row : rows) { printStream.println(String.format(formatString, schema.entrySet().stream().map(e -> row.get(e.getKey())).toArray())); } } return buffer.toString(); } catch (@javax.annotation.Nonnull final IOException e) { throw new RuntimeException(e); } } /** * Write projector data. 
* * @param path the path * @param baseUrl the base url * @throws IOException the io exception */ public void writeProjectorData(@javax.annotation.Nonnull final File path, final URL baseUrl) throws IOException { path.mkdirs(); try (@javax.annotation.Nonnull FileOutputStream file = new FileOutputStream(new File(path, "data.tsv"))) { try (@javax.annotation.Nonnull PrintStream printStream = new PrintStream(file)) { printStream.println(toMarkdownTable()); } } final List>> scalarCols = schema.entrySet().stream() .filter(e -> Number.class.isAssignableFrom(e.getValue())) .collect(Collectors.toList()); try (@javax.annotation.Nonnull FileOutputStream file = new FileOutputStream(new File(path, "tensors.tsv"))) { try (@javax.annotation.Nonnull PrintStream printStream = new PrintStream(file)) { for (@javax.annotation.Nonnull final Map row : rows) { printStream.println(scalarCols.stream() .map(e -> ((Number) row.getOrDefault(e.getKey(), 0)).doubleValue()) .map(x -> x.toString()).collect(Collectors.joining("\t"))); } } } final List>> metadataCols = schema.entrySet().stream() .filter(e -> String.class.isAssignableFrom(e.getValue())) .collect(Collectors.toList()); try (@javax.annotation.Nonnull FileOutputStream file = new FileOutputStream(new File(path, "metadata.tsv"))) { try (@javax.annotation.Nonnull PrintStream printStream = new PrintStream(file)) { if (1 < metadataCols.size()) { printStream.println(metadataCols.stream().map(e -> e.getKey()).collect(Collectors.joining("\t"))); } for (@javax.annotation.Nonnull final Map row : rows) { printStream.println(metadataCols.stream() .map(e -> ((CharSequence) row.getOrDefault(e.getKey(), ""))) .collect(Collectors.joining("\t"))); } } } final List>> urlCols = schema.entrySet().stream() .filter(e -> URL.class.isAssignableFrom(e.getValue())) .collect(Collectors.toList()); try (@javax.annotation.Nonnull FileOutputStream file = new FileOutputStream(new File(path, "bookmarks.txt"))) { try (@javax.annotation.Nonnull PrintStream printStream = 
new PrintStream(file)) { for (@javax.annotation.Nonnull final Map row : rows) { printStream.println(urlCols.stream() .map(e -> row.get(e.getKey()).toString()) .collect(Collectors.joining("\t"))); } } } try (@javax.annotation.Nonnull FileOutputStream file = new FileOutputStream(new File(path, "config.json"))) { try (@javax.annotation.Nonnull PrintStream printStream = new PrintStream(file)) { printStream.println("{\n" + " \"embeddings\": [\n" + " {\n" + " \"tensorName\": \"" + path.getName() + "\",\n" + " \"tensorShape\": [\n" + " " + rows.size() + ",\n" + " " + scalarCols.size() + "\n" + " ],\n" + " \"tensorPath\": \"" + new URL(baseUrl, "tensors.tsv") + (0 == metadataCols.size() ? "" : "\",\n \"metadataPath\": \"" + new URL(baseUrl, "metadata.tsv")) + "\"\n" + " }\n" + " ]\n" + "}"); } } if (0 < urlCols.size()) { try (@javax.annotation.Nonnull FileOutputStream file = new FileOutputStream(new File(path, "config_withLinks.json"))) { try (@javax.annotation.Nonnull PrintStream printStream = new PrintStream(file)) { printStream.println("{\n" + " \"embeddings\": [\n" + " {\n" + " \"tensorName\": \"" + path.getName() + "\",\n" + " \"tensorShape\": [\n" + " " + rows.size() + ",\n" + " " + scalarCols.size() + "\n" + " ],\n" + " \"tensorPath\": \"" + new URL(baseUrl, "tensors.tsv") + (0 == metadataCols.size() ? "" : "\",\n \"metadataPath\": \"" + new URL(baseUrl, "metadata.tsv")) + "\",\n \"bookmarksPath\": \"" + new URL(baseUrl, "bookmarks.txt") + "\"\n" + " }\n" + " ]\n" + "}"); } } } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy