// tech.tablesaw.io.string.DataFramePrinter Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package tech.tablesaw.io.string;
import java.io.IOException;
import java.io.OutputStream;
import java.util.stream.IntStream;
import tech.tablesaw.columns.Column;
import tech.tablesaw.table.Relation;
import tech.tablesaw.util.StringUtils;
/**
* A class that can pretty print a DataFrame to text for visualization in a console
*
* Based off of
* https://github.com/zavtech/morpheus-core/blob/master/src/main/java/com/zavtech/morpheus/reference/XDataFramePrinter.java
* under Apache 2 license
*/
public class DataFramePrinter {
private static final String TOO_SHORT_COLUMN_MARKER = "?";
private final int maxRows;
private final OutputStream stream;
/**
* Constructor
*
* @param maxRows the max rows to print
* @param stream the print stream to write to
*/
public DataFramePrinter(int maxRows, OutputStream stream) {
this.maxRows = maxRows;
this.stream = stream;
}
/**
* Returns the column widths required to print the header and data
*
* @param headers the headers to print
* @param data the data items to print
* @return the required column widths
*/
private static int[] getWidths(String[] headers, String[][] data) {
final int[] widths = new int[headers.length];
for (int j = 0; j < headers.length; j++) {
final String header = headers[j];
widths[j] = Math.max(widths[j], header != null ? header.length() : 0);
}
for (String[] rowValues : data) {
for (int j = 0; j < rowValues.length; j++) {
final String value = rowValues[j];
widths[j] = Math.max(widths[j], value != null ? value.length() : 0);
}
}
return widths;
}
/**
* Returns the header template given the widths specified
*
* @param widths the token widths
* @return the line format template
*/
private static String getHeaderTemplate(int[] widths, String[] headers) {
return IntStream.range(0, widths.length)
.mapToObj(
i -> {
final int width = widths[i];
final int length = headers[i].length();
final int leading = (width - length) / 2;
final int trailing = width - (length + leading);
final StringBuilder text = new StringBuilder();
whitespace(text, leading + 1);
text.append("%").append(i + 1).append("$s");
whitespace(text, trailing);
text.append(" |");
return text.toString();
})
.reduce((left, right) -> left + " " + right)
.orElse("");
}
/**
* Returns the data template given the widths specified
*
* @param widths the token widths
* @return the line format template
*/
private static String getDataTemplate(int[] widths) {
return IntStream.range(0, widths.length)
.mapToObj(i -> " %" + (i + 1) + "$" + widths[i] + "s |")
.reduce((left, right) -> left + " " + right)
.orElse("");
}
/**
* Returns a whitespace string of the length specified
*
* @param length the length for whitespace
*/
private static void whitespace(StringBuilder text, int length) {
IntStream.range(0, length).forEach(i -> text.append(" "));
}
/**
* Prints the specified DataFrame to the stream bound to this printer
*
* @param frame the DataFrame to print
*/
public void print(Relation frame) {
try {
final String[] headers = getHeaderTokens(frame);
final String[][] data = getDataTokens(frame);
final int[] widths = getWidths(headers, data);
final String dataTemplate = getDataTemplate(widths);
final String headerTemplate = getHeaderTemplate(widths, headers);
final int totalWidth = IntStream.of(widths).map(w -> w + 5).sum() - 1;
final int totalHeight = data.length + 1;
int capacity = totalWidth * totalHeight;
if (capacity < 0) {
capacity = 0;
}
final StringBuilder text = new StringBuilder(capacity);
if (frame.name() != null) {
text.append(tableName(frame, totalWidth)).append(System.lineSeparator());
}
final String headerLine = String.format(headerTemplate, (Object[]) headers);
text.append(headerLine).append(System.lineSeparator());
for (int j = 0; j < totalWidth; j++) {
text.append("-");
}
for (String[] row : data) {
final String dataLine = String.format(dataTemplate, (Object[]) row);
text.append(System.lineSeparator());
text.append(dataLine);
}
final byte[] bytes = text.toString().getBytes();
this.stream.write(bytes);
this.stream.flush();
} catch (IOException ex) {
throw new IllegalStateException("Failed to print DataFrame", ex);
}
}
private String tableName(Relation frame, int width) {
if (frame.name().length() > width) {
return frame.name();
}
int diff = width - frame.name().length();
String result = StringUtils.repeat(" ", diff / 2) + frame.name();
return result + StringUtils.repeat(" ", width - result.length());
}
/**
* Returns the header string tokens for the frame
*
* @param frame the frame to create header tokens
* @return the header tokens
*/
private String[] getHeaderTokens(Relation frame) {
final int colCount = frame.columnCount();
final String[] header = new String[colCount];
IntStream.range(0, colCount)
.forEach(
colIndex -> {
header[colIndex] = frame.column(colIndex).name();
});
return header;
}
private String getDataToken(Column> col, int i) {
return col.size() > i ? col.getString(i) : TOO_SHORT_COLUMN_MARKER;
}
/**
* Returns the 2-D array of data tokens from the frame specified
*
* @param frame the DataFrame from which to create 2D array of formatted tokens
* @return the array of data tokens
*/
private String[][] getDataTokens(Relation frame) {
if (frame.rowCount() == 0) return new String[0][0];
final int rowCount = Math.min(maxRows, frame.rowCount());
final boolean truncated = frame.rowCount() > maxRows;
final int colCount = frame.columnCount();
final String[][] data;
if (truncated) {
data = new String[rowCount + 1][colCount];
int i;
for (i = 0; i < Math.ceil((double) rowCount / 2); i++) {
for (int j = 0; j < colCount; j++) {
Column> col = frame.column(j);
data[i][j] = getDataToken(col, i);
}
}
for (int j = 0; j < colCount; j++) {
data[i][j] = "...";
}
for (++i; i <= rowCount; i++) {
for (int j = 0; j < colCount; j++) {
Column> col = frame.column(j);
data[i][j] = getDataToken(col, frame.rowCount() - maxRows + i - 1);
}
}
} else {
data = new String[rowCount][colCount];
for (int i = 0; i < rowCount; i++) {
for (int j = 0; j < colCount; j++) {
Column> col = frame.column(j);
String value = getDataToken(col, i);
data[i][j] = value == null ? "" : value;
}
}
}
return data;
}
}