org.biojava.nbio.survival.data.WorkSheet Maven / Gradle / Ivy
The newest version!
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.survival.data;
import java.io.*;
import java.util.*;
/**
* Need to handle very large spreadsheets of expression data so keep memory
* footprint low
*
* @author Scooter Willis
*/
public class WorkSheet {
private LinkedHashMap columnLookup = new LinkedHashMap<>();
private LinkedHashMap rowLookup = new LinkedHashMap<>();
private CompactCharSequence[][] data = new CompactCharSequence[1][1];
HashMap dataGrid = new HashMap<>();
private String indexColumnName = "";
/**
*
*/
public WorkSheet() {
}
/**
*
* @param rows
* @param columns
* @throws Exception
*/
public WorkSheet(Collection rows, Collection columns) throws Exception {
// rowsList = new ArrayList(rows);
int i = 1;
for (String row : rows) {
if (rowLookup.containsKey(row)) {
throw new Exception("Duplicate row " + row);
}
rowLookup.put(row, new HeaderInfo(i));
i++;
}
i = 1;
for (String col : columns) {
if (columnLookup.containsKey(col)) {
throw new Exception("Duplicate row " + col);
}
columnLookup.put(col, new HeaderInfo(i));
i++;
}
// columnsList.trimToSize();
// rowsList.trimToSize();
data = new CompactCharSequence[rowLookup.size() + 1][columnLookup.size() + 1];
}
/**
*
* @param values
*/
public WorkSheet(String[][] values) {
// System.out.println("In worksheet init " + Runtime.getRuntime().totalMemory());
String[] columns = new String[values[0].length];
for (int i = 0; i < columns.length; i++) {
columns[i] = new String(values[0][i].getBytes());
}
for (int i = 1; i < columns.length; i++) {
columnLookup.put(columns[i], new HeaderInfo(i));
}
for (int i = 1; i < values.length; i++) {
String row = new String(values[i][0].getBytes());
rowLookup.put(row, new HeaderInfo(i));
}
data = new CompactCharSequence[values.length][values[0].length];
for (int row = 0; row < values.length; row++) {
for (int col = 0; col < values[0].length; col++) {
String value = values[row][col];
data[row][col] = new CompactCharSequence(value);
values[row][col] = null;
}
System.out.println("Row " + row + " " + Runtime.getRuntime().totalMemory());
}
values = null;
System.gc();
//data = values;
}
/**
* See if we can free up memory
*/
public void clear() {
columnLookup.clear();
rowLookup.clear();
data = null;
dataGrid.clear();
doubleValues.clear();
System.gc();
}
@Override
public String toString() {
return super.toString(); //To change body of generated methods, choose Tools | Templates.
}
/**
* Split a worksheet randomly. Used for creating a discovery/validation data
* set The first file name will matched the percentage and the second file
* the remainder
*
* @param percentage
* @param fileName1
* @param fileName2
* @throws Exception
*/
public void randomlyDivideSave(double percentage, String fileName1, String fileName2) throws Exception {
ArrayList rows = this.getDataRows();
Collections.shuffle(rows);
int portion = (int) (rows.size() * percentage);
for (int i = 0; i < portion; i++) {
this.hideRow(rows.get(i), true);
}
this.saveTXT(fileName2);
for (int i = 0; i < portion; i++) {
this.hideRow(rows.get(i), false);
}
for (int i = portion; i < rows.size(); i++) {
this.hideRow(rows.get(i), true);
}
this.saveTXT(fileName1);
for (int i = portion; i < rows.size(); i++) {
this.hideRow(rows.get(i), false);
}
}
/**
* Create a copy of a worksheet. If shuffling of columns or row for testing
* a way to duplicate original worksheet
*
* @param copyWorkSheet
* @param rows
* @return
* @throws Exception
*/
static public WorkSheet getCopyWorkSheetSelectedRows(WorkSheet copyWorkSheet, ArrayList rows) throws Exception {
ArrayList columns = copyWorkSheet.getColumns();
WorkSheet workSheet = new WorkSheet(rows, columns);
for (String row : rows) {
for (String col : columns) {
workSheet.addCell(row, col, copyWorkSheet.getCell(row, col));
}
}
workSheet.setMetaDataColumns(copyWorkSheet.getMetaDataColumns());
workSheet.setMetaDataRows(copyWorkSheet.getMetaDataRows());
return workSheet;
}
/**
* Create a copy of a worksheet. If shuffling of columns or row for testing
* a way to duplicate original worksheet
*
* @param copyWorkSheet
* @return
* @throws Exception
*/
static public WorkSheet getCopyWorkSheet(WorkSheet copyWorkSheet) throws Exception {
ArrayList rows = copyWorkSheet.getRows();
ArrayList columns = copyWorkSheet.getColumns();
WorkSheet workSheet = new WorkSheet(rows, columns);
for (String row : rows) {
for (String col : columns) {
workSheet.addCell(row, col, copyWorkSheet.getCell(row, col));
}
}
workSheet.setMetaDataColumns(copyWorkSheet.getMetaDataColumns());
workSheet.setMetaDataRows(copyWorkSheet.getMetaDataRows());
return workSheet;
}
/**
*
* @param values
*/
public WorkSheet(CompactCharSequence[][] values) {
// System.out.println("In worksheet init " + Runtime.getRuntime().totalMemory());
String[] columns = new String[values[0].length];
for (int i = 0; i < columns.length; i++) {
columns[i] = values[0][i].toString();
}
this.setIndexColumnName(columns[0]);
for (int i = 1; i < columns.length; i++) {
columnLookup.put(columns[i], new HeaderInfo(i));
}
for (int i = 1; i < values.length; i++) {
String row = values[i][0].toString();
rowLookup.put(row, new HeaderInfo(i));
}
data = values;
}
private LinkedHashMap metaDataColumnsHashMap = new LinkedHashMap<>();
/**
*
* @return
*/
public ArrayList getMetaDataColumns() {
ArrayList metaColumns = new ArrayList<>();
for (String key : metaDataColumnsHashMap.keySet()) {
HeaderInfo hi = columnLookup.get(key);
if (!hi.isHide()) {
metaColumns.add(key);
}
}
return metaColumns;
}
/**
*
* @return
*/
public ArrayList getMetaDataRows() {
ArrayList metaRows = new ArrayList<>();
for (String key : metaDataRowsHashMap.keySet()) {
HeaderInfo hi = rowLookup.get(key);
if (!hi.isHide()) {
metaRows.add(key);
}
}
return metaRows;
}
/**
*
* @return
*/
public ArrayList getDataColumns() {
ArrayList dataColumns = new ArrayList<>();
ArrayList columns = this.getColumns();
for (String column : columns) {
if (!metaDataColumnsHashMap.containsKey(column)) {
dataColumns.add(column);
}
}
return dataColumns;
}
/**
* Randomly shuffle the columns and rows. Should be constrained to the same
* data type if not probably doesn't make any sense.
*
* @param columns
* @param rows
* @throws Exception
*/
public void shuffleColumnsAndThenRows(ArrayList columns, ArrayList rows) throws Exception {
doubleValues.clear();
for (String column : columns) { //shuffle all values in the column
ArrayList rowIndex = new ArrayList<>();
for (int i = 0; i < rows.size(); i++) {
rowIndex.add(i);
}
Collections.shuffle(rowIndex);
for (int i = 0; i < rows.size(); i++) {
String row = rows.get(i);
int randomIndex = rowIndex.get(i);
String destinationRow = rows.get(randomIndex);
String temp = this.getCell(destinationRow, column);
String value = this.getCell(row, column);
this.addCell(destinationRow, column, value);
this.addCell(row, column, temp);
}
}
for (String row : rows) {
ArrayList columnIndex = new ArrayList<>();
for (int i = 0; i < columns.size(); i++) {
columnIndex.add(i);
}
Collections.shuffle(columnIndex);
for (int i = 0; i < columns.size(); i++) {
String column = columns.get(i);
int randomIndex = columnIndex.get(i);
String destinationCol = columns.get(randomIndex);
String temp = this.getCell(row, destinationCol);
String value = this.getCell(row, column);
this.addCell(row, destinationCol, value);
this.addCell(row, column, temp);
}
}
}
/**
* Need to shuffle column values to allow for randomized testing. The
* columns in the list will be shuffled together
*
* @param columns
* @throws Exception
*/
public void shuffleColumnValues(ArrayList columns) throws Exception {
doubleValues.clear();
ArrayList rows = this.getDataRows();
for (String column : columns) { //shuffle all values in the column
ArrayList rowIndex = new ArrayList<>();
for (int i = 0; i < rows.size(); i++) {
rowIndex.add(i);
}
Collections.shuffle(rowIndex);
for (int i = 0; i < rows.size(); i++) {
String row = rows.get(i);
int randomIndex = rowIndex.get(i);
String destinationRow = rows.get(randomIndex);
String temp = this.getCell(destinationRow, column);
String value = this.getCell(row, column);
this.addCell(destinationRow, column, value);
this.addCell(row, column, temp);
}
}
}
/**
* Need to shuffle rows values to allow for randomized testing. The rows in
* the list will be shuffled together
*
* @param rows
* @throws Exception
*/
public void shuffleRowValues(ArrayList rows) throws Exception {
doubleValues.clear();
ArrayList columns = this.getColumns();
for (String row : rows) {
ArrayList columnIndex = new ArrayList<>();
for (int i = 0; i < columns.size(); i++) {
columnIndex.add(i);
}
Collections.shuffle(columnIndex);
for (int i = 0; i < columns.size(); i++) {
String column = columns.get(i);
int randomIndex = columnIndex.get(i);
String destinationCol = columns.get(randomIndex);
String temp = this.getCell(row, destinationCol);
String value = this.getCell(row, column);
this.addCell(row, destinationCol, value);
this.addCell(row, column, temp);
}
}
}
/**
*
* @param value
*/
public void hideMetaDataColumns(boolean value) {
ArrayList metadataColumns = this.getMetaDataColumns();
for (String column : metadataColumns) {
this.hideColumn(column, value);
}
}
/**
*
* @param value
*/
public void hideMetaDataRows(boolean value) {
ArrayList metadataRows = this.getMetaDataRows();
for (String row : metadataRows) {
this.hideRow(row, value);
}
}
/**
*
*/
public void setMetaDataRowsAfterRow() {
this.setMetaDataRowsAfterRow("META_DATA");
}
/**
*
*/
public void setMetaDataColumnsAfterColumn() {
this.setMetaDataColumnsAfterColumn("META_DATA");
}
/**
*
* @param row
*/
public void setMetaDataRowsAfterRow(String row) {
ArrayList rows = this.getRows();
boolean metarow = false;
for (String r : rows) {
if (r.equals(row) && !metarow) {
metarow = true;
}
if (metarow) {
this.markMetaDataRow(r);
}
}
}
/**
*
* @param column
*/
public void setMetaDataColumnsAfterColumn(String column) {
ArrayList cols = this.getColumns();
boolean metacolumns = false;
for (String col : cols) {
if (col.equals(column) && !metacolumns) {
metacolumns = true;
}
if (metacolumns) {
this.markMetaDataColumn(col);
}
}
}
/**
* Clears existing meta data columns and sets new ones
*
* @param metaDataColumns
*/
public void setMetaDataColumns(ArrayList metaDataColumns) {
metaDataColumnsHashMap.clear();
markMetaDataColumns(metaDataColumns);
}
/**
* marks columns as containing meta data
*
* @param metaDataColumns
*/
public void markMetaDataColumns(ArrayList metaDataColumns) {
for (String column : metaDataColumns) {
metaDataColumnsHashMap.put(column, column);
}
}
/**
*
* @param column
*/
public void markMetaDataColumn(String column) {
metaDataColumnsHashMap.put(column, column);
}
/**
*
* @param column
* @return
*/
public boolean isMetaDataColumn(String column) {
if (metaDataColumnsHashMap.get(column) == null) {
return false;
} else {
return true;
}
}
/**
*
* @param row
* @return
*/
public boolean isMetaDataRow(String row) {
if (metaDataRowsHashMap.get(row) == null) {
return false;
} else {
return true;
}
}
private LinkedHashMap metaDataRowsHashMap = new LinkedHashMap<>();
/**
*
* @param row
*/
public void markMetaDataRow(String row) {
metaDataRowsHashMap.put(row, row);
}
/**
*
* @param metaDataRows
*/
public void setMetaDataRows(ArrayList metaDataRows) {
metaDataRowsHashMap.clear();
for (String row : metaDataRows) {
metaDataRowsHashMap.put(row, row);
}
}
/**
*
* @throws Exception
*/
public void hideEmptyRows() throws Exception {
ArrayList rows = this.getDataRows();
ArrayList columns = this.getDataColumns();
for (String row : rows) {
boolean emptyRow = true;
for (String column : columns) {
String value = this.getCell(row, column).trim();
if (value.length() > 0) {
emptyRow = false;
break;
}
}
if (emptyRow) {
hideRow(row, true);
}
}
}
/**
*
* @throws Exception
*/
public void hideEmptyColumns() throws Exception {
ArrayList rows = this.getDataRows();
ArrayList columns = this.getDataColumns();
for (String column : columns) {
boolean emptyColumn = true;
for (String row : rows) {
String value = this.getCell(row, column).trim();
if (value.length() > 0) {
emptyColumn = false;
break;
}
}
if (emptyColumn) {
hideColumn(column, true);
}
}
}
/**
*
* @param row
* @param hide
*/
public void hideRow(String row, boolean hide) {
HeaderInfo rowInfo = rowLookup.get(row);
rowInfo.setHide(hide);
}
/**
*
* @param column
* @param hide
*/
public void hideColumn(String column, boolean hide) {
HeaderInfo colInfo = columnLookup.get(column);
colInfo.setHide(hide);
}
/**
* Change values in a column where 0 = something and 1 = something different
*
* @param column
* @param values
* @throws Exception
*/
public void replaceColumnValues(String column, HashMap values) throws Exception {
for (String row : rowLookup.keySet()) {
String oldValue = this.getCell(row, column);
String newValue = values.get(oldValue);
this.addCell(row, column, newValue);
}
}
/**
* Apply filter to a column to change values from say numberic to nominal
* based on some range
*
* @param column
* @param changeValue
* @throws Exception
*/
public void applyColumnFilter(String column, ChangeValue changeValue) throws Exception {
for (String row : rowLookup.keySet()) {
String oldValue = this.getCell(row, column);
String newValue = changeValue.change(oldValue);
this.addCell(row, column, newValue);
}
}
/**
*
* @param column
* @param defaultValue
*/
public void addColumn(String column, String defaultValue) {
ArrayList columns = new ArrayList<>();
columns.add(column);
addColumns(columns, defaultValue);
}
/**
* Add columns to worksheet and set default value
*
* @param columns
* @param defaultValue
*/
public void addColumns(ArrayList columns, String defaultValue) {
CompactCharSequence dv = new CompactCharSequence(defaultValue);
for (int i = 0; i < data.length; i++) {
CompactCharSequence[] row = data[i];
int oldrowlength = data[i].length;
data[i] = (CompactCharSequence[]) resizeArray(row, oldrowlength + columns.size());
for (int c = 0; c < columns.size(); c++) {
data[i][oldrowlength + c] = dv;
}
if (i == 0) {
for (int c = 0; c < columns.size(); c++) {
String column = columns.get(c);
data[0][oldrowlength + c] = new CompactCharSequence(column);
columnLookup.put(column, new HeaderInfo(oldrowlength + c));
}
}
}
// columnLookup.get("ZNF30");
// int startIndex = columnLookup.size() + 1;
// for (String column : columns) {
// if(column.equals("ttr")){
// int dummy = 1;
// }
// columnLookup.put(column, new HeaderInfo(startIndex));
// startIndex++;
// }
}
/**
*
* @param row
* @param defaultValue
*/
public void addRow(String row, String defaultValue) {
ArrayList rows = new ArrayList<>();
rows.add(row);
addRows(rows, defaultValue);
}
/**
* Add rows to the worksheet and fill in default value
*
* @param rows
* @param defaultValue
*/
public void addRows(ArrayList rows, String defaultValue) {
CompactCharSequence dv = new CompactCharSequence(defaultValue);
int oldlength = data.length;
int numColumns = 0;
if (data.length > 0 && data[0] != null) {
numColumns = data[0].length;
}
data = (CompactCharSequence[][]) resizeArray(data, data.length + rows.size());
for (int r = 0; r < rows.size(); r++) {
data[oldlength + r] = new CompactCharSequence[numColumns];
for (int c = 0; c < numColumns; c++) {
data[oldlength + r][c] = dv;
}
data[oldlength + r][0] = new CompactCharSequence(rows.get(r));
rowLookup.put(rows.get(r), new HeaderInfo(r + oldlength));
}
}
/**
* Reallocates an array with a new size, and copies the contents of the old
* array to the new array.
*
* @param oldArray the old array, to be reallocated.
* @param newSize the new array size.
* @return A new array with the same contents.
*/
private static Object resizeArray(Object oldArray, int newSize) {
int oldSize = java.lang.reflect.Array.getLength(oldArray);
Class> elementType = oldArray.getClass().getComponentType();
Object newArray = java.lang.reflect.Array.newInstance(
elementType, newSize);
int preserveLength = Math.min(oldSize, newSize);
if (preserveLength > 0) {
System.arraycopy(oldArray, 0, newArray, 0, preserveLength);
}
return newArray;
}
/**
* Add data to a cell
*
* @param row
* @param col
* @param value
* @throws Exception
*/
public void addCell(String row, String col, String value) throws Exception {
HeaderInfo rowIndex = rowLookup.get(row);
HeaderInfo colIndex = columnLookup.get(col);
if (rowIndex == null) {
throw new Exception("Row " + row + " not found in worksheet");
}
if (colIndex == null) {
throw new Exception("Column " + col + " not found in worksheet");
}
data[rowIndex.getIndex()][colIndex.getIndex()] = new CompactCharSequence(value);
}
/**
*
* @param row
* @return
*/
public boolean isValidRow(String row) {
HeaderInfo rowIndex = rowLookup.get(row);
if (rowIndex == null) {
for (String rowtable : rowLookup.keySet()) {
if (row.equalsIgnoreCase(rowtable)) {
return true;
}
}
return false;
} else {
return true;
}
}
/**
*
* @param col
* @return
*/
public boolean isValidColumn(String col) {
HeaderInfo colIndex = columnLookup.get(col);
if (colIndex == null) {
for (String coltable : columnLookup.keySet()) {
if (col.equalsIgnoreCase(coltable)) {
return true;
}
}
return false;
} else {
return true;
}
}
//When we do gene signatures we ask for the same data value often. This method took up 50% of the time.
HashMap doubleValues = new HashMap<>();
boolean cacheDoubleValues = false;
/**
*
* @param value
*/
public void setCacheDoubleValues(boolean value) {
cacheDoubleValues = value;
}
/**
*
* @param row
* @param col
* @return
* @throws Exception
*/
public Double getCellDouble(String row, String col) throws Exception {
if (cacheDoubleValues) {
String key = row + ":" + col;
Double v = doubleValues.get(key);
if (v != null) {
return v;
}
String value = getCell(row, col);
try {
v = Double.parseDouble(value);
} catch (Exception e) {
}
doubleValues.put(key, v);
return v;
} else {
Double v = null;
String value = getCell(row, col);
try {
v = Double.parseDouble(value);
} catch (Exception e) {
}
return v;
}
}
/**
* Get cell value
*
* @param row
* @param col
* @return
* @throws Exception
*/
public String getCell(String row, String col) throws Exception {
if (col.equals(this.getIndexColumnName())) {
return row;
}
HeaderInfo rowIndex = rowLookup.get(row);
HeaderInfo colIndex = columnLookup.get(col);
if (rowIndex == null) {
//allow for case insentive search
for (String rowtable : rowLookup.keySet()) {
if (row.equalsIgnoreCase(rowtable)) {
rowIndex = rowLookup.get(rowtable);
break;
}
}
if (rowIndex == null) {
throw new Exception("Row " + row + " not found in worksheet");
}
}
if (colIndex == null) {
//allow for case insentive search
for (String coltable : columnLookup.keySet()) {
if (col.equalsIgnoreCase(coltable)) {
colIndex = columnLookup.get(coltable);
break;
}
}
if (colIndex == null) {
throw new Exception("Column " + col + " not found in worksheet");
}
}
CompactCharSequence ccs = data[rowIndex.getIndex()][colIndex.getIndex()];
if (ccs != null) {
return ccs.toString();
} else {
return "";
}
// return .toString();
}
/**
*
* @param changeValue
*/
public void changeRowHeader(ChangeValue changeValue) {
ArrayList rows = new ArrayList<>(rowLookup.keySet());
for (String row : rows) {
String newRow = changeValue.change(row);
HeaderInfo value = rowLookup.get(row);
rowLookup.remove(row);
rowLookup.put(newRow, value);
}
}
/**
*
* @param changeValue
*/
public void changeColumnHeader(ChangeValue changeValue) {
ArrayList columns = new ArrayList<>(columnLookup.keySet());
for (String col : columns) {
String newCol = changeValue.change(col);
HeaderInfo value = columnLookup.get(col);
columnLookup.remove(col);
columnLookup.put(newCol, value);
}
}
/**
*
* @param row
* @param newRow
* @throws Exception
*/
public void changeRowHeader(String row, String newRow) throws Exception {
HeaderInfo value = rowLookup.get(row);
if (value == null) {
throw new Exception("Row not found " + row);
}
rowLookup.remove(row);
rowLookup.put(newRow, value);
if (this.isMetaDataRow(row)) {
metaDataRowsHashMap.remove(row);
metaDataRowsHashMap.put(newRow, newRow);
}
}
/**
* Change the columns in the HashMap Key to the name of the value
*
* @param newColumnValues
* @throws Exception
*/
public void changeColumnsHeaders(LinkedHashMap newColumnValues) throws Exception {
for (String oldColumn : newColumnValues.keySet()) {
String newColumn = newColumnValues.get(oldColumn);
changeColumnHeader(oldColumn, newColumn);
}
}
/**
*
* @param col
* @param newCol
* @throws Exception
*/
public void changeColumnHeader(String col, String newCol) throws Exception {
HeaderInfo value = columnLookup.get(col);
if (value == null) {
throw new Exception("Column not found " + col);
}
columnLookup.remove(col);
columnLookup.put(newCol, value);
if (this.isMetaDataColumn(col)) {
metaDataColumnsHashMap.remove(col);
metaDataColumnsHashMap.put(newCol, newCol);
}
}
/**
*
* @param column
* @return
* @throws Exception
*/
public Integer getColumnIndex(String column) throws Exception {
HeaderInfo headerInfo = columnLookup.get(column);
if (headerInfo == null) {
throw new Exception("Column " + column + " not found");
}
return headerInfo.getIndex();
}
/**
*
* @param row
* @return
* @throws Exception
*/
public Integer getRowIndex(String row) throws Exception {
HeaderInfo headerInfo = rowLookup.get(row);
if (headerInfo == null) {
throw new Exception("Row " + row + " not found");
}
return headerInfo.getIndex();
}
/**
*
* @param number
* @return
*/
public ArrayList getRandomDataColumns(int number) {
ArrayList columns = getDataColumns();
return getRandomDataColumns(number, columns);
}
/**
*
* @param number
* @param columns
* @return
*/
public ArrayList getRandomDataColumns(int number, ArrayList columns) {
ArrayList randomColumns = new ArrayList<>();
HashMap picked = new HashMap<>();
while (picked.size() < number) {
double v = Math.random();
int index = (int) (v * columns.size());
if (picked.containsKey(String.valueOf(index))) {
continue;
}
picked.put(String.valueOf(index), String.valueOf(index));
randomColumns.add(columns.get(index));
}
return randomColumns;
}
/**
* Get the list of column names including those that may be hidden
*
* @return
*/
public ArrayList getAllColumns() {
ArrayList columns = new ArrayList<>();
for (String col : columnLookup.keySet()) {
columns.add(col);
}
return columns;
}
/**
* Get the list of column names. Does not include hidden columns
*
* @return
*/
public ArrayList getColumns() {
ArrayList columns = new ArrayList<>();
for (String col : columnLookup.keySet()) {
HeaderInfo hi = columnLookup.get(col);
if (!hi.isHide()) {
columns.add(col);
}
}
return columns;
}
/**
* Get back a list of unique values in the column
*
* @param column
* @return
* @throws Exception
*/
public ArrayList getDiscreteColumnValues(String column) throws Exception {
HashMap hashMapValues = new HashMap<>();
ArrayList values = new ArrayList<>();
ArrayList rows = getDataRows();
for (String row : rows) {
String value = getCell(row, column);
if (!hashMapValues.containsKey(value)) {
hashMapValues.put(value, value);
values.add(value);
}
}
return values;
}
/**
* Get back a list of unique values in the row
*
* @param row
* @return
* @throws Exception
*/
public ArrayList getDiscreteRowValues(String row) throws Exception {
HashMap hashMapValues = new HashMap<>();
ArrayList values = new ArrayList<>();
for (String column : getColumns()) {
String value = getCell(row, column);
if (!hashMapValues.containsKey(value)) {
hashMapValues.put(value, value);
values.add(value);
}
}
return values;
}
/**
* Get all rows including those that may be hidden
*
* @return
*/
public ArrayList getAllRows() {
ArrayList rows = new ArrayList<>();
for (String row : rowLookup.keySet()) {
rows.add(row);
}
return rows;
}
/**
* Get the list of row names. Will exclude hidden values
*
* @return
*/
public ArrayList getRows() {
ArrayList rows = new ArrayList<>();
for (String row : rowLookup.keySet()) {
HeaderInfo hi = rowLookup.get(row);
if (!hi.isHide()) {
rows.add(row);
}
}
return rows;
}
/**
* Get the list of row names
*
* @return
*/
public ArrayList getDataRows() {
ArrayList rows = new ArrayList<>();
for (String row : rowLookup.keySet()) {
if (this.isMetaDataRow(row)) {
continue;
}
HeaderInfo hi = rowLookup.get(row);
if (!hi.isHide()) {
rows.add(row);
}
}
return rows;
}
/**
* Get the log scale of this worksheet where a zero value will be set to .1
* as Log(0) is undefined
*
* @param base
* @return
* @throws Exception
*/
public WorkSheet getLogScale(double base) throws Exception {
return getLogScale(base, .1);
}
/**
* Get the log scale of this worksheet
*
* @param base
* @return
* @throws Exception
*/
public WorkSheet getLogScale(double base, double zeroValue) throws Exception {
WorkSheet workSheet = new WorkSheet(getRows(), getColumns());
workSheet.setIndexColumnName(this.getIndexColumnName());
ArrayList rows = getRows();
ArrayList columns = getColumns();
for (String row : rows) {
for (String col : columns) {
if (this.isMetaDataColumn(col) || this.isMetaDataRow(row)) {
String value = getCell(row, col);
workSheet.addCell(row, col, value);
} else {
String value = getCell(row, col);
try {
Double d = Double.parseDouble(value);
if (d == 0.0) {
d = zeroValue;
} else {
d = Math.log(d) / Math.log(base);
}
workSheet.addCell(row, col, d + "");
} catch (Exception e) {
workSheet.addCell(row, col, value);
}
}
}
}
ArrayList metadataRows = this.getMetaDataRows();
ArrayList metadataColumns = this.getMetaDataColumns();
workSheet.setMetaDataColumns(metadataColumns);
workSheet.setMetaDataRows(metadataRows);
return workSheet;
}
/**
* Swap the row and columns returning a new worksheet
*
* @return
* @throws Exception
*/
public WorkSheet swapRowAndColumns() throws Exception {
WorkSheet swappedWorkSheet = new WorkSheet(getColumns(), getRows());
for (String row : getRows()) {
for (String col : getColumns()) {
String value = getCell(row, col);
swappedWorkSheet.addCell(col, row, value);
}
}
ArrayList metadataRows = this.getMetaDataRows();
ArrayList metadataColumns = this.getMetaDataColumns();
swappedWorkSheet.setMetaDataColumns(metadataRows);
swappedWorkSheet.setMetaDataRows(metadataColumns);
return swappedWorkSheet;
}
static CompactCharSequence[][] getAllValuesCompactCharSequence(File fileName, char delimiter) throws Exception {
FileInputStream fi = new FileInputStream(fileName);
return getAllValuesCompactCharSequence(fi, delimiter);
}
/**
* All support for loading from a jar file
*
* @param is
* @param delimiter
* @return
* @throws Exception
*/
static CompactCharSequence[][] getAllValuesCompactCharSequence(InputStream is, char delimiter) throws Exception {
// FileReader reader = new FileReader(fileName);
BufferedReader br = new BufferedReader(new InputStreamReader(is));
ArrayList rows = new ArrayList<>();
String line = br.readLine();
int numcolumns = -1;
while (line != null) {
String[] d = line.split(String.valueOf(delimiter));
if (numcolumns == -1) {
numcolumns = d.length;
}
CompactCharSequence[] ccs = new CompactCharSequence[d.length];
for (int i = 0; i < d.length; i++) {
ccs[i] = new CompactCharSequence(d[i]);
}
rows.add(ccs);
line = br.readLine();
}
br.close();
// reader.close();
CompactCharSequence[][] data = new CompactCharSequence[rows.size()][numcolumns];
for (int i = 0; i < rows.size(); i++) {
CompactCharSequence[] row = rows.get(i);
for (int j = 0; j < row.length; j++) { //
if (row[j].length() > 1 && row[j].charAt(0) == '"') {
// System.out.println(row[j]);
if (row[j].length() > 2) {
row[j] = new CompactCharSequence(row[j].subSequence(1, row[j].length() - 1).toString());
} else {
row[j] = new CompactCharSequence("");
}
}
if (j < row.length && j < data[0].length) {
data[i][j] = row[j];
}
}
}
return data;
}
static String[][] getAllValues(String fileName, char delimiter) throws Exception {
FileReader reader = new FileReader(fileName);
BufferedReader br = new BufferedReader(reader);
ArrayList rows = new ArrayList<>();
String line = br.readLine();
int numcolumns = -1;
while (line != null) {
String[] d = line.split(String.valueOf(delimiter));
if (numcolumns == -1) {
numcolumns = d.length;
}
rows.add(d);
line = br.readLine();
}
br.close();
reader.close();
String[][] data = new String[rows.size()][numcolumns];
for (int i = 0; i < rows.size(); i++) {
String[] row = rows.get(i);
for (int j = 0; j < row.length; j++) {
if (row[j].startsWith("\"") && row[j].endsWith("\"")) {
// System.out.println(row[j]);
row[j] = row[j].substring(1, row[j].length() - 1);
}
data[i][j] = row[j];
}
}
return data;
}
/**
* Combine two work sheets where you join based on rows. Rows that are found
* in one but not the other are removed. If the second sheet is meta data
* then a meta data column will be added between the two joined columns
*
* @param w1FileName
* @param w2FileName
* @param delimitter
* @param secondSheetMetaData
* @return
* @throws Exception
*/
static public WorkSheet unionWorkSheetsRowJoin(String w1FileName, String w2FileName, char delimitter, boolean secondSheetMetaData) throws Exception {
WorkSheet w1 = WorkSheet.readCSV(w1FileName, delimitter);
WorkSheet w2 = WorkSheet.readCSV(w2FileName, delimitter);
return unionWorkSheetsRowJoin(w1, w2, secondSheetMetaData);
}
/**
* * Combine two work sheets where you join based on rows. Rows that are
* found in one but not the other are removed. If the second sheet is meta
* data then a meta data column will be added between the two joined columns
*
* @param w1
* @param w2
* @param secondSheetMetaData
* @return
* @throws Exception
*/
static public WorkSheet unionWorkSheetsRowJoin(WorkSheet w1, WorkSheet w2, boolean secondSheetMetaData) throws Exception {
ArrayList w1Columns = w1.getColumns();
ArrayList w2Columns = w2.getColumns();
ArrayList w1DataColumns = w1.getDataColumns();
ArrayList w2DataColumns = w2.getDataColumns();
ArrayList w1MetaDataColumns = w1.getMetaDataColumns();
ArrayList w2MetaDataColumns = w2.getMetaDataColumns();
if (secondSheetMetaData) {
if (!w1.getColumns().contains("META_DATA")) {
w1DataColumns.add("META_DATA");
}
}
ArrayList joinedColumns = new ArrayList<>();
joinedColumns.addAll(w1DataColumns);
joinedColumns.addAll(w2DataColumns);
if (!joinedColumns.contains("META_DATA") && (w1MetaDataColumns.size() > 0 || w2MetaDataColumns.size() > 0)) {
joinedColumns.add("META_DATA");
}
for (String column : w1MetaDataColumns) {
if (!joinedColumns.contains(column)) {
joinedColumns.add(column);
}
}
for (String column : w2MetaDataColumns) {
if (!joinedColumns.contains(column)) {
joinedColumns.add(column);
}
}
ArrayList w1Rows = w1.getRows();
ArrayList w2Rows = w2.getRows();
ArrayList rows = new ArrayList<>();
HashSet w1Key = new HashSet<>(w1Rows);
for (String key : w2Rows) {
if (w1Key.contains(key)) {
rows.add(key);
}
}
WorkSheet worksheet = new WorkSheet(rows, joinedColumns);
for (String row : rows) {
for (String column : w1Columns) {
if ("META_DATA".equals(column)) {
continue;
}
String value = w1.getCell(row, column);
worksheet.addCell(row, column, value);
}
}
for (String row : rows) {
for (String column : w2Columns) {
if ("META_DATA".equals(column)) {
continue;
}
String value = w2.getCell(row, column);
worksheet.addCell(row, column, value);
}
}
worksheet.setMetaDataColumnsAfterColumn();
worksheet.setMetaDataRowsAfterRow();
return worksheet;
}
/**
* Read a CSV/Tab delimitted file where you pass in the delimiter
*
* @param fileName
* @param delimiter
* @return
* @throws Exception
*/
static public WorkSheet readCSV(String fileName, char delimiter) throws Exception {
return readCSV(new File(fileName), delimiter);
}
static public WorkSheet readCSV(File f, char delimiter) throws Exception {
return readCSV(new FileInputStream(f), delimiter);
}
/**
* Read a CSV/Tab delimited file where you pass in the delimiter
*
* @param f
* @param delimiter
* @return
* @throws Exception
*/
static public WorkSheet readCSV(InputStream is, char delimiter) throws Exception {
CompactCharSequence[][] data = getAllValuesCompactCharSequence(is, delimiter);
WorkSheet workSheet = new WorkSheet(data);
workSheet.setMetaDataColumnsAfterColumn();
workSheet.setMetaDataRowsAfterRow();
return workSheet;
}
/**
* Save the worksheet as a csv file
*
* @param fileName
* @throws Exception
*/
public void saveCSV(String fileName) throws Exception {
File f = new File(fileName);
File parentFile = f.getParentFile();
if (!parentFile.isDirectory()) {
parentFile.mkdirs();
}
FileOutputStream file = new FileOutputStream(fileName);
BufferedOutputStream bs = new BufferedOutputStream(file);
save(bs, ',', false);
bs.close();
file.close();
}
/**
*
* @param fileName
* @throws Exception
*/
public void saveTXT(String fileName) throws Exception {
File f = new File(fileName);
File parentFile = f.getParentFile();
if (!parentFile.isDirectory()) {
parentFile.mkdirs();
}
FileOutputStream file = new FileOutputStream(fileName);
BufferedOutputStream bs = new BufferedOutputStream(file);
save(bs, '\t', false);
bs.close();
file.close();
}
private String rowHeader = "REF";
/**
*
* @param value
*/
public void setRowHeader(String value) {
rowHeader = value;
}
/**
* Add columns from a second worksheet to be joined by common row. If the
* appended worksheet doesn't contain a row in the master worksheet then
* default value of "" is used. Rows in the appended worksheet not found in
* the master worksheet are not added.
*
* @param worksheet
* @throws Exception
*/
public void appendWorkSheetColumns(WorkSheet worksheet) throws Exception {
ArrayList newColumns = worksheet.getColumns();
this.addColumns(newColumns, "");
ArrayList rows = this.getRows();
for (String row : rows) {
for (String col : newColumns) {
if (worksheet.isValidRow(row)) {
String value = worksheet.getCell(row, col);
this.addCell(row, col, value);
}
}
}
}
/**
* Add rows from a second worksheet to be joined by common column. If the
* appended worksheet doesn't contain a column in the master worksheet then
* default value of "" is used. Columns in the appended worksheet not found
* in the master worksheet are not added.
*
* @param worksheet
* @throws Exception
*/
public void appendWorkSheetRows(WorkSheet worksheet) throws Exception {
ArrayList newRows = worksheet.getRows();
this.addRows(newRows, "");
for (String col : this.getColumns()) {
if (!worksheet.isValidColumn(col)) {
continue;
}
for (String row : newRows) {
if (worksheet.isValidColumn(col)) {
String value = worksheet.getCell(row, col);
this.addCell(row, col, value);
}
}
}
}
/**
*
* @param outputStream
* @param delimitter
* @param quoteit
* @throws Exception
*/
public void save(OutputStream outputStream, char delimitter, boolean quoteit) throws Exception {
outputStream.write(rowHeader.getBytes());
//String quote = "\"";
for (String col : getColumns()) {
outputStream.write(delimitter);
if (quoteit) {
outputStream.write('"');
}
outputStream.write(col.getBytes());
if (quoteit) {
outputStream.write('"');
}
}
outputStream.write("\r\n".getBytes());
for (String row : getRows()) {
if (quoteit) {
outputStream.write('"');
}
outputStream.write(row.getBytes());
if (quoteit) {
outputStream.write('"');
}
for (String col : getColumns()) {
// try{
String value = getCell(row, col);
outputStream.write(delimitter);
if (!this.isMetaDataColumn(col) && !this.isMetaDataRow(row)) {
if (value == null || value.length() == 0 || "null".equalsIgnoreCase(value)) {
value = "NaN";
}
} else {
if (value == null || value.length() == 0 || "null".equalsIgnoreCase(value)) {
value = "";
}
}
outputStream.write(value.getBytes());
// }catch(Exception e){
// System.out.println(row + " " + col);
// }
}
outputStream.write("\r\n".getBytes());
}
}
/**
* @return the indexColumnName
*/
public String getIndexColumnName() {
return indexColumnName;
}
/**
* @param indexColumnName the indexColumnName to set
*/
public void setIndexColumnName(String indexColumnName) {
this.indexColumnName = indexColumnName;
}
/**
* @return the columnLookup
*/
public LinkedHashMap getColumnLookup() {
return columnLookup;
}
/**
* @return the rowLookup
*/
public LinkedHashMap getRowLookup() {
return rowLookup;
}
/**
* @return the metaDataColumnsHashMap
*/
public LinkedHashMap getMetaDataColumnsHashMap() {
return metaDataColumnsHashMap;
}
/**
* @return the metaDataRowsHashMap
*/
public LinkedHashMap getMetaDataRowsHashMap() {
return metaDataRowsHashMap;
}
/**
* @return the rowHeader
*/
public String getRowHeader() {
return rowHeader;
}
}