// org.clulab.wm.eidoscommon.utils.XsvUtils.scala Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of eidos-eidoscommon_2.12 Show documentation
// Show all versions of eidos-eidoscommon_2.12 Show documentation
// Code to be shared by other Eidos subprojects and clients
// The newest version!
package org.clulab.wm.eidoscommon.utils
import java.io.PrintWriter
/**
 * Couples a raw character with the two-character sequence used to represent it
 * in escaped text: a backslash followed by `escaped`.
 *
 * @param char    the raw character that escaping replaces
 * @param escaped the character written directly after the backslash
 */
class EscapePair(char: Char, escaped: Char) {
  /** The raw character as a one-character string. */
  val unescapedString: String = String.valueOf(char)

  /** The escape sequence: a backslash followed by the `escaped` character. */
  val escapedString: String = Seq('\\', escaped).mkString

  /** Returns `string` with every occurrence of the raw character replaced by the escape sequence. */
  def escape(string: String): String = {
    val encoded = string.replace(unescapedString, escapedString)
    encoded
  }

  /** Returns `string` with every occurrence of the escape sequence replaced by the raw character. */
  def unescape(string: String): String = {
    val decoded = string.replace(escapedString, unescapedString)
    decoded
  }
}
/** Companion providing a factory so that pairs can be created without `new`. */
object EscapePair {

  /**
   * Builds an [[EscapePair]].
   *
   * @param char    the raw character that escaping replaces
   * @param escaped the character written directly after the backslash
   */
  def apply(char: Char, escaped: Char): EscapePair = {
    val escapePair = new EscapePair(char, escaped)
    escapePair
  }
}
// See https://en.wikipedia.org/wiki/Tab-separated_values.
// This does not attempt to double internal quotes or quote an entire field that contains a quote, etc.
/** Character constants and the backslash escape table shared by the readers and writers in this file. */
object XsvUtils {
  // NOTE(review): nlChar and crChar are vars while the other constants are vals.
  // escapePairs below reads them once, when this object is initialized, so a later
  // reassignment would not be reflected in the table. Confirm whether val was intended.
  var nlChar: Char = '\n'
  var crChar: Char = '\r'
  val tabChar: Char = '\t'
  val commaChar: Char = ','
  val quoteChar: Char = '"'
  val backslashChar: Char = '\\'

  /**
   * The escape table as (raw character, character after the backslash) pairs.
   * The backslash entry comes first, so code that applies the pairs in order when
   * escaping does not re-escape the backslashes introduced by the later entries.
   */
  val escapePairs: Seq[EscapePair] = Seq(
    backslashChar -> '\\',
    nlChar -> 'n',
    crChar -> 'r',
    tabChar -> 't'
  ).map { case (char, escaped) => EscapePair(char, escaped) }
}
/**
 * Base class for the separated-value readers. It only stores the separator,
 * which is visible to subclasses.
 *
 * @param separatorChar the character that separates fields within a line
 */
class XsvReader(protected val separatorChar: Char)
/**
 * Splits single lines of tab-separated text into fields and reverses the
 * backslash escaping described by `XsvUtils.escapePairs`.
 */
class TsvReader() extends XsvReader(XsvUtils.tabChar) {

  /**
   * Decodes the backslash escape sequences listed in `XsvUtils.escapePairs`.
   *
   * The text is decoded in one left-to-right pass so that every escape sequence
   * is consumed exactly once. Applying the pairs one after another as global
   * replacements is not a true inverse of escaping: an escaped backslash followed
   * by the letter n (backslash, backslash, n) would be misread as a backslash
   * followed by a newline instead of a backslash followed by the letter n.
   *
   * A backslash that does not start a known sequence, including one at the very
   * end of the text, is copied through unchanged.
   *
   * @param string the escaped text
   * @return the text with all known escape sequences replaced by their raw characters
   */
  def unescape(string: String): String = {
    val backslash = XsvUtils.backslashChar

    if (string.indexOf(backslash) < 0) string // Nothing to decode.
    else {
      val builder = new StringBuilder(string.length)
      var index = 0

      while (index < string.length) {
        val char = string.charAt(index)
        // Every escape sequence is exactly two characters: the backslash and one more.
        val escapePairOpt =
          if (char == backslash && index + 1 < string.length) {
            val candidate = string.substring(index, index + 2)
            XsvUtils.escapePairs.find(_.escapedString == candidate)
          }
          else None

        escapePairOpt match {
          case Some(escapePair) =>
            builder.append(escapePair.unescapedString)
            index += 2
          case None =>
            builder.append(char)
            index += 1
        }
      }
      builder.toString
    }
  }

  /**
   * Splits one line on the separator and unescapes each field.
   *
   * @param line   the text of a single record, without its line terminator
   * @param length when non-negative, the exact number of fields to return: surplus
   *               fields are dropped and missing ones are filled with empty strings;
   *               when negative, the fields are returned as found
   * @return the unescaped fields
   */
  def readln(line: String, length: Int = -1): Array[String] = {
    // Java will truncate unused columns from the back. Therefore, add an extra,
    // used column at the end, but then remove the extra value that results.
    // The alternative is to split on a regular expression and include -1 as the
    // final argument, but the programmer is too obstinate for that approach.
    val count = line.count(_ == separatorChar) + 1
    val values = (line + separatorChar + ' ')
        .split(separatorChar)
        .take(count)
        .map(unescape)

    if (length < 0 || length == values.length) values // Return it.
    else values.take(length).padTo(length, "") // Truncate it or expand it.
  }
}
/** Companion of the TsvReader class; it currently declares no members. */
object TsvReader
/** Placeholder for a comma-separated reader; it only fixes the separator and has no reading methods yet. */
class CsvReader extends XsvReader(XsvUtils.commaChar) {
  // TODO It is more complicated because of the multiple lines per string
}
/**
 * Base class for writers of separated values. Subclasses decide how a row of
 * values is turned into text; this class sends that text to the PrintWriter.
 * The printing methods return the writer itself so that calls can be chained.
 *
 * @param printWriter   the destination for all output
 * @param separatorChar the character placed between fields
 */
abstract class XsvWriter(val printWriter: PrintWriter, separatorChar: Char) {
  /** The separator in string form. */
  protected val separatorString: String = String.valueOf(separatorChar)

  /** Surrounds `text` with double quotes after doubling each double quote inside it. */
  def quote(text: String): String = {
    val doubled = text.replace("\"", "\"\"")

    Seq("\"", doubled, "\"").mkString
  }

  /** Formats one row of values as text, without a line terminator. */
  def mkString(values: Seq[AnyRef]): String

  // Because of type erasure, toString will need to be called on strings, unfortunately.
  /** Formats one row given as individual strings. */
  def mkString(string: String, strings: String*): String = {
    val values = string +: strings

    mkString(values)
  }

  /** Writes one formatted row without ending the line. */
  def print(values: Seq[AnyRef]): XsvWriter = {
    val row = mkString(values)

    printWriter.print(row)
    this
  }

  /** Writes one formatted row, given as individual strings, without ending the line. */
  def print(string: String, strings: String*): XsvWriter = {
    val values = string +: strings

    print(values)
  }

  // If there is more than one argument, assume they are all strings
  /** Writes one formatted row, given as individual strings, and ends the line. */
  def println(string: String, strings: String*): XsvWriter = {
    val values = string +: strings

    println(values)
  }

  /** Writes one formatted row and ends the line. */
  def println(values: Seq[AnyRef]): XsvWriter = {
    print(values)
    println()
  }

  /** Ends the current line. */
  def println(): XsvWriter = {
    // Force Unix line endings.
    printWriter.print("\n")
    this
  }

  /** Closes the underlying PrintWriter. */
  def close(): Unit = printWriter.close()
}
/**
 * Writes rows of tab-separated values in one of two flavors. With `isExcel` set,
 * fields containing troublesome characters are wrapped in double quotes; otherwise
 * the characters listed in `XsvUtils.escapePairs` are backslash-escaped instead.
 *
 * @param printWriter the destination for all output
 * @param isExcel     selects the quoting flavor when true and the escaping flavor when false
 */
class TsvWriter(printWriter: PrintWriter, isExcel: Boolean = true)
    extends XsvWriter(printWriter, XsvUtils.tabChar) {

  /** Applies every pair in `XsvUtils.escapePairs` to `string`, in table order. */
  def escape(string: String): String =
    XsvUtils.escapePairs.foldLeft(string) { (escaped, pair) => pair.escape(escaped) }

  /** Formats a row by backslash-escaping each value and joining with the separator. */
  def stringlnPlain(values: Seq[AnyRef]): String =
    values
        .map(value => escape(value.toString))
        .mkString(separatorString)

  /**
   * Formats a row by quoting each value that contains one of
   * `TsvWriter.quotableStrings` or a comma, then joining with the separator.
   */
  def stringlnExcel(values: Seq[AnyRef]): String = {
    def needsQuotes(string: String): Boolean =
      string.contains(XsvUtils.commaChar) ||
          TsvWriter.quotableStrings.exists(quotable => string.contains(quotable))

    values
        .map { value =>
          val string = value.toString

          if (needsQuotes(string)) quote(string) else string
        }
        .mkString(separatorString)
  }

  /** Formats a row according to the flavor chosen at construction. */
  def mkString(values: Seq[AnyRef]): String = {
    val format: Seq[AnyRef] => String =
      if (isExcel) stringlnExcel
      else stringlnPlain

    format(values)
  }
}
/** Companion holding the substrings whose presence makes a tab-separated field need quoting. */
object TsvWriter {
  /** Newline, carriage return, tab and double quote, each as a one-character string. */
  val quotableStrings: Seq[String] = Seq(
    XsvUtils.nlChar,
    XsvUtils.crChar,
    XsvUtils.tabChar,
    XsvUtils.quoteChar
  ).map(_.toString)
}
/**
 * Writes rows of comma-separated values, wrapping in double quotes every field
 * that contains one of `CsvWriter.quotableStrings`.
 *
 * @param printWriter the destination for all output
 * @param isExcel     not consulted by the code in this class
 */
class CsvWriter(printWriter: PrintWriter, isExcel: Boolean = true)
    extends XsvWriter(printWriter, XsvUtils.commaChar) {

  // TODO: Excel does not seem to be able to handle tabs.
  /** Formats a row by quoting the values that need it and joining with the separator. */
  def mkString(values: Seq[AnyRef]): String = {
    def needsQuotes(string: String): Boolean =
      CsvWriter.quotableStrings.exists(quotable => string.contains(quotable))

    values
        .map { value =>
          val string = value.toString

          if (needsQuotes(string)) quote(string) else string
        }
        .mkString(separatorString)
  }
}
/** Companion holding the substrings whose presence makes a comma-separated field need quoting. */
object CsvWriter {
  /** Newline, carriage return, comma and double quote, each as a one-character string. */
  val quotableStrings: Seq[String] = Seq(
    XsvUtils.nlChar,
    XsvUtils.crChar,
    XsvUtils.commaChar,
    XsvUtils.quoteChar
  ).map(_.toString)
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy