horan.kse3-eio_3.0.3.6.source-code.Xsv.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of kse3-eio_3 Show documentation
Show all versions of kse3-eio_3 Show documentation
Kerr Scala Extensions 3, module kse3-eio
// This file is distributed under the BSD 3-clause license. See file LICENSE.
// Copyright (c) 2023 Rex Kerr, UCSF, and Calico Life Sciences LLC
package kse.eio
import java.io._
import java.nio._
import java.nio.file._
import java.nio.channels.{ReadableByteChannel, WritableByteChannel, SeekableByteChannel}
import java.nio.charset.StandardCharsets._
import scala.collection.mutable.Builder
import scala.util.boundary
import scala.annotation.targetName
import scala.collection.{ IterableOnce => IOnce }
import scala.collection.immutable.{Range => Rg}
import kse.basics.{given, _}
import kse.basics.intervals._
import kse.flow.{given, _}
import kse.maths.{given, _}
import kse.maths.packed.{given, _}
import kse.eio._
class Xsv private (
separator: Char,
permissiveWhitespace: Boolean = false
) {
// ---- Mutable parser state. All of it is reset by clear(). ----
// Current parse line
private var line: UInt = UInt(0)
// Current parse position within line
private var pos: Int = 0
// Index within current buffer: points at the next character to consume.
private var index: Int = 0
// Token parsing state--states are bits so we can test for various combinations of them with one operation
private var state: Int = 0
// ---- Values stored in `state`. Each is a distinct bit so several can be tested with one mask. ----
private inline val Tk = 0x01 // Inside a token
private inline val Tn = 0x02 // Just after a LF
private inline val Tr = 0x04 // Just after a CR
private inline val Tc = 0x08 // Just after a separator
private inline val Q1 = 0x10 // Just started a quote
private inline val Qt = 0x20 // Inside a quote
private inline val Qr = 0x40 // Just hit a CR inside a quote
private inline val Qq = 0x80 // Just after a quote when previously inside a quote
private inline val Sp = 0x100 // Removing whitespace after the end of a quote
// ---- End-of-input flags, passed as the `e` argument of visitRange (a separate bit set from the states above). ----
private inline val EoI = 0x1 // End of input is just the end of this bit of data, nothing special
private inline val EoL = 0x2 // End of input is also an end of line
private inline val EoF = 0x4 // There is no more input to be had: "file" or equivalent is finished
// Formats "<prefix> on line <1-based line number>, position <pos>" from the current parse state.
private def sayWhere(prefix: String): String =
  val humanLine = line.toLong + 1
  s"$prefix on line $humanLine, position $pos"
// Err-to-Err mapping that tags an error with the current parse location
// (presumably the mapping picked up by the `.?*` calls in this class -- confirm against kse.flow).
private given AutoMap[Err, Err] = err => err.explainBy(sayWhere("Error"))
// Puts the parser back into its initial state: no token state, buffer index 0, line 0, position 0.
private def clear(): Unit =
  state = 0
  index = 0
  pos = 0
  line = UInt(0)
// Ignores spaces (and tabs) after a closing quote in the input.
//
// Outcomes:
//   - input runs out: index is set to iN and state stays Sp;
//   - separator found: it is consumed and state becomes Tc;
//   - CR or LF found: it is consumed, the line counter advances, state becomes Tr or Tn,
//     and the visitor receives a newline call;
//   - anything else: an error is produced via visitor.error.
//
// Assumes 0 <= i0 <= iN <= data.length, and Sp state on entry.
private transparent inline def skipSpaceImpl[T <: Array[Byte] | String, U, C <: Byte | Char](
  data: T, inline lookup: (T, Int) => C, i0: Int, iN: Int, visitor: Xsv.Visitor[T, ?]
): Unit Or Err =
  Or.Ret:
    var i = i0
    while i < iN do
      val c = lookup(data, i)
      c match
        case x if x == separator =>
          // Checked first so that a space or tab separator ends the cell instead of being skipped.
          pos += i - i0
          index = i + 1
          state = Tc
          Is.unit.break
        case ' ' | '\t' =>
          i += 1
        case '\r' | '\n' =>
          line = line +# UInt(1)
          pos = 0
          index = i + 1
          state = if c == '\n' then Tn else Tr
          visitor.newline(line).?*
          // The line terminator has been consumed and the state has left Sp, so we are done.
          // Without this exit the loop would examine the same character again and never terminate.
          Is.unit.break
        case _ =>
          pos += i - i0
          index = i
          visitor.error(Err(sayWhere("Non-space cell content after quote"))).asAlt.break
    // Ran out of input while still skipping: remember how far we got; state remains Sp.
    pos += i - i0
    index = i
    Is.unit
// Byte-array instantiation of skipSpaceImpl.
private def skipSpace[U](data: Array[Byte], i0: Int, iN: Int, visitor: Xsv.Visitor[Array[Byte], ?]): Unit Or Err =
  skipSpaceImpl[Array[Byte], U, Byte](data, (bytes, at) => bytes(at), i0, iN, visitor)
// String instantiation of skipSpaceImpl.
private def skipSpace[U](data: String, i0: Int, iN: Int, visitor: Xsv.Visitor[String, ?]): Unit Or Err =
  skipSpaceImpl[String, U, Char](data, (text, at) => text.charAt(at), i0, iN, visitor)
// Loads quoted input into a visitor.
// State will be Qr, Qq, or Qt if we are not known to be done;
// Tn, Tr, Tc, or Sp if we are (in which case endquote will have been called)
// If incoming state is Q1, quoted will always be called. Otherwise it will be called on any processed input.
// Assumes 0 <= i0 < iN <= data.length, and a Q state on entry.
//
// Parameters:
//   data    - the buffer being parsed
//   lookup  - reads the character/byte at an index of `data`
//   lf      - a one-element buffer holding '\n', passed to visitor.quoted when a lone CR is normalized
//   i0, iN  - half-open range of `data` to examine
//   visitor - receives quoted / endquote / newline / error callbacks
// Side effects: updates `index`, `pos`, `line` and `state`.
private transparent inline def visitQuoteImpl[T <: Array[Byte] | String, U, C <: Byte | Char](
data: T, inline lookup: (T, Int) => C, inline lf: T, i0: Int, iN: Int, visitor: Xsv.Visitor[T, U]
): Unit Or Err =
Or.Ret:
var i = i0
while i < iN do
// First we mark where we've gotten to successfully, and see if there's any state to handle.
index = i
state match
case Qr =>
// Previous character was a CR inside the quote: swallow a following LF (CRLF), otherwise emit a LF for the lone CR.
if lookup(data, i) == '\n' then i += 1
else visitor.quoted(lf, 0, 1) // Found \r alone but we like \n so we emit one
state = Qt
case Qq =>
// Found what could be an ending quote, but is it really?
val c = lookup(data, i)
if c == '"' then
// No, it was an embedded quote. Pick up the extra quote.
i += 1
state = Qt
else if c == '\n' || c == '\r' then
// The quote is over and we've reached the end of a line
line = line +# UInt(1)
pos = 0
index = i + 1
state = if c == '\n' then Tn else Tr
(visitor.endquote() && visitor.newline(line)).?*
Is.unit.break
else if c == separator then
// The quote is over and we've reached the end of a cell
pos += 1
index = i + 1
state = Tc
visitor.endquote().?*
Is.unit.break
else if permissiveWhitespace && (c == ' ' || c == '\t') then
// The quote is over but there's whitespace after it and we've promised we'll handle it.
pos += 1
index = i + 1
state = Sp
visitor.endquote().?*
Is.unit.break
else
// The quote is over and there's some junk here.
visitor.error(Err(sayWhere("Non-space cell content after quote"))).asAlt.break
case _ =>
// Now we know we're inside a quote so we try to walk forwards until something happens.
// The scan stops as soon as the state becomes Qq or Qr so the outer loop can handle it.
while i < iN && (state & (Qq | Qr)) == 0 do
lookup(data, i) match
case '"' =>
// Quote might have ended! Save anything we found.
if index < i || state == Q1 then
visitor.quoted(data, index, i)
i += 1
pos += i - index
index = i
state = Qq
case '\r' =>
// Might be an annoying Windows newline. We definitely advanced a line, but let state handler deal.
if index < i || state == Q1 then
visitor.quoted(data, index, i)
line = line +# UInt(1)
pos = 0
i += 1
index = i
state = Qr
case '\n' =>
// Regular newline--intercept to update line count, but we can just keep scanning input.
line = line +# UInt(1)
pos = 0
i += 1
case _ =>
// Quoted text. Just pass it.
i += 1
// At this point, we're either out of input, or saved our progress and changed state
// At this point we know we're out of input. Save progress if we're missing anything.
if index < i || state == Q1 then
visitor.quoted(data, index, i)
index = i
state = Qt
()
// Byte-array instantiation of visitQuoteImpl; Xsv.lfByte supplies the "\n" used for a normalized lone CR.
private def visitQuote[U](data: Array[Byte], i0: Int, iN: Int, visitor: Xsv.Visitor[Array[Byte], U]): Unit Or Err =
  visitQuoteImpl[Array[Byte], U, Byte](data, (bytes, at) => bytes(at), Xsv.lfByte, i0, iN, visitor)
// String instantiation of visitQuoteImpl; the literal "\n" is used for a normalized lone CR.
private def visitQuote[U](data: String, i0: Int, iN: Int, visitor: Xsv.Visitor[String, U]): Unit Or Err =
  visitQuoteImpl[String, U, Char](data, (text, at) => text.charAt(at), "\n", i0, iN, visitor)
// Hands data[i0, iN) to visitor.unquoted after stripping spaces and tabs from both ends.
// The right edge is trimmed first, so an all-whitespace range is reported as the empty range at i0.
private transparent inline def trimmedImpl[T <: Array[Byte] | String, U, C <: Byte | Char](
  data: T, inline lookup: (T, Int) => C, i0: Int, iN: Int, visitor: Xsv.Visitor[T, U]
): Unit Or Err =
  var hi = iN
  while hi > i0 && { val c = lookup(data, hi - 1); c == ' ' || c == '\t' } do hi -= 1
  var lo = i0
  while lo < hi && { val c = lookup(data, lo); c == ' ' || c == '\t' } do lo += 1
  visitor.unquoted(data, lo, hi)
// Byte-array instantiation of trimmedImpl.
private def trimmed[U](data: Array[Byte], i0: Int, iN: Int, visitor: Xsv.Visitor[Array[Byte], U]): Unit Or Err =
  trimmedImpl[Array[Byte], U, Byte](data, (bytes, at) => bytes(at), i0, iN, visitor)
// String instantiation of trimmedImpl.
private def trimmed[U](data: String, i0: Int, iN: Int, visitor: Xsv.Visitor[String, U]): Unit Or Err =
  trimmedImpl[String, U, Char](data, (text, at) => text.charAt(at), i0, iN, visitor)
// Finishes an unquoted token that is terminated by the line break at position `i`:
// emits data[index, i) (trimmed when permissiveWhitespace is set), advances the line counter,
// notifies the visitor of the new line, and moves `index` just past the line-break character.
private transparent inline def tokLineImpl[T <: Array[Byte] | String, U, C <: Byte | Char](
  data: T, inline lookup: (T, Int) => C,
  inline trim: (T, Int, Int, Xsv.Visitor[T, U]) => Unit Or Err,
  i: Int, visitor: Xsv.Visitor[T, U]
): Unit Or Err =
  Or.Ret:
    val emitted: Unit Or Err =
      if permissiveWhitespace then trim(data, index, i, visitor)
      else visitor.unquoted(data, index, i)
    emitted.?*
    pos = 0
    line = line +# UInt(1)
    visitor.newline(line).?*
    index = i + 1
    ()
// Byte-array instantiation of tokLineImpl.
private def tokLine[U](data: Array[Byte], i: Int, visitor: Xsv.Visitor[Array[Byte], U]): Unit Or Err =
  tokLineImpl[Array[Byte], U, Byte](
    data,
    (bytes, at) => bytes(at),
    (bytes, from, until, vis) => trimmed(bytes, from, until, vis),
    i, visitor
  )
// String instantiation of tokLineImpl.
private def tokLine[U](data: String, i: Int, visitor: Xsv.Visitor[String, U]): Unit Or Err =
  tokLineImpl[String, U, Char](
    data,
    (text, at) => text.charAt(at),
    (text, from, until, vis) => trimmed[U](text, from, until, vis),
    i, visitor
  )
// Loads input into a visitor.
// Index is advanced to the last valid data--this will be iN unless state ends as Tk with e == EoI (which means more data is needed)
// Any state can be emitted if e == EoI, but if it's a Q state it means that endquote has not been called.
// If e & EoL != 0, a virtual \n will be inserted at the end of the input. Any Q state can be emitted but otherwise only Tn can be
// If e & EoF != 0, a trailing cell is terminated and considered complete (Tn state will be set.)
// Assumes 0 <= i0 <= iN <= data.length. Empty input okay. Any state okay.
//
// Parameters:
//   data    - the buffer being parsed
//   lookup  - reads the character/byte at an index of `data`
//   sspace  - handler used while in the Sp state (skip whitespace after a closing quote)
//   vquote  - handler used while in any of the quote states (Q1, Qr, Qq, Qt)
//   trim    - emits an unquoted token with surrounding spaces/tabs removed (used when permissiveWhitespace is set)
//   tokl    - finishes an unquoted token that ends at a line break
//   lf      - a one-element buffer holding '\n', emitted into an open quote when EoL adds a virtual newline
//   i0, iN  - half-open range of `data` to examine
//   visitor - receives the parse events
//   e       - end-of-input flags (EoI / EoL / EoF)
private transparent inline def visitRangeImpl[T <: Array[Byte] | String, U, C <: Byte | Char](
data: T,
inline lookup: (T, Int) => C,
inline sspace: (T, Int, Int, Xsv.Visitor[T, U]) => Unit Or Err,
inline vquote: (T, Int, Int, Xsv.Visitor[T, U]) => Unit Or Err,
inline trim: (T, Int, Int, Xsv.Visitor[T, U]) => Unit Or Err,
inline tokl: (T, Int, Xsv.Visitor[T, U]) => Unit Or Err,
inline lf: T,
i0: Int, iN: Int, visitor: Xsv.Visitor[T, U], e: Int
): Unit Or Err =
Or.Ret:
var i = i0
while i < iN do
// Dispatch on the current state; the handlers report how far they got through `index`.
state match
case s if (s & (Q1 | Qr | Qq | Qt)) != 0 =>
vquote(data, i, iN, visitor).?
i = index
case Sp =>
sspace(data, i, iN, visitor).?
i = index
case _ =>
boundary: // Exit on quote
while i < iN do
lookup(data, i) match
case '"' =>
// A quote may only open a cell; anything before it is an error, except skippable whitespace in permissive mode.
if i > index then
if !permissiveWhitespace || { while index < i && lookup(data, index).fn(c => c == ' ' || c == '\t') do { index += 1; pos += 1 }; index < i } then
visitor.error(Err(sayWhere("Extra content before quote."))).asAlt.break
i += 1
index = i
state = Q1
boundary.break()
case '\n' =>
// A LF immediately after a CR completes a CRLF pair and is not a second line break.
if state == Tr && i == index then
i += 1
index = i
state = Tn
else
tokl(data, i, visitor).?
i = index
state = Tn
case '\r' =>
tokl(data, i, visitor).?
i = index
state = Tr
case c if c == separator =>
(if permissiveWhitespace then trim(data, index, i, visitor) else visitor.unquoted(data, index, i)).?*
i += 1
pos += i - index
index = i
state = Tc
case _ =>
i += 1
// If we get here, we're at the end of the input and need to figure out what that means
if (e & EoL) != 0 then
// Manually add a newline
if (state & (Q1 | Qq | Qt | Qr)) != 0 then
if state == Qq then
visitor.endquote().?*
state = Tn
else if state != Qr then visitor.quoted(lf, 0, 1)
line = line +# UInt(1)
pos = 0
else tokl(data, i, visitor).?
if (e & EoF) != 0 then
// Make sure we consumed all the data and are not in a quote
if (state & (Q1 | Qq | Qt | Qr)) != 0 then
if state == Qq then visitor.endquote().?*
else visitor.error(Err(sayWhere("Input ended inside of quote"))).asAlt.break
else if i > index || state == Tc then
(if permissiveWhitespace then trim(data, index, i, visitor) else visitor.unquoted(data, index, i)).?*
index = i
()
// Byte-array instantiation of visitRangeImpl, wired to the byte-array helpers of this class.
private def visitRange[U](data: Array[Byte], i0: Int, iN: Int, visitor: Xsv.Visitor[Array[Byte], U], e: Int): Unit Or Err =
  visitRangeImpl[Array[Byte], U, Byte](
    data,
    (bytes, at) => bytes(at),
    (bytes, from, until, vis) => skipSpace(bytes, from, until, vis),
    (bytes, from, until, vis) => visitQuote(bytes, from, until, vis),
    (bytes, from, until, vis) => trimmed(bytes, from, until, vis),
    (bytes, at, vis) => tokLine(bytes, at, vis),
    Xsv.lfByte,
    i0, iN, visitor, e
  )
// String instantiation of visitRangeImpl, wired to the String helpers of this class.
private def visitRange[U](data: String, i0: Int, iN: Int, visitor: Xsv.Visitor[String, U], e: Int): Unit Or Err =
  visitRangeImpl[String, U, Char](
    data,
    (text, at) => text.charAt(at),
    (text, from, until, vis) => skipSpace(text, from, until, vis),
    (text, from, until, vis) => visitQuote(text, from, until, vis),
    (text, from, until, vis) => trimmed(text, from, until, vis),
    (text, at, vis) => tokLine(text, at, vis),
    "\n",
    i0, iN, visitor, e
  )
// Parses an entire byte array as one complete input and returns the visitor's result.
// Parser and visitor are both reset first; the whole array is the range [0, content.length).
def visit[U](content: Array[Byte], visitor: Xsv.Visitor[Array[Byte], U]): U Or Err =
  visit(content, 0, content.length, visitor)
// Parses content[i0, iN) as one complete input and returns the visitor's result.
// The requested range is clipped to the array: the start is raised to 0 if negative, and the end
// is limited to content.length and never placed before the start.
def visit[U](content: Array[Byte], i0: Int, iN: Int, visitor: Xsv.Visitor[Array[Byte], U]): U Or Err =
  clear()
  visitor.clear()
  val lo = i0.max(0)
  val hi = iN.min(content.length).max(lo)
  index = lo
  val scanned = visitRange(content, lo, hi, visitor, EoF)
  scanned && visitor.complete(line)
// Parses the part of `content` selected by a Scala Range.
inline def visit[U](content: Array[Byte], inline rg: Rg, visitor: Xsv.Visitor[Array[Byte], U]): U Or Err =
  val span = Iv.of(rg)
  visit(content, span.i0, span.iN, visitor)
// Parses the part of `content` selected by an interval (resolved against `content` by Iv.of).
inline def visit[U](content: Array[Byte], inline v: Iv | PIv, visitor: Xsv.Visitor[Array[Byte], U]): U Or Err =
  val span = Iv.of(v, content)
  visit(content, span.i0, span.iN, visitor)
// Parses input that arrives as a sequence of byte-array chunks and returns the visitor's result.
// Only the last chunk is parsed with EoF; earlier chunks use EoI, so a token may straddle chunks.
// Bytes a chunk leaves unconsumed are copied into `buffer` (`k` of them) and stitched to the
// front of the next chunk before parsing continues.
@targetName("visitByteArrays")
def visit[U](content: IOnce[Array[Byte]], visitor: Xsv.Visitor[Array[Byte], U]): U Or Err =
Or.FlatRet:
clear()
visitor.clear()
// Carry-over storage for unconsumed bytes from the previous chunk; `k` is how many are held.
var buffer: Array[Byte] = null
var k: Int = 0
val it = content.iterator
var more = it.hasNext
while more do
val a = it.next
more = it.hasNext
index = 0
if k > 0 then
// There is carry-over: append the first n bytes (at most 128) of the new chunk after it and parse from the buffer.
var n = (128 min a.length) min (buffer.length - k)
a.inject(buffer, k)(0, n)
var moreExtra = n > 0 || !more
while index < k && moreExtra do
visitRange(buffer, index, k + n, visitor, if !more && n == a.length then EoF else EoI).?
if index < k then
// The carried-over bytes are still not fully consumed: pull in more of the chunk (up to 4x as much), growing the buffer if needed.
var m = ((4L * n) min a.length.toLong).toInt
if m > buffer.length - k then
val h = (((k.toLong + m) max (2L * buffer.length)) min (Int.MaxValue - 7L)).toInt
if h <= buffer.length then visitor.error(Err(sayWhere("Buffer overflow"))).asAlt.break
buffer = buffer.copyToSize(h)
if m > buffer.length - k then m = buffer.length - k
moreExtra = m > n
a.inject(buffer, k + n)(n, m)
n = m
if index >= k then
// Parsing has moved past the carry-over: translate the buffer index into an index into `a`.
index -= k
k = 0
if index < a.length then
visitRange(a, index, a.length, visitor, if more then EoI else EoF).?
if index < a.length then
// Part of this chunk is still unconsumed: save it as carry-over for the next iteration.
k = a.length - index
if (buffer eq null) || buffer.length - 128 < k then
val g = if buffer eq null then 64 else buffer.length
val h = (((k + 128L) max (2L * g)) min (Int.MaxValue - 7L)).toInt
if k >= h then visitor.error(Err(sayWhere(s"Buffer overflow"))).asAlt.break
buffer = new Array[Byte](h)
a.inject(buffer)(index to End)
else k = 0
visitor.complete(line)
// Parses an entire String as one complete input and returns the visitor's result.
// Parser and visitor are both reset first; the whole string is the range [0, content.length).
def visit[U](content: String, visitor: Xsv.Visitor[String, U]): U Or Err =
  visit(content, 0, content.length, visitor)
// Parses content[i0, iN) as one complete input and returns the visitor's result.
// The requested range is clipped to the string: the start is raised to 0 if negative, and the end
// is limited to content.length and never placed before the start.
def visit[U](content: String, i0: Int, iN: Int, visitor: Xsv.Visitor[String, U]): U Or Err =
  clear()
  visitor.clear()
  val lo = i0.max(0)
  val hi = iN.min(content.length).max(lo)
  index = lo
  val scanned = visitRange(content, lo, hi, visitor, EoF)
  scanned && visitor.complete(line)
// Parses the part of `content` selected by a Scala Range.
inline def visit[U](content: String, inline rg: Rg, visitor: Xsv.Visitor[String, U]): U Or Err =
  val span = Iv.of(rg)
  visit(content, span.i0, span.iN, visitor)
// Parses the part of `content` selected by an interval (resolved against `content` by Iv.of).
inline def visit[U](content: String, inline v: Iv | PIv, visitor: Xsv.Visitor[String, U]): U Or Err =
  val span = Iv.of(v, content)
  visit(content, span.i0, span.iN, visitor)
// Parses input supplied as a sequence of Strings, one per line, and returns the visitor's result.
// Every string except the last is parsed with EoL (a virtual newline is added after it);
// the last one is parsed with EoF. Each string must be consumed completely.
@targetName("visitStrings")
def visit[U](content: IOnce[String], visitor: Xsv.Visitor[String, U]): U Or Err =
  Or.FlatRet:
    clear()
    visitor.clear()
    val it = content.iterator
    var more = it.hasNext
    while more do
      index = 0
      val s = it.next
      more = it.hasNext
      // Propagate parse errors immediately. Previously the result was discarded, so an error such as
      // "Input ended inside of quote" was lost and complete() could report an empty table instead.
      visitRange(s, 0, s.length, visitor, if more then EoL else EoF).?
      if index < s.length then visitor.error(Err(sayWhere(s"Only consumed $index of ${s.length} characters"))).asAlt.break
    visitor.complete(line)
// Parses everything readable from an InputStream, pulling it in as byte-array chunks via
// Send.IterateInputStream with the given starting and maximum buffer sizes.
def visitInputStream[U](content: InputStream, visitor: Xsv.Visitor[Array[Byte], U], startBufferSize: Int = 256, maxBufferSize: Int = 4194304): U Or Err =
  Or.FlatRet:
    nice {
      val chunks = Send.IterateInputStream(content, startBufferSize, maxBufferSize)
      visit(chunks, visitor).?
    }
// Same as visitInputStream with its default buffer sizes.
def visit[U](content: InputStream, visitor: Xsv.Visitor[Array[Byte], U]): U Or Err =
  visitInputStream[U](content, visitor)
// Parses everything readable from a byte channel, pulling it in as byte-array chunks via
// Send.IterateByteChannel with the given starting and maximum buffer sizes.
def visitByteChannel[U](content: ReadableByteChannel, visitor: Xsv.Visitor[Array[Byte], U], startBufferSize: Int = 256, maxBufferSize: Int = 4194304): U Or Err =
  Or.FlatRet:
    nice {
      val chunks = Send.IterateByteChannel(content, startBufferSize, maxBufferSize)
      visit(chunks, visitor).?
    }
// Same as visitByteChannel with its default buffer sizes.
def visit[U](content: ReadableByteChannel, visitor: Xsv.Visitor[Array[Byte], U]): U Or Err =
  visitByteChannel[U](content, visitor)
// Parses the file at `content`. A missing file yields an error value; otherwise the file is opened,
// parsed in chunks, and closed again by Resource.Nice.
def visit[U](content: Path, visitor: Xsv.Visitor[Array[Byte], U]): U Or Err =
  if content.exists then
    Resource.Nice(content.openRead(0))(_.close) { input =>
      val sz = content.raw.size
      // Chunk size: the file size clamped to [256, 4194304] ...
      val clamped = sz.clamp(256, 4194304).toInt
      // ... except when the file exceeds that by less than 64 KiB, where a smaller chunk size is used instead.
      val n = if sz > clamped && sz - clamped < 65536 then 2621440 else clamped
      visit(Send.IterateInputStream(input, n, n), visitor).?
    }
  else Err.or(s"File not found: $content")
// decode/read: like the matching `visit` overload, but the visitor is obtained from a GetVisitor
// in scope. The visitor is given the input size when it is known up front, and -1 otherwise.
def decode[U](content: Array[Byte])(using gv: Xsv.GetVisitor[Array[Byte], U]): U Or Err =
  val visitor = gv.get(content.length)
  visit(content, visitor)
@targetName("decodeByteArrays")
def decode[U](content: IOnce[Array[Byte]])(using gv: Xsv.GetVisitor[Array[Byte], U]): U Or Err =
  val visitor = gv.get(-1)
  visit(content, visitor)
def decode[U](content: String)(using gv: Xsv.GetVisitor[String, U]): U Or Err =
  val visitor = gv.get(content.length)
  visit(content, visitor)
@targetName("decodeStrings")
def decode[U](content: IOnce[String])(using gv: Xsv.GetVisitor[String, U]): U Or Err =
  val visitor = gv.get(-1)
  visit(content, visitor)
def decode[U](content: InputStream)(using gv: Xsv.GetVisitor[Array[Byte], U]): U Or Err =
  val visitor = gv.get(-1)
  visit(content, visitor)
def decode[U](content: ReadableByteChannel)(using gv: Xsv.GetVisitor[Array[Byte], U]): U Or Err =
  val visitor = gv.get(-1)
  visit(content, visitor)
def read[U](path: Path)(using gv: Xsv.GetVisitor[Array[Byte], U]): U Or Err =
  val visitor = gv.get(path.size)
  visit(path, visitor)
// A view of this parser whose decode/read methods skip a leading byte-order mark.
def bomless: Xsv.Bomless = Xsv.Bomless(this)
// Encodes a table into byte chunks, one per row, using this instance's separator.
def encode(table: Array[Array[String]]): Iterator[Array[Byte]] =
  Xsv.encodeTable(table, separator)
// Encodes a table with this instance's separator and writes it to the path `p`.
def write(table: Array[Array[String]])(p: Path)(using tr: Send[Iterator[Array[Byte]], OutputStream]): Unit Or Err =
  val chunks = Xsv.encodeTable(table, separator)
  chunks.writeTo(p)
}
object Xsv {
// Creates a parser/encoder for the given separator character.
// Rejected separators: anything above 127, CR, LF, and the double-quote character.
// permissiveWhitespace enables trimming of spaces/tabs around cells and after closing quotes.
def create(separator: Char, permissiveWhitespace: Boolean = false): Xsv Or Err =
  separator match
    case c if c > 127 => Err.or("Xsv only supports basic one-byte separators (0-127)")
    case '\n' | '\r' => Err.or("Separator cannot be a newline character")
    case '"' => Err.or("Separator cannot be a double quote character")
    case c => Is(new Xsv(c, permissiveWhitespace))
// Ready-made parsers. These are defs, so every access builds a fresh Xsv with its own parse state.
def comma: Xsv = create(',').get
def tab: Xsv = create('\t').get
def space: Xsv = create(' ').get
def semi: Xsv = create(';').get
// The same separators with permissive whitespace handling switched on.
def trimComma: Xsv = create(',', permissiveWhitespace = true).get
def trimTab: Xsv = create('\t', permissiveWhitespace = true).get
def trimSpace: Xsv = create(' ', permissiveWhitespace = true).get
def trimSemi: Xsv = create(';', permissiveWhitespace = true).get
// One-byte buffer holding '\n'; passed to byte visitors when the parser needs to emit a line feed.
val lfByte = Array[Byte]('\n'.toByte)
// Wrapper around an Xsv whose decode/read methods drop a byte-order mark at the start of the input
// (as detected by `hasBOM`) by skipping the first three bytes.
final class Bomless(xsv: Xsv) {
  // Decodes a single byte array, skipping its first three bytes when it starts with a BOM.
  def decode[U](content: Array[Byte])(using gv: GetVisitor[Array[Byte], U]): U Or Err =
    if !content.hasBOM then xsv.decode(content)
    else xsv.visit(content, 3 to End, gv.get(content.length-3))

  // Decodes chunked input; only the first chunk is inspected for a BOM.
  def decode[U](content: IOnce[Array[Byte]])(using gv: Xsv.GetVisitor[Array[Byte], U]): U Or Err =
    val it = content.iterator
    if it.hasNext then
      val first = it.next
      (first.hasBOM, it.hasNext) match
        case (true, true) => xsv.decode(Iterator(first.select(3 to End)) ++ it)
        case (true, false) => xsv.visit(first, 3 to End, gv.get(first.length - 3))
        case (false, true) => xsv.decode(Iterator(first) ++ it)
        case (false, false) => xsv.decode(first)
    else xsv.decode(it)

  // Decodes an InputStream read in chunks (buffer sizes 256 up to 4194304).
  def decode[U](content: InputStream)(using gv: Xsv.GetVisitor[Array[Byte], U]): U Or Err =
    decode(Send.IterateInputStream(content, 256, 4194304))

  // Decodes a byte channel read in chunks (buffer sizes 256 up to 4194304).
  def decode[U](content: ReadableByteChannel)(using gv: Xsv.GetVisitor[Array[Byte], U]): U Or Err =
    decode(Send.IterateByteChannel(content, 256, 4194304))

  // Decodes the file at `content`; a missing file yields an error value.
  def read[U](content: Path)(using gv: Xsv.GetVisitor[Array[Byte], U]): U Or Err =
    if content.exists then
      Resource.Nice(content.openRead(0))(_.close) { input =>
        val sz = content.raw.size
        // Chunk size: the file size clamped to [256, 4194304], except that a file exceeding
        // the clamp by less than 64 KiB uses a smaller chunk size instead.
        val clamped = sz.clamp(256, 4194304).toInt
        val n = if sz > clamped && sz - clamped < 65536 then 2621440 else clamped
        decode(Send.IterateInputStream(input, n, n)).?
      }
    else Err.or(s"File not found: $content")
}
// Factory for visitors, looked up as a `using` parameter by the decode/read methods so that the
// result type U selects the visitor.
//   T - the kind of input buffer the visitor consumes (byte arrays or Strings)
//   U - the result type the visitor produces
trait GetVisitor[T <: Array[Byte] | String, U] {
// `size` is the input size passed by the caller; the decode methods in this file pass the content
// length when it is known and -1 when it is not.
def get(size: Long): Visitor[T, U]
}
object GetVisitor {
  // Default visitor factories: both build a table of Strings and ignore the size argument.
  given string2table: GetVisitor[String, Array[Array[String]]] =
    new GetVisitor[String, Array[Array[String]]]:
      def get(size: Long): Visitor[String, Array[Array[String]]] = Visitor.onString()

  given bytes2table: GetVisitor[Array[Byte], Array[Array[String]]] =
    new GetVisitor[Array[Byte], Array[Array[String]]]:
      def get(size: Long): Visitor[Array[Byte], Array[Array[String]]] = Visitor.onBytes()
}
// Receiver of parse events from Xsv.
//   T - the kind of buffer the events refer to (byte arrays or Strings)
//   U - the result produced once parsing completes
// In every (data, start, end) triple the range is half-open: data[start, end).
trait Visitor[T <: Array[Byte] | String, U] {
// Resets the visitor; Xsv calls this before each parse.
def clear(): this.type
// A complete unquoted cell.
def unquoted(data: T, start: Int, end: Int): Unit Or Err
// A fragment of a quoted cell; one cell may arrive as several fragments.
def quoted(data: T, start: Int, end: Int): Unit
// The quoted cell currently being accumulated has ended.
def endquote(): Unit Or Err
// A row has ended; `line` is the parser's line counter after advancing past the line break.
def newline(line: UInt): Unit Or Err
// Parsing finished without error; produces the result. `line` is the parser's final line counter.
def complete(line: UInt): U Or Err
// The parser hit an error; returns the error to report (gives the visitor a chance to clean up).
def error(err: Err): Err
}
object Visitor {
// Shared row holding a single empty cell; ToStringTable stores it for lines that produced no cells.
val emptyRow = Array("")
// Shared zero-row table; ToStringTable.complete returns it when no table was ever allocated.
val emptyTable = Array.empty[Array[String]]
// Visitor base class that collects cells into an Array[Array[String]], one inner array per row.
// Subclasses supply unquoted/quoted for their buffer type; quoted text is accumulated in `q`.
// When strictlyRectangular is set, a row whose column count differs from the previous row's is an error.
abstract class ToStringTable[T <: Array[Byte] | String](strictlyRectangular: Boolean)
extends Visitor[T, Array[Array[String]]] {
// Rows completed so far (grown by doubling); null until the first non-empty row is finished.
private var table: Array[Array[String]] = null
// Cells of the row being built (grown by doubling); null when no cell has been added to it yet.
private var row: Array[String] = null
// Index of the row being built.
private var rowIdx: Int = 0
// Number of cells stored in `row`.
private var colIdx: Int = 0
// Text of the quoted cell being accumulated: null when not in a quote, a String after one fragment,
// a StringBuilder once there are several.
protected var q: String | java.lang.StringBuilder | Null = null
def clear(): this.type =
table = null
row = null
rowIdx = 0
colIdx = 0
q = null
this
// Appends a cell to the current row, allocating or growing the row as needed.
protected def addToRow(cell: String): Unit Or Err =
if row eq null then
// New row: default capacity 4, or the previous row's length when one is available.
if (table eq null) || rowIdx == 0 || rowIdx >= table.length then row = new Array[String](4)
else row = new Array[String](table(rowIdx-1).length)
else if colIdx >= row.length then
val m = (colIdx *# 2) min (Int.MaxValue -7)
if m == row.length then return Err.or(s"Too many columns for row: ${m+1}")
row = row.copyToSize(m)
row(colIdx) = cell
colIdx += 1
Is.unit
// Finishes the quoted cell held in `q` and adds it to the row.
def endquote(): Unit Or Err =
val result = q match
case null => Err.or(s"Supposed end-quote with no starting quote?")
case s: String => addToRow(s)
case sb: java.lang.StringBuilder => addToRow(sb.toString)
q = null
result
// Finishes the current row and stores it in the table (growing the table as needed).
def newline(line: UInt): Unit Or Err =
if q ne null then return Err.or("New row in middle of quote")
if (row ne null) && colIdx > 0 then
if table eq null then table = new Array[Array[String]](4)
else if rowIdx >= table.length then
val m = (rowIdx *# 2) min (Int.MaxValue - 7)
if m == table.length then return Err.or(s"Too many lines for table: ${m+1}")
table = table.copyToSize(m)
table(rowIdx) = row.shrinkCopy(colIdx)
// Back-fill any earlier rows that were never stored (lines without cells) with emptyRow.
var i = rowIdx - 1
while i >= 0 && (table(i) eq null) do
table(i) = emptyRow
i -= 1
if strictlyRectangular && rowIdx > 0 && table(rowIdx-1).length != colIdx then
return Err.or(s"Row $rowIdx has ${table(rowIdx-1).length} columns but row ${rowIdx+1} has $colIdx")
else
// The line had no cells: record emptyRow if the table already has a slot for it.
if (table ne null) && rowIdx < table.length then table(rowIdx) = emptyRow
row = null
colIdx = 0
rowIdx += 1
Is.unit
// Flushes a pending row, drops trailing rows that are null or zero-length, returns the table,
// and resets this visitor.
def complete(line: UInt): Array[Array[String]] Or Err = Or.Ret:
if q ne null then return Err.or("End of table in middle of quote")
if colIdx > 0 then newline(line).?
if table eq null then emptyTable
else
var i = rowIdx min table.length
while i > 0 && table(i-1).fn(x => (x eq null) || x.length == 0) do i -= 1
val ans = table.shrinkCopy(i)
clear()
ans
// Discards all accumulated state and passes the error through unchanged.
def error(err: Err): Err =
clear()
err
}
// Visitor that builds a table of Strings from String input.
// strictRect is forwarded to ToStringTable: when set, rows of differing length are an error.
final class TableFromString(strictRect: Boolean = false)
extends ToStringTable[String](strictRect) {
  // Adds data[start, end) as one unquoted cell.
  // An unquoted token while a quoted cell is still open is an error. The check used to be a
  // discarded expression, so the error was never returned and the cell was added regardless.
  def unquoted(data: String, start: Int, end: Int): Unit Or Err =
    if q ne null then Err.or(s"New token in middle of quote")
    else addToRow(data.substring(start, end))

  // Appends data[start, end) to the quoted cell being accumulated. The first fragment is kept as
  // a plain String; a StringBuilder is only created if more fragments follow.
  def quoted(data: String, start: Int, end: Int): Unit = q match
    case null => q = data.substring(start, end)
    case s: String =>
      val sb = new java.lang.StringBuilder
      sb.append(s)
      sb.append(data, start, end)
      q = sb
    case sb: java.lang.StringBuilder =>
      sb.append(data, start, end)
}
// Visitor that builds a table of Strings from byte-array input.
// strictRect is forwarded to ToStringTable: when set, rows of differing length are an error.
// NOTE(review): bytes are decoded with `new String(bytes, offset, length)`, i.e. the platform
// default charset -- confirm this matches the encoding used on the write side.
final class TableFromBytes(strictRect: Boolean = false)
extends ToStringTable[Array[Byte]](strictRect) {
  // Adds data[start, end) as one unquoted cell.
  // An unquoted token while a quoted cell is still open is an error. The check used to be a
  // discarded expression, so the error was never returned and the cell was added regardless.
  def unquoted(data: Array[Byte], start: Int, end: Int): Unit Or Err =
    if q ne null then Err.or("New token in middle of quote")
    else addToRow(new String(data, start, end-start))

  // Appends data[start, end) to the quoted cell being accumulated. The first fragment is kept as
  // a plain String; a StringBuilder is only created if more fragments follow.
  def quoted(data: Array[Byte], start: Int, end: Int): Unit =
    // Shortcut for the single line-feed byte, which avoids decoding a one-byte array.
    val x = if start == 0 && end == 1 && data(0) == '\n' then "\n" else new String(data, start, end-start)
    q match
      case null => q = x
      case s: String =>
        val sb = new java.lang.StringBuilder
        sb.append(s)
        sb.append(x)
        q = sb
      case sb: java.lang.StringBuilder =>
        sb.append(x)
}
// Convenience constructors for the two table-building visitors.
def onString(strictRect: Boolean = false): TableFromString = TableFromString(strictRect)
def onBytes(strictRect: Boolean = false): TableFromBytes = TableFromBytes(strictRect)
}
// Appends the encoded form of one cell to `sb`.
//
// A cell containing none of CR, LF, double quote, or the separator is appended unchanged.
// Any other cell is wrapped in double quotes with every embedded double quote doubled.
//
// This replaces an index-juggling scan that mishandled consecutive quotes: the second quote of a
// pair was emitted undoubled and the character after it was skipped (for example, `a""b` lost its `b`).
def encodeCell(cell: String, separator: Char)(sb: java.lang.StringBuilder): Unit =
  val len = cell.length
  // Find the first character that forces quoting, if there is one.
  var i = 0
  while i < len && {
    val c = cell.charAt(i)
    c != '\r' && c != '\n' && c != '"' && c != separator
  } do i += 1
  if i == len then sb.append(cell)
  else
    sb.append('"')
    // `from` marks the start of the text not yet copied. Nothing before index i is a quote,
    // so scanning for quotes can resume at i.
    var from = 0
    while i < len do
      if cell.charAt(i) == '"' then
        // Copy everything up to and including this quote, then add the second quote of the pair.
        sb.append(cell, from, i + 1)
        sb.append('"')
        from = i + 1
      i += 1
    sb.append(cell, from, len)
    sb.append('"')
// Appends one encoded row to `sb`: the cells joined by `separator`, followed by CRLF.
// A row with no cells appends nothing at all.
def encodeRow(cells: Array[String], separator: Char)(sb: java.lang.StringBuilder): Unit =
  val last = cells.length - 1
  var k = 0
  while k <= last do
    encodeCell(cells(k), separator)(sb)
    if k == last then sb.append("\r\n") else sb.append(separator)
    k += 1
// Lazily encodes a table: the returned iterator produces the bytes of one row per element.
def encodeTable(table: Array[Array[String]], separator: Char): Iterator[Array[Byte]] =
  new EncodeIterator(table, separator)
// Iterator over the encoded rows of a table; each element is the bytes of one row.
// A null or empty table produces no elements.
class EncodeIterator(private var table: Array[Array[String]], val separator: Char) extends Iterator[Array[Byte]] {
  // Index of the next row to encode, or -1 when there is nothing (more) to produce.
  private var row = if (table ne null) && table.length > 0 then 0 else -1

  def hasNext = row >= 0

  def next: Array[Byte] =
    // Exhausted: defer to the empty iterator for the failure behavior.
    if row < 0 then Iterator.empty.next
    val sb = "".builder()
    encodeRow(table(row), separator)(sb)
    val encoded = sb.toString.bytes
    if row + 1 < table.length then row += 1
    else
      // That was the last row: mark the iterator finished and let go of the table.
      row = -1
      table = null
    encoded
}
}
// Comma-separated convenience front end: every method delegates to a fresh Xsv.comma and
// produces (or consumes) a table of Strings.
object Csv {
  // A new comma parser per call; each Xsv carries its own mutable parse state.
  private def codec: Xsv = Xsv.comma

  def decode(content: Array[Byte]): Array[Array[String]] Or Err = codec.decode(content)
  @targetName("decodeByteArrays")
  def decode(content: IOnce[Array[Byte]]): Array[Array[String]] Or Err = codec.decode(content)
  def decode(content: String): Array[Array[String]] Or Err = codec.decode(content)
  @targetName("decodeStrings")
  def decode(content: IOnce[String]): Array[Array[String]] Or Err = codec.decode(content)
  def decode(content: InputStream): Array[Array[String]] Or Err = codec.decode(content)
  def read(path: Path): Array[Array[String]] Or Err = codec.read(path)

  // The same operations, skipping a leading byte-order mark.
  object bomless {
    def decode(content: Array[Byte]): Array[Array[String]] Or Err = codec.bomless.decode(content)
    def decode(content: IOnce[Array[Byte]]): Array[Array[String]] Or Err = codec.bomless.decode(content)
    def decode(content: InputStream): Array[Array[String]] Or Err = codec.bomless.decode(content)
    def read(path: Path): Array[Array[String]] Or Err = codec.bomless.read(path)
  }

  def encode(table: Array[Array[String]]): Iterator[Array[Byte]] =
    codec.encode(table)

  def write(table: Array[Array[String]])(p: Path)(using tr: Send[Iterator[Array[Byte]], OutputStream]): Unit Or Err =
    codec.write(table)(p)
}
// Tab-separated convenience front end: every method delegates to a fresh Xsv.tab and
// produces (or consumes) a table of Strings.
object Tsv {
  // A new tab parser per call; each Xsv carries its own mutable parse state.
  private def codec: Xsv = Xsv.tab

  def decode(content: Array[Byte]): Array[Array[String]] Or Err = codec.decode(content)
  @targetName("decodeByteArrays")
  def decode(content: IOnce[Array[Byte]]): Array[Array[String]] Or Err = codec.decode(content)
  def decode(content: String): Array[Array[String]] Or Err = codec.decode(content)
  @targetName("decodeStrings")
  def decode(content: IOnce[String]): Array[Array[String]] Or Err = codec.decode(content)
  def decode(content: InputStream): Array[Array[String]] Or Err = codec.decode(content)
  def read(path: Path): Array[Array[String]] Or Err = codec.read(path)

  // The same operations, skipping a leading byte-order mark.
  object bomless {
    def decode(content: Array[Byte]): Array[Array[String]] Or Err = codec.bomless.decode(content)
    def decode(content: IOnce[Array[Byte]]): Array[Array[String]] Or Err = codec.bomless.decode(content)
    def decode(content: InputStream): Array[Array[String]] Or Err = codec.bomless.decode(content)
    def read(path: Path): Array[Array[String]] Or Err = codec.bomless.read(path)
  }

  def encode(table: Array[Array[String]]): Iterator[Array[Byte]] =
    codec.encode(table)

  def write(table: Array[Array[String]])(p: Path)(using tr: Send[Iterator[Array[Byte]], OutputStream]): Unit Or Err =
    codec.write(table)(p)
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy