All downloads are free. The search and download features use the official Maven repository.

com.github.seratch.ltsv4s.LTSVParser.scala Maven / Gradle / Ivy

The newest version!
package com.github.seratch.ltsv4s

/*
;; ABNF 

ltsv = *(record NL) [record]
record = [field *(TAB field)]
field = label ":" field-value
label = 1*lbyte
field-value = *fbyte

TAB = %x09
NL = [%x0D] %x0A
lbyte = %x30-39 / %x41-5A / %x61-7A / "_" / "." / "-" ;; [0-9A-Za-z_.-]
fbyte = %x01-08 / %x0B / %x0C / %x0E-FF
 */
import scala.util.parsing.combinator.RegexParsers

/** Configuration for [[LTSVParser]].
  *
  * @param lenient
  *   When `false` (the default) the parser uses the strict character classes: a label must match
  *   `[0-9A-Za-z_.-]+` and a field value may only contain characters in the range U+0001-U+00FF
  *   other than TAB, LF and CR. When `true` both are relaxed: a label may be any run of
  *   characters (including an empty one) other than TAB, CR, LF and `:`, and a field value may be
  *   any run of characters other than TAB, CR and LF.
  */
case class LTSVParserConfig(lenient: Boolean = false)

/** Parser for LTSV (Labeled Tab-Separated Values) text, built on `RegexParsers`.
  *
  * A document is a list of records separated by newlines, a record is a list of TAB-separated
  * fields, and a field is `label:value`. Each record is returned as a `Map` from label to value.
  *
  * @param config
  *   selects the strict or lenient character classes used for labels and field values
  */
class LTSVParser(config: LTSVParserConfig) extends RegexParsers {

  // TAB, CR and LF are the grammar's separators, so they must not be skipped as whitespace.
  override def skipWhitespace: Boolean = false

  // The character classes depend only on `config`, so they are compiled once per parser instance
  // rather than on every call to `label` / `fieldValue`.
  private val labelPattern: scala.util.matching.Regex =
    if (config.lenient) """[^\t\r\n:]*""".r
    else "[0-9A-Za-z_\\.-]+".r

  private val fieldValuePattern: scala.util.matching.Regex =
    if (config.lenient) """[^\t\r\n]*""".r
    else """[\u000B\u000C\u0001-\u0008\u000E-\u00FF]*""".r

  /** A whole document: records separated by newlines.
    *
    * Because a record may contain zero fields, an empty line (including the one that follows a
    * trailing newline, and empty input itself) is reported as an empty map.
    */
  def ltsv: Parser[List[Map[String, String]]] = repsep(record, nl)

  /** One record: zero or more TAB-separated fields collected into a map.
    *
    * If a label occurs more than once in a record, the last value wins.
    */
  def record: Parser[Map[String, String]] = repsep(field, tab) ^^ { _.toMap }

  /** One `label:value` pair; the separating colon is dropped. */
  def field: Parser[(String, String)] =
    label ~ ":" ~ fieldValue ^^ { case key ~ _ ~ value => (key, value) }

  /** Pattern accepted as a label, chosen by `config.lenient`. */
  def label: scala.util.matching.Regex = labelPattern

  /** Pattern accepted as a field value (possibly empty), chosen by `config.lenient`. */
  def fieldValue: scala.util.matching.Regex = fieldValuePattern

  /** Field separator. */
  def tab: Char = '\t'

  /** Record separator: LF, optionally preceded by CR. */
  def nl: Parser[Option[Char]] = opt('\r') <~ '\n'

  /** Parses a complete LTSV document.
    *
    * @param input
    *   the LTSV text; it must be consumed entirely for the parse to succeed
    * @return
    *   one map per record, in input order
    * @throws IllegalArgumentException
    *   if the input is not valid under the configured grammar; the message contains the input
    *   (truncated to 1000 characters) followed by the parser's diagnostic and position
    */
  def parse(input: String): List[Map[String, String]] =
    parseAll(ltsv, input) match {
      case Success(records, _) => records
      case failure: NoSuccess =>
        // Keep the message bounded for very large inputs.
        val shown = if (input.length > 1000) input.take(1000) + "..." else input
        val pos = failure.next.pos
        throw new IllegalArgumentException(
          s"""Failed to parse ltsv: "$shown" (${failure.msg} at line ${pos.line}, column ${pos.column})"""
        )
    }
}

/** Convenience entry point that parses LTSV text without constructing a parser by hand. */
object LTSVParser {

  /** Parses `input` with a freshly created [[LTSVParser]].
    *
    * @param input
    *   the LTSV text to parse
    * @param lenient
    *   forwarded to [[LTSVParserConfig]]; `false` selects the strict character classes
    * @return
    *   one map of label to value per record, in input order
    * @throws IllegalArgumentException
    *   if `input` cannot be parsed
    */
  def parse(input: String, lenient: Boolean = false): List[Map[String, String]] = {
    val settings = LTSVParserConfig(lenient = lenient)
    val parser = new LTSVParser(settings)
    parser.parse(input)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy