All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.seratch.ltsv4s.LTSVParser.scala Maven / Gradle / Ivy

The newest version!
package com.github.seratch.ltsv4s

/*
;; ABNF 

ltsv = *(record NL) [record]
record = [field *(TAB field)]
field = label ":" field-value
label = 1*lbyte
field-value = *fbyte

TAB = %x09
NL = [%x0D] %x0A
lbyte = %x30-39 / %x41-5A / %x61-7A / "_" / "." / "-" ;; [0-9A-Za-z_.-]
fbyte = %x01-08 / %x0B / %x0C / %x0E-FF
*/
import scala.util.parsing.combinator.RegexParsers

/**
 * Settings that control how an [[LTSVParser]] matches field values.
 *
 * @param lenient when `true`, a field value may contain any character except
 *                tab, carriage return and line feed; when `false` (the default),
 *                only the characters permitted by the LTSV grammar are accepted
 */
case class LTSVParserConfig(
  lenient: Boolean = false
)

/**
 * Parser for LTSV (Labeled Tab-Separated Values) text, built on `RegexParsers`.
 *
 * Records are separated by LF or CRLF, fields by a tab, and every field has the
 * form `label:value`. Because a record may contain zero fields, an empty line
 * (including the one implied by a trailing newline, and a completely empty
 * input) is parsed as an empty `Map`. When a label occurs more than once in a
 * record, the last occurrence wins, as with `toMap`.
 *
 * @param config selects strict or lenient matching of field values
 */
class LTSVParser(config: LTSVParserConfig) extends RegexParsers {

  // Compiled once per parser instance instead of on every call to the grammar methods.
  private val labelPattern: scala.util.matching.Regex = "[0-9A-Za-z_\\.-]+".r

  // Lenient: anything up to the next tab / CR / LF.
  // Strict: only U+0001..U+00FF, excluding tab (U+0009), LF (U+000A) and CR (U+000D).
  private val fieldValuePattern: scala.util.matching.Regex =
    if (config.lenient) """[^\t\r\n]*""".r
    else """[\u000B\u000C\u0001-\u0008\u000E-\u00FF]*""".r

  /** Whitespace is significant in LTSV (tab and newline are separators), so never skip it. */
  override def skipWhitespace: Boolean = false

  /** Whole document: zero or more records separated by newlines. */
  def ltsv: Parser[List[Map[String, String]]] = repsep(record, nl)

  /** One record: zero or more tab-separated fields, collected into a map. */
  def record: Parser[Map[String, String]] = repsep(field, tab) ^^ { _.toMap }

  /** One field: `label`, a colon, then the (possibly empty) value. */
  def field: Parser[(String, String)] =
    label ~ ":" ~ fieldValue ^^ { case name ~ _ ~ value => (name, value) }

  /** Pattern for a field label: one or more of `[0-9A-Za-z_.-]`. */
  def label: scala.util.matching.Regex = labelPattern

  /** Pattern for a field value; which characters are accepted depends on `config.lenient`. */
  def fieldValue: scala.util.matching.Regex = fieldValuePattern

  /** Field separator. */
  def tab: Char = '\t'

  /** Record separator: LF, optionally preceded by CR. */
  def nl: Parser[Option[Char]] = opt('\r') <~ '\n'

  /**
   * Parses a complete LTSV document.
   *
   * @param input the LTSV text; the whole string must match the grammar
   * @return one map per record, in input order
   * @throws IllegalArgumentException if `input` is null or cannot be parsed; for a
   *         parse failure the message contains the input (truncated to 1000
   *         characters) followed by the parser's diagnostic and position
   */
  def parse(input: String): List[Map[String, String]] = {
    if (input == null) {
      throw new IllegalArgumentException("Failed to parse ltsv: input must not be null")
    }
    parseAll(ltsv, input) match {
      case Success(records, _) => records
      case failure: NoSuccess =>
        // Keep the message bounded for very large inputs.
        val shown = if (input.length > 1000) input.take(1000) + "..." else input
        val position = failure.next.pos
        throw new IllegalArgumentException(
          "Failed to parse ltsv: \"" + shown + "\" (" + failure.msg +
            " at line " + position.line + ", column " + position.column + ")")
    }
  }

}

/**
 * Convenience entry point for one-off parsing without constructing a parser explicitly.
 */
object LTSVParser {

  /**
   * Parses `input` with a freshly created [[LTSVParser]].
   *
   * @param input   the LTSV text to parse
   * @param lenient passed through to [[LTSVParserConfig]]; defaults to `false`
   * @return one map per record, in input order
   */
  def parse(input: String, lenient: Boolean = false): List[Map[String, String]] = {
    val settings = LTSVParserConfig(lenient)
    val parser = new LTSVParser(settings)
    parser.parse(input)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy