All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.johnsnowlabs.nlp.annotators.sbd.pragmatic.RuleSymbols.scala Maven / Gradle / Ivy

There is a newer version: 1.6.2
Show newest version
package com.johnsnowlabs.nlp.annotators.sbd.pragmatic

/**
  * Created by Saif Addin on 5/6/2017.
  */

/**
  * Base symbols that may be extended later on. For now kept in the pragmatic scope.
  *
  * Every punctuation symbol declared here is a single-character placeholder for a
  * regular punctuation mark; [[symbolRecovery]] lists the placeholder -> text pairs.
  */
trait RuleSymbols {

  /**
    * Separation symbol for list items and numbers.
    * alt 197
    */
  val BREAK_INDICATOR: String = "┼"

  /**
    * Placeholder that stands for "."
    * alt 401
    */
  val DOT: String = "æ"

  /**
    * Placeholder that stands for ","
    * alt 402
    */
  val COMMA: String = "Æ"

  /**
    * Placeholder that stands for ";"
    * alt 403
    */
  val SEMICOLON: String = "ô"

  /**
    * Placeholder that stands for "?"
    * alt 404
    */
  val QUESTION: String = "ö"

  /**
    * Placeholder that stands for "!"
    * alt 405
    */
  val EXCLAMATION: String = "ò"

  /**
    * ====================
    * PROTECTION SYMBOLS
    * ====================
    */

  /**
    * Opening marker of a protected region (between-punctuation marker).
    * alt 505
    */
  val PROTECTION_MARKER_OPEN: String = "∙"

  /**
    * Closing marker of a protected region (between-punctuation marker).
    * alt 506
    */
  val PROTECTION_MARKER_CLOSE: String = "·"

  /**
    * Regex source that matches a [[BREAK_INDICATOR]] only when it is not inside a
    * protected region: the negative lookahead rejects a break indicator that is
    * followed by a [[PROTECTION_MARKER_CLOSE]] with no [[PROTECTION_MARKER_OPEN]]
    * in between.
    *
    * The symbols are interpolated verbatim (no regex quoting).
    *
    * NOTE(review): this is a strict val built during trait initialization, so a
    * subclass that overrides the symbols above with plain vals would not have them
    * initialized yet at this point -- confirm no subclass relies on overriding them.
    */
    // http://rubular.com/r/Tq7lWxGkQl
  val UNPROTECTED_BREAK_INDICATOR: String =
    s"$BREAK_INDICATOR(?![^$PROTECTION_MARKER_OPEN]*$PROTECTION_MARKER_CLOSE)"

  /**
    * Placeholder-to-text table: each punctuation placeholder maps to the mark it
    * stands for, and both protection markers map to the empty string.
    *
    * @return a new map with one entry per placeholder symbol declared in this trait
    *         (the break indicator is not part of it)
    */
  def symbolRecovery: Map[String, String] = Map(
    DOT -> ".",
    // COMMA is declared as a placeholder like the others, so it needs a way back too.
    COMMA -> ",",
    SEMICOLON -> ";",
    QUESTION -> "?",
    EXCLAMATION -> "!",
    PROTECTION_MARKER_OPEN -> "",
    PROTECTION_MARKER_CLOSE -> ""
  )

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy