All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.johnsnowlabs.nlp.annotators.sbd.pragmatic.RuleSymbols.scala Maven / Gradle / Ivy

package com.johnsnowlabs.nlp.annotators.sbd.pragmatic

/**
  * Created by Saif Addin on 5/6/2017.
  */

/**
  * Base Symbols that may be extended later on. For now kept in the pragmatic scope.
  */
trait RuleSymbols {

  /**
    * Separation symbols for list items and numbers
    * 
    */
  val BREAK_INDICATOR = "\uF050"

  /**
    * looks up .
    */
  val DOT = "\uF051"

  /**
    * looks up ,
    */
  val COMMA = "\uF052"

  /**
    * looks up ;
    */
  val SEMICOLON = "\uF053"

  /**
    * looks up ?
    */
  val QUESTION = "\uF054"

  /**
    * looks up !
    */
  val EXCLAMATION = "\uF055"

  /**
    * ====================
    * PROTECTION SYMBOL
    * ====================
    */

  /**
    * Between punctuations marker
    */
  val PROTECTION_MARKER_OPEN = "\uF056"
  val PROTECTION_MARKER_CLOSE = "\uF057"

  /**
    * Magic regex ensures no breaking within protection
    */
    // http://rubular.com/r/Tq7lWxGkQl
  val UNPROTECTED_BREAK_INDICATOR = s"$BREAK_INDICATOR(?![^$PROTECTION_MARKER_OPEN]*$PROTECTION_MARKER_CLOSE)"

  def symbolRecovery: Map[String, String] = Map(
    DOT -> ".",
    SEMICOLON -> ";",
    QUESTION -> "?",
    EXCLAMATION -> "!",
    PROTECTION_MARKER_OPEN -> "",
    PROTECTION_MARKER_CLOSE -> ""
  )

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy