
grizzled.string.Implicits.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of grizzled-scala_2.11 Show documentation
Show all versions of grizzled-scala_2.11 Show documentation
A general-purpose Scala utility library
The newest version!
package grizzled.string
import scala.collection.immutable.WrappedString
/** String and character implicits.
*/
object Implicits {
import scala.language.implicitConversions
import grizzled.parsing.StringToken
import scala.util.matching.Regex.Match
/** `Char` enrichments. Importing `grizzled.string.Implicits.Char._` brings
  * the conversions and the `GrizzledChar` methods into scope.
  */
object Char {
  /** Unwrap a `GrizzledChar`, yielding the `Char` it decorates. */
  implicit def GrizzledChar_Char(gc: GrizzledChar): Char = gc.character

  /** Wrap a boxed `java.lang.Character` in a `GrizzledChar`. */
  implicit def JavaCharacter_GrizzledChar(c: java.lang.Character): GrizzledChar =
    new GrizzledChar(c.charValue)

  /** Box the decorated character as a `java.lang.Character`. Uses
    * `Character.valueOf` rather than the `Character` constructor, which the
    * JDK has deprecated.
    */
  implicit def GrizzledChar_JavaCharacter(c: GrizzledChar): java.lang.Character =
    java.lang.Character.valueOf(c.character)

  /** An analog to Scala's `RichChar` class, providing some methods
    * that neither `RichChar` nor `Char` (nor, for that matter,
    * `java.lang.Character`) provide. By importing the implicit conversion
    * functions, you can use the methods in this class transparently from a
    * `Char`, `RichChar` or `Character` object.
    *
    * {{{
    * import grizzled.string.Implicits.Char._
    * val ch = 'a'
    * println(ch.isHexDigit) // prints: true
    * }}}
    *
    * @param character the character being enriched
    */
  implicit class GrizzledChar(val character: Char) {
    /** Determine whether the character represents a valid hexadecimal
      * digit. This is a specialization of `isDigit(radix)`.
      *
      * @return `true` if the character is a valid hexadecimal
      *         digit, `false` if not.
      */
    def isHexDigit: Boolean = isDigit(16)

    /** Determine whether the character represents a valid digit in a
      * given base.
      *
      * @param radix the radix
      * @return `true` if the character is a valid digit in the
      *         indicated radix, `false` if not (including when the radix
      *         itself is not one the JDK supports).
      */
    def isDigit(radix: Int): Boolean = {
      // Character.digit reports -1 both for a non-digit and for an
      // unsupported radix, so no exception handling is needed to answer
      // the question.
      Character.digit(character, radix) >= 0
    }

    /** Determine if a character is printable. Note that the notion
      * of "non-printable" in Unicode can be problematic, depending on the
      * encoding. A printable Unicode character, printed in UTF-8 on a
      * terminal that only handles ISO-8859.1 may not, strictly speaking,
      * be "printable" on that terminal.
      *
      * This method's notion of "printable" assumes that the output device
      * is capable of displaying Unicode encodings (e.g., UTF-8). In other
      * words, this method could also be called `isUnicodePrintable()`.
      *
      * A character is treated as printable when it is not an ISO control
      * character and belongs to a Unicode block other than `SPECIALS`.
      *
      * See also http://stackoverflow.com/q/220547
      *
      * @return `true` if printable, `false` if not.
      */
    def isPrintable: Boolean = {
      // UnicodeBlock.of may return null; Option(...) turns that into None,
      // which `exists` then reports as "not printable".
      val block = Option(Character.UnicodeBlock.of(character))
      (!Character.isISOControl(character)) &&
        block.exists(_ != Character.UnicodeBlock.SPECIALS)
    }
  }
}
/** String enrichment classes.
*/
object String {
/** Lift a Scala `WrappedString` into a `GrizzledString` by way of its
  * string form.
  */
implicit def WrappedString_GrizzledString(rs: WrappedString): GrizzledString = {
  val underlying = rs.toString
  new GrizzledString(underlying)
}
/** Lift a `StringBuilder` into a `GrizzledString`, using the builder's
  * contents at the time of the conversion.
  */
implicit def StringBuilder_GrizzledString(rs: StringBuilder): GrizzledString = {
  val contents = rs.toString
  new GrizzledString(contents)
}
/** An analog to Scala's `RichString` class, providing some methods
* that neither `RichString` nor `String` provide. By
* importing the implicit conversion functions, you can use the methods in
* this class transparently from a `String` or `RichString`
* object.
*
* ===Examples===
*
* These examples assume you've included this import:
*
* {{{import grizzled.string.Implicits.String._}}}
*
* These are just a few of the enrichments available. See below for
* the entire set.
*
* {{{
* val s = "a b c"
* println(s.tokenize) // prints: List(a, b, c)
* }}}
*
* {{{
* " abc def ".rtrim // yields " abc def"
* }}}
*
* {{{
* "\\u00a9 2016 The Example Company\\u2122".translateMetachars // yields "© 2016 The Example Company™"
* }}}
*/
implicit class GrizzledString(val string: String) {
// Matches only the run of white space at the very start of the string.
// Removing that match (rather than capturing "everything else" with `.*`,
// which stops at line terminators) leaves embedded and trailing newlines
// untouched. Lazy, like RTrimRegex, so the pattern is only compiled when
// ltrim is actually used.
private lazy val LTrimRegex = """^\s+""".r

// Control characters that have a conventional backslash escape, mapped to
// the two-character text of that escape.
private val SpecialMetachars = Map(
  '\n' -> """\n""",
  '\f' -> """\f""",
  '\t' -> """\t""",
  '\r' -> """\r"""
)

/** Trim white space from the front (left) of a string. "White space" is
  * whatever the regular expression class `\s` matches. Only the leading
  * run is removed; the rest of the string, including any line terminators
  * inside it or at its end, is returned unchanged.
  *
  * @return possibly modified string
  */
def ltrim: String = LTrimRegex.replaceFirstIn(string, "")
// Matches the (possibly empty) run of white space that ends the string.
private lazy val RTrimRegex = """\s*$""".r

/** Strip white space from the back (right) end of the string, leaving
  * everything before it untouched.
  *
  * @return possibly modified string
  */
def rtrim: String = {
  val nothing = ""
  RTrimRegex.replaceFirstIn(string, nothing)
}
/** Like perl's `chomp()`: drop a single trailing newline (`"\n"`), if the
  * string ends with one. Strings that do not end in a newline are
  * returned as is.
  *
  * @return the possibly modified line
  */
def chomp: String = string.stripSuffix("\n")
/** Break the string into white space-delimited tokens. Leading and
  * trailing white space is ignored, and runs of white space count as a
  * single separator.
  *
  * An empty string and a string containing only white space are handled
  * identically: both produce `Nil`. That is the difference between this
  * method and a bare `split("""\s+""").toList`, which yields a list holding
  * one empty string for `""` but an empty list for a blank string such
  * as `" "`.
  *
  * @return A list of tokens, or `Nil` if there aren't any.
  */
def tokenize: List[String] = {
  val trimmed = string.trim
  // After trimming, a non-empty string starts and ends with a token, so
  // the split can only produce an empty token when nothing is left at all.
  if (trimmed.isEmpty) Nil
  else trimmed.split("""\s+""").toList
}
/** Tokenize the string on a set of delimiter characters. The string is
  * trimmed first, then split at every occurrence of any one delimiter.
  *
  * An empty string, or one that is empty after trimming, has no tokens and
  * yields `Nil`, matching the behavior of the no-argument `tokenize`.
  *
  * Note that `delims` is spliced, as is, into a regular expression
  * character class (`[...]`), so characters that are special inside a
  * character class keep that special meaning.
  *
  * @param delims the delimiter characters
  * @return A list of tokens, or `Nil` if there aren't any.
  */
def tokenize(delims: String): List[String] = {
  val trimmed = string.trim
  // Splitting "" would produce a single empty token; report "no tokens".
  if (trimmed.isEmpty) Nil
  else trimmed.split("[" + delims + "]").toList
}
/** Tokenize the string on a set of delimiter characters, returning
  * `Token` objects. This method is useful when you need to keep
  * track of the locations of the tokens within the original string: each
  * token carries the text of a maximal run of non-delimiter characters
  * together with the offset of that run in the original string.
  *
  * Note that `delims` is spliced, as is, into a negated regular expression
  * character class (`[^...]`), so characters that are special inside a
  * character class keep that special meaning.
  *
  * @param delims the delimiter characters
  * @return A list of tokens, or `Nil` if there aren't any.
  */
def toTokens(delims: String): List[StringToken] = {
  val tokenRe = ("([^" + delims + "]+)").r
  // One pass over the original string: every match is a token, and its
  // start index is already the absolute offset. This avoids recursing once
  // per token and re-copying the remaining text for each one.
  tokenRe
    .findAllMatchIn(string)
    .map(m => StringToken(m.matched, m.start))
    .toList
}
/** Tokenize the string into `Token` objects, treating the space and tab
  * characters as the delimiters. This method is useful when you need to
  * keep track of the locations of the tokens within the original string.
  *
  * @return A list of tokens, or `Nil` if there aren't any.
  */
def toTokens: List[StringToken] = {
  // A space, plus the regex escape for a tab; interpreted by the
  // delimiter-based overload.
  val spaceAndTab = """ \t"""
  toTokens(spaceAndTab)
}
/** Escape any non-printable characters by converting them to
  * metacharacter sequences. Newline, form feed, tab and carriage return
  * become their conventional two-character backslash escapes. Any other
  * character that `isPrintable` rejects becomes a backslash, a `u` and the
  * character's code as at least four lower-case hexadecimal digits.
  * Printable characters are copied through unchanged.
  *
  * @return the possibly translated string
  */
def escapeNonPrintables: String = {
  import Char._
  // Every character maps to a String, so the intermediate collection stays
  // typed (no mix of Char and String), and the escape table is consulted
  // only once per character.
  string.map { c =>
    SpecialMetachars.getOrElse(
      c,
      if (c.isPrintable) c.toString else f"\\u${c.toLong}%04x"
    )
  }.mkString
}
/** Translate any metacharacters (e.g., \t, \n, \\u2122) into their real
  * characters, and return the translated string. The recognized sequences
  * are a backslash followed by `t`, `n`, `r` or `f`, a doubled backslash
  * (which becomes a single backslash), and a backslash followed by `u` and
  * exactly four hexadecimal digits. Metacharacter sequences
  * that cannot be parsed (because they're unrecognized, because the
  * Unicode number isn't four digits, etc.) are passed along unchanged.
  *
  * @return the possibly translated string
  */
def translateMetachars: String = {
  import scala.annotation.tailrec
  import Char._

  def allHex(digits: Char*): Boolean = digits.forall(_.isHexDigit)

  // Consumes the remaining characters from the front, appending the
  // translation to one shared builder so the output is assembled in a
  // single pass instead of by repeated string concatenation.
  @tailrec
  def doParse(chars: List[Char], buf: StringBuilder): String = {
    chars match {
      case Nil => buf.toString
      case '\\' :: 't' :: rest => doParse(rest, buf.append('\t'))
      case '\\' :: 'n' :: rest => doParse(rest, buf.append('\n'))
      case '\\' :: 'r' :: rest => doParse(rest, buf.append('\r'))
      case '\\' :: 'f' :: rest => doParse(rest, buf.append('\f'))
      case '\\' :: '\\' :: rest => doParse(rest, buf.append('\\'))
      case '\\' :: 'u' :: a :: b :: c :: d :: rest if allHex(a, b, c, d) =>
        // Four hex digits can only describe a single 16-bit code unit.
        val codeUnit = Integer.parseInt(s"$a$b$c$d", 16)
        doParse(rest, buf.append(codeUnit.toChar))
      case '\\' :: 'u' :: rest =>
        // Not followed by four hex digits: keep the text as written.
        doParse(rest, buf.append("\\u"))
      case '\\' :: c :: rest =>
        // Unrecognized escape: keep both characters.
        doParse(rest, buf.append('\\').append(c))
      case '\\' :: Nil =>
        // A lone trailing backslash is kept.
        buf.append('\\').toString
      case c :: rest => doParse(rest, buf.append(c))
    }
  }

  doParse(this.string.toList, new StringBuilder)
}
/** Swap the first occurrence of one character for another character and
  * return the resulting string. Unlike the JDK's `replaceFirst()`, the
  * target is a plain character, not a regular expression, so there is no
  * need to worry about escaping regular expression metacharacters.
  *
  * @param c           the character to find
  * @param replacement the replacement character
  *
  * @return the (possibly changed) new string
  */
def replaceFirstChar(c: Char, replacement: Char): String = {
  // Delegate to the string-replacement overload.
  val replacementText = replacement.toString
  replaceFirstChar(c, replacementText)
}
/** Swap the first occurrence of a character for a string and return the
  * resulting string. Unlike the JDK's `replaceFirst()`, the target is a
  * plain character, not a regular expression, so there is no need to
  * worry about escaping regular expression metacharacters. If the
  * character does not occur, the original string is returned.
  *
  * @param c           the character to find
  * @param replacement the replacement string
  *
  * @return the (possibly changed) new string
  */
def replaceFirstChar(c: Char, replacement: String): String = {
  val pos = string.indexOf(c)
  if (pos >= 0) {
    val before = string.substring(0, pos)
    val after = string.substring(pos + 1)
    s"$before$replacement$after"
  }
  else {
    string
  }
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy