All Downloads are FREE. Search and download functionalities are using the official Maven repository.

epic.features.EnglishWordClassGenerator.scala Maven / Gradle / Ivy

The newest version!
package epic.features


/**
 * Converts a string into another string with properties of that string
 * (a "signature"). Useful for rare or 0 count words, where the surface form
 * itself carries too little evidence.
 *
 * The signature is a concatenation of tags, appended in this order:
 *
 *  - capitalization, decided from the first character and the number of
 *    upper-case characters: `-INITC`, `-CAPS` or `-LC` (or nothing);
 *  - `-NUM` if the word contains a digit;
 *  - `-DASH` if the word contains a `'-'`;
 *  - at most one suffix tag: `-s`, or, for words of at least five characters,
 *    one of `-ed`, `-ing`, `-ion`, `-er`, `-est`, `-ly`, `-ity`, `-y`, `-al`.
 *
 * The empty string maps to the empty signature.
 *
 * @author dlwh
 */
@SerialVersionUID(1L)
object EnglishWordClassGenerator extends (String => String) with Serializable {

  // Suffixes probed for longer words. The first match wins, so order matters:
  // "ly" and "ity" must be tried before the bare "y".
  private val LongWordSuffixes: Seq[String] =
    Seq("ed", "ing", "ion", "er", "est", "ly", "ity", "y", "al")

  /**
   * Function-style entry point; delegates to [[signatureFor]].
   *
   * @param x the word to classify
   * @return the signature of `x`
   */
  def apply(x: String): String = signatureFor(x)

  /**
   * Computes the signature of `word`.
   *
   * @param word the word to classify; may be empty
   * @return the concatenated tags described on the enclosing object, or `""`
   *         when no tag applies (which includes the empty word)
   */
  def signatureFor(word: String): String = {
    val sb = new StringBuilder
    val wlen = word.length
    val numCaps = word.count(_.isUpper)
    val hasDigit = word.exists(_.isDigit)
    val hasDash = word.contains('-')
    // "Has a character that is not upper-case" -- digits and punctuation count too.
    val hasLower = numCaps < wlen
    // Pin the locale: with the JVM default, a Turkish/Azeri locale lowercases
    // 'I' to dotless 'ı' and the "ing"/"ion"/"ity" checks below would never match.
    val lowered = word.toLowerCase(java.util.Locale.ENGLISH)

    // Capitalization tag. Guarded so that the empty word does not hit charAt(0).
    if (wlen > 0) {
      val ch0 = word.charAt(0)
      if (Character.isUpperCase(ch0) || Character.isTitleCase(ch0)) {
        // Exactly one upper-case character overall => only the initial is capitalized.
        sb.append(if (numCaps == 1) "-INITC" else "-CAPS")
      } else if (!Character.isLetter(ch0) && numCaps > 0) {
        // Starts with a non-letter (digit, punctuation) but has capitals later on.
        sb.append("-CAPS")
      } else if (hasLower) {
        sb.append("-LC")
      }
    }

    if (hasDigit) {
      sb.append("-NUM")
    }
    if (hasDash) {
      sb.append("-DASH")
    }

    if (lowered.endsWith("s") && wlen >= 3) {
      // here length 3, so you don't miss out on ones like 80s
      // Index from lowered's own length: lowercasing can change the string length.
      val beforeS = lowered.charAt(lowered.length - 2)
      // not -ess suffixes or greek/latin -us, -is
      if (beforeS != 's' && beforeS != 'i' && beforeS != 'u') {
        sb.append("-s")
      }
    } else if (wlen >= 5 && !hasDash && !(hasDigit && numCaps > 0)) {
      LongWordSuffixes
        .find(suffix => lowered.endsWith(suffix))
        .foreach(suffix => sb.append("-").append(suffix))
    }

    sb.toString
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy