All downloads are free. The search and download features use the official Maven repository.

edu.arizona.sista.struct.Lexicon.scala Maven / Gradle / Ivy

package edu.arizona.sista.struct

import java.io._

import edu.arizona.sista.struct.Lexicon.logger
import edu.arizona.sista.utils.Files
import org.slf4j.LoggerFactory

import scala.Serializable

/**
 * Generic lexicon: maps objects of type T to Ints, both ways.
 *
 * Every distinct object receives the next free index (0, 1, 2, ...) the first time it is
 * added, so indices are dense and follow insertion order.
 *
 * User: mihais
 * Date: 3/1/13
 *
 * @tparam T type of the stored objects; note that saveTo() only supports Int, Double and String
 */
class Lexicon[T] extends Serializable {
  /** Object -> index mapping */
  private var lexicon = new collection.mutable.HashMap[T, Int]
  /** Index -> object mapping; position i holds the object whose index is i */
  private var index = new collection.mutable.ArrayBuffer[T]()

  /** Counts the number of objects that were saved by not storing them repeatedly */
  private var savedMemory = 0

  /**
   * Adds an object to the lexicon without adding it twice if it already exists
   * @param s The object to store
   * @return The index of the object (the existing one if it was already stored)
   */
  def add(s:T):Int = {
    // single hash lookup instead of contains() followed by get()
    lexicon.get(s) match {
      case Some(existing) =>
        savedMemory += 1
        existing
      case None =>
        val i = index.size
        lexicon += s -> i
        index += s
        i
    }
  }

  /**
   * Checks if the given index is valid in this lexicon
   * @param i Index to check
   * @return true if 0 <= i < number of stored objects
   */
  def exists(i:Int):Boolean = i >= 0 && i < index.size

  /**
   * Fetches the object with the given index from the lexicon
   * This deliberately does NOT check for bounds for fast access.
   * Use exists() if you want to make sure that your index exists.
   * @param i Index of the object in the lexicon
   * @return The object corresponding to the index
   */
  def get(i:Int):T = index(i)

  /**
   * Fetches the index of the given object
   * @param w Object to look up
   * @return Some(index) if the object is stored, None otherwise
   */
  def get(w:T):Option[Int] = lexicon.get(w)

  /** Number of distinct objects stored */
  def size: Int = lexicon.size

  /**
   * One "object -> index" line per stored object, in the iteration order of the underlying map.
   * The index is rendered as an Option (e.g., "Some(3)"); this is kept for output compatibility.
   */
  override def toString:String = {
    val os = new StringBuilder
    for(w <- lexicon.keySet) {
      os.append(s"$w -> ${lexicon.get(w)}\n")
    }
    os.toString()
  }

  /** All objects stored in this lexicon */
  def keySet = lexicon.keySet

  /** Logs how many objects are stored and how many add() calls hit an already stored object */
  def stats(): Unit = {
    val total = savedMemory + lexicon.size
    // avoid 0.0/0 (NaN) when nothing was ever added
    val savedPct = if (total == 0) 0.0 else 100.0 * savedMemory / total
    logger.info(s"Stored objects: ${index.size}")
    logger.info(s" Saved objects: $savedMemory ($savedPct%)")
  }

  /**
   * Saves this lexicon to a text file, in the format documented in saveTo(w:Writer)
   * @param fileName Path of the file to (over)write
   */
  def saveTo(fileName:String): Unit = {
    val w = new BufferedWriter(new FileWriter(fileName))
    // close the file even if writing fails, e.g., on an unsupported element type
    try {
      saveTo(w)
    } finally {
      w.close()
    }
  }

  /**
   * Saves this lexicon to the given writer. The writer is flushed but NOT closed.
   * Format: first line = number of entries; second line = type tag of the entries
   * ("I" for Int, "D" for Double, "S" for String); then one "index object" line per entry.
   * @param w Destination writer
   * @throws RuntimeException if the stored objects are not Int, Double, or String
   */
  def saveTo(w:Writer): Unit = {
    val p = Files.toPrintWriter(w)
    p.println(index.size)
    // TODO: kinda hacky, but don't know how to recover from type erasure in loadFrom()...
    // The type tag is inferred from the first entry only.
    val typeTag =
      if (index.isEmpty) {
        "S" // this does not matter
      } else {
        index(0) match {
          case _: Int => "I"
          case _: Double => "D"
          case _: String => "S"
          case _ => throw new RuntimeException("ERROR: unknown type in lexicon!")
        }
      }
    p.println(typeTag)
    index.iterator.zipWithIndex.foreach { case (entry, i) =>
      p.println(s"$i $entry")
    }
    // NOTE(review): Files.toPrintWriter may wrap w in a buffering writer (not visible here);
    // flushing makes sure everything reaches w even if the caller only flushes/closes w itself.
    p.flush()
  }
}

/** Factory and (de)serialization helpers for Lexicon */
object Lexicon {
  val logger = LoggerFactory.getLogger(classOf[Lexicon[String]])

  /**
   * Copy constructor for Lexicon
   * Copies both mappings; the savedMemory counter of the copy starts from 0.
   * @param other Lexicon to copy
   * @return A new lexicon with the same object <-> index mappings
   */
  def apply[T](other:Lexicon[T]):Lexicon[T] = {
    val lex = new Lexicon[T]
    lex.lexicon ++= other.lexicon
    lex.index ++= other.index
    lex
  }

  /**
   * Loads a lexicon saved by Lexicon.saveTo
   * @param fileName Path of the file to read
   * @tparam F Type of the stored objects; must agree with the type tag stored in the file
   */
  def loadFrom[F](fileName:String):Lexicon[F] = {
    val is = new BufferedReader(new FileReader(fileName))
    // close the file even if parsing fails
    try {
      loadFrom[F](is)
    } finally {
      is.close()
    }
  }

  /**
   * Loads a lexicon saved by Lexicon.saveTo from the given reader. The reader is NOT closed.
   * Expected format: first line = number of entries; second line = type tag ("S", "I", or "D");
   * then one "index object" line per entry, with indices 0, 1, 2, ... in order.
   * For "S" lexicons everything after the first space is kept verbatim, so that empty strings
   * and strings with trailing whitespace survive a save/load round trip.
   * @param r Source reader
   * @tparam F Type of the stored objects; NOT checked against the type tag (type erasure)
   * @throws RuntimeException if the input ends prematurely or the type tag is unknown
   */
  def loadFrom[F](r:Reader):Lexicon[F] = {
    val reader = Files.toBufferedReader(r)
    val lex = new Lexicon[F]
    val header = reader.readLine()
    if(header == null) {
      throw new RuntimeException("ERROR: empty input, missing lexicon size!")
    }
    val size = header.trim.toInt
    val ftype = reader.readLine() // type of features in lexicon
    //println(s"SIZE = $size FTYPE = $ftype")
    assert(size >= 0, s"negative lexicon size: $size")
    for(i <- 0 until size) {
      val raw = reader.readLine()
      if(raw == null) {
        throw new RuntimeException(s"ERROR: lexicon input ended after $i of $size entries!")
      }
      // Strip leading whitespace only: a full trim would eat the separator of an empty or
      // whitespace-only String entry (e.g., "0 ") and the trailing whitespace of any entry.
      val line = raw.dropWhile(_ <= ' ')
      //println("LEX LINE = " + line)
      val space = line.indexOf(' ')
      assert(space > 0, s"malformed lexicon line (no index/object separator): $raw")
      val index = line.substring(0, space).toInt
      assert(index == i, s"unexpected lexicon index $index, expected $i")
      val f = line.substring(space + 1)
      ftype match {
        case "S" => lex.add(f.asInstanceOf[F])
        // numeric payloads are trimmed so stray trailing whitespace does not break parsing
        case "I" => lex.add(f.trim.toInt.asInstanceOf[F])
        case "D" => lex.add(f.trim.toDouble.asInstanceOf[F])
        case _ => throw new RuntimeException("ERROR: unknown type in lexicon!")
      }
    }
    lex
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy