All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cc.factorie.app.bib.hcoref.Keystore.scala Maven / Gradle / Ivy

/* Copyright (C) 2008-2016 University of Massachusetts Amherst.
   This file is part of "FACTORIE" (Factor graphs, Imperative, Extensible)
   http://factorie.cs.umass.edu, http://github.com/factorie
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */
package cc.factorie.app.bib.hcoref

import java.io.File

import cc.factorie.util.{DefaultCmdOptions, VectorUtils}

import scala.collection.JavaConverters._
import scala.io.Source

/**
 * A lookup from string keys to fixed-length vectors of doubles.
 *
 * Implementations supply [[dimensionality]] and [[retrieve]]; [[generateVector]] combines
 * the vectors of several keys and keeps a tally of the keys that could not be found.
 *
 * @author John Sullivan
 */
trait Keystore {
  /** Length of the arrays allocated by [[generateVector]]. */
  def dimensionality:Int

  /** Returns the vector stored for `key`, or `None` when the key is unknown. */
  def retrieve(key:String):Option[Array[Double]]

  // Backing store for the miss tally. It is kept as a concurrent map so that the
  // increment in recordMiss can be done with atomic putIfAbsent/replace operations.
  private val missingKeyCounts: scala.collection.concurrent.Map[String, Int] =
    new java.util.concurrent.ConcurrentHashMap[String, Int]().asScala

  /**
   * How many times each key was passed to [[generateVector]] without being found by
   * [[retrieve]]. Keys that were never missed read as 0.
   */
  val missingKeys = missingKeyCounts.withDefaultValue(0)

  /**
   * Atomically adds one to the miss count of `key`.
   *
   * A plain "read the count, then write count + 1" can lose updates when several threads
   * miss the same key at once, so this retries a compare-and-swap until it succeeds.
   */
  @scala.annotation.tailrec
  private def recordMiss(key:String):Unit =
    missingKeyCounts.putIfAbsent(key, 1) match {
      case None => () // first miss for this key; the count is now 1
      case Some(seen) =>
        if(!missingKeyCounts.replace(key, seen, seen + 1)) recordMiss(key)
    }

  import VectorUtils._

  /**
   * Combines the vectors of all `keys` that [[retrieve]] can find into one array of length
   * [[dimensionality]], starting from an all-zero array. Keys that are not found contribute
   * nothing and are counted in [[missingKeys]].
   *
   * @param keys the keys to look up; a key that occurs several times is looked up each time
   * @return the accumulated array (all zeros when no key is found)
   */
  def generateVector(keys:Iterable[String]):Array[Double] = {
    val total = new Array[Double](dimensionality)
    keys.foreach { key =>
      retrieve(key) match {
        // `+=` comes from VectorUtils; the result relies on it updating `total` in place.
        case Some(vec) => total += vec
        case None => recordMiss(key)
      }
    }
    total
  }
}

/**
 * Command-line options describing an embedding file: its location, the number of
 * dimensions per embedding, and the delimiter separating the fields of a line.
 */
trait InMemoryHashMapKeystoreOpts extends DefaultCmdOptions {

  /**
   * Option `embedding-file`, defaulting to the empty string.
   * NOTE(review): the trailing `true` is presumably CmdOption's "required" flag - confirm.
   */
  val keystorePath = new CmdOption(
    "embedding-file",
    "",
    "FILE",
    "The file which contains the embeddings",
    true
  )

  /** Option `embedding-dim`, defaulting to 200. */
  val keystoreDim = new CmdOption(
    "embedding-dim",
    200,
    "INT",
    "The number of dimensions in the embedding"
  )

  /** Option `embedding-delim`, defaulting to a single space. */
  val keystoreDelim = new CmdOption(
    "embedding-delim",
    " ",
    "DELIMITER",
    "The delimiter between fields in the embedding file"
  )
}

object InMemoryHashMapKeystore {
  /**
   * Builds an [[InMemoryHashMapKeystore]] from parsed command-line options.
   *
   * @param opts supplies the embedding file path, the embedding dimension and the field delimiter
   * @return a keystore constructed from the values of those three options
   */
  def fromOpts(opts:InMemoryHashMapKeystoreOpts):InMemoryHashMapKeystore = {
    val embeddingFile = new File(opts.keystorePath.value)
    val dim = opts.keystoreDim.value
    val delimiter = opts.keystoreDelim.value
    new InMemoryHashMapKeystore(embeddingFile, dim, delimiter)
  }
}

/**
 * A [[Keystore]] that reads a whole embedding file into an immutable in-memory map when it
 * is constructed.
 *
 * Every line of the file is split on `fileDelimiter`: the first field is the key and each
 * remaining field is parsed as a `Double`. If a key occurs on several lines, the last line wins.
 *
 * @param embeddingFile  the file to read, one embedding per line
 * @param dimensionality the number of values expected after the key on every line
 * @param fileDelimiter  separator between fields; it is handed to `String.split` and is
 *                       therefore interpreted as a regular expression
 */
class InMemoryHashMapKeystore(embeddingFile:File, val dimensionality:Int, fileDelimiter:String = " ") extends Keystore {

  /** The source the embeddings were read from; it is already closed once construction finishes. */
  val src = Source.fromFile(embeddingFile)

  // Parse inside try/finally so the file handle is released even when a line is malformed
  // (wrong number of fields, or a field that is not a number).
  private val store: Map[String, Array[Double]] = try {
    src.getLines().map { line =>
      val key :: vec = line.split(fileDelimiter).toList
      assert(vec.length == dimensionality,
        s"Expected $dimensionality values for key '$key' but found ${vec.length}")
      key -> vec.map(_.toDouble).toArray
    }.toMap
  } finally {
    src.close()
  }

  /** Returns the embedding loaded for `key`, or `None` when the file did not contain it. */
  def retrieve(key: String): Option[Array[Double]] = store.get(key)
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy