All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cc.factorie.tutorial.RegressionExample.scala Maven / Gradle / Ivy

/* Copyright (C) 2008-2016 University of Massachusetts Amherst.
   This file is part of "FACTORIE" (Factor graphs, Imperative, Extensible)
   http://factorie.cs.umass.edu, http://github.com/factorie
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */
package cc.factorie.tutorial

import java.io.File

import cc.factorie.app.regress.LinearRegressionTrainer
import cc.factorie.la.{DenseTensor1, Tensor1}
import cc.factorie.variable.{BinaryFeatureVectorVariable, CategoricalVectorDomain, DiffList, TensorVariable}

import scala.collection.mutable
import scala.io.Source

/**
 * An example of Linear Regression.
 *
 * Takes two folders of text files on the command line, turns every file into
 * a bag-of-words feature vector, and trains a linear regressor whose target is
 * `2 * i - 1` (plus a small random perturbation), where `i` is the index of the
 * folder the file came from: roughly -1 for the first folder and +1 for the second.
 */
object RegressionExample {

  /** Domain shared by all input feature vectors; its categories are the word strings. */
  object InputDomain extends CategoricalVectorDomain[String]

  /**
   * Input features of one document: every `\w+` token found in `file` is added
   * to this feature vector.
   *
   * @param file the document to read; it is decoded as ISO-8859-1
   */
  class Input(file: File) extends BinaryFeatureVectorVariable[String] {
    def domain = InputDomain

    { // add all words in document to vector
      // (the blank line above is required so this block is not parsed as an argument to InputDomain)
      val source = Source.fromFile(file, "ISO-8859-1")
      // Close the underlying file handle even if reading fails.
      val text = try source.mkString finally source.close()
      val words = """\w+""".r.findAllIn(text.trim)
      words.foreach { word => this += word }
    }
  }

  /**
   * Regression target for one document, stored as a one-element dense tensor.
   *
   * @param input the feature vector this target belongs to
   * @param label the real-valued target, written into element 0 of the tensor
   * @param d     optional DiffList passed implicitly to `set`; defaults to `null`
   */
  class Output(val input: Input, val label: Double)(implicit d: DiffList = null) extends TensorVariable[Tensor1] {
    set(new DenseTensor1(1))
    value(0) = label
  }

  /**
   * Loads the documents of both folders and trains the regressor.
   *
   * @param args exactly two directory paths, one per target value
   * @throws IllegalArgumentException if the argument count is wrong or a path
   *                                  cannot be listed as a directory
   */
  def main(args: Array[String]): Unit = {
    require(args.length == 2, "Usage: scala cc.factorie.tutorial.RegressionExample folder1/ folder2/")

    /** Load documents **/
    val outputs = mutable.ArrayBuffer[Output]()
    for ((directory, i) <- args.zipWithIndex) {
      // File.listFiles returns null (not an empty array) when the path is not a
      // directory or an I/O error occurs; fail with a clear message instead of an NPE.
      val entries = Option(new File(directory).listFiles).getOrElse(
        throw new IllegalArgumentException(s"Cannot list files in directory: $directory"))
      for (file <- entries; if file.isFile) {
        // Target is -1 for the first folder and +1 for the second, plus noise in [0, 0.001).
        outputs += new Output(new Input(file), (2 * i - 1) + math.random * 0.001)
      }
    }

    /** Run regression **/
    // NOTE(review): the trailing 0.0 is presumably a regularization strength -- confirm
    // against LinearRegressionTrainer.train. The trained model is not used further here.
    val regressor = LinearRegressionTrainer.train[Input, Output](outputs, {f => f.input}, 0.0)
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy