org.emmalanguage.examples.imdb.GraphPreprocessing.scala Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of emma-examples-library Show documentation

There is a newer version: 0.2.0

Show newest version

/*
 * Copyright © 2014 TU Berlin ([email protected])
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.emmalanguage
package examples.imdb

import api._
import examples.graphs.model._
import model._

@emma.lib
object GraphPreprocessing {

  type Proj[L] = DataBag[Collaboration] => L

  def apply[L: Meta.Tag](input: String, csv: CSV)(proj: Proj[L]): DataBag[LEdge[Person, L]] = {

    val people : DataBag[Person] = DataBag.readCSV[Person](s"$input/people.csv", csv)
    val movies : DataBag[Movie]  = DataBag.readCSV[Movie](s"$input/people.csv", csv)
    val credits: DataBag[Credit] = DataBag.readCSV[Credit](s"$input/people.csv", csv)

    val collaborations = for {
      pd <- people
      cd <- credits
      mv <- movies
      ca <- credits
      pa <- people
      if pd.id == cd.personID
      if mv.id == cd.movieID
      if mv.id == ca.movieID
      if pa.id == ca.personID
      if cd.creditType == "director" // CreditType.Director.toString
      if ca.creditType == "actor" // CreditType.Actor.toString
    } yield Collaboration(pd, cd, mv, ca, pa)

    for {
      Group((pd, pa), cs) <- collaborations.groupBy(c => (c.director, c.actor))
    } yield {
      LEdge(pd, pa, proj(cs))
    }
  }

  //----------------------------------------------------------------------------
  // helper model
  //----------------------------------------------------------------------------

  //@formatter:off
  case class Collaboration
  (
    director       : Person,
    creditDirector : Credit,
    movie          : Movie,
    creditActor    : Credit,
    actor          : Person
  )
  //@formatter:on

}