All downloads are free. Search and download functionality uses the official Maven repository.

cc.factorie.db.mongo.GraphLoader.scala Maven / Gradle / Ivy

/* Copyright (C) 2008-2014 University of Massachusetts Amherst.
   This file is part of "FACTORIE" (Factor graphs, Imperative, Extensible)
   http://factorie.cs.umass.edu, http://github.com/factorie
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */
package cc.factorie.db.mongo

import cc.factorie.util.Cubbie
import annotation.tailrec
import scala.collection.{Map => GenericMap}


/**
 * Utilities for loading a graph of cubbies from cubbie collections, starting at a set of roots and
 * repeatedly following either reference slots ([[GraphLoader.load]]) or inverse slots
 * ([[GraphLoader.load2]]) in batched queries, one query per collection and traversal level.
 */
object GraphLoader {

  /**
   * Pairs a reference slot with the collection in which the referenced cubbie should be looked up.
   * @param slot the reference slot whose value (if set) is the id to resolve.
   * @param coll the collection that is queried for that id.
   */
  case class SlotInCollection[+R <: Cubbie](slot: Cubbie#AbstractRefSlot[R], coll: AbstractCubbieCollection[R])

  /** Cache from cubbie id to the cubbie with that id. */
  type Refs = GenericMap[Any, Cubbie]

  /** Function that resolves an inverse slot to the cubbies referring to its owner. */
  type Invs = Cubbie#InverseSlot[Cubbie] => Iterable[Cubbie]

  //Map from (cubbie class, attribute name, value) to the cubbies of that class with that attribute value
  type Index = GenericMap[(Class[Cubbie], String, Any), Iterable[Cubbie]]

  /**
   * Loads a cache from ids to cubbies based on the root objects and a neighborhood function.
   *
   * The roots are materialized once, so a single-pass `Iterator` may safely be passed as `roots`.
   *
   * @param roots the cubbies to start with.
   * @param neighbors the neighborhood function from cubbies to (refslot,collection) pairs. Cubbies for which
   *                  the function is not defined are not expanded.
   * @param maxDepth How far from the root are we allowed to travel. If maxDepth == 0 no ids are added to the cache, for
   *                 maxDepth == 1 only the roots are added to the cache, for maxDepth == 2 the roots' immediate
   *                 children etc. Only the values 0 and 1 stop the traversal, so a negative value does not limit it.
   * @param oldRefs an existing cache. Ids that are already keys of this cache are not fetched again.
   * @return a cache that maps ids to the cubbie objects in the graph defined by the roots, neighborhood function and
   *         maximum depth.
   */
  @tailrec
  def load(roots: TraversableOnce[Cubbie],
           neighbors: PartialFunction[Cubbie, Seq[SlotInCollection[Cubbie]]],
           maxDepth: Int = Int.MaxValue,
           oldRefs: Refs = Map.empty): Refs = {

    if (maxDepth == 0) {
      oldRefs
    }
    else {
      //materialize once: roots may be a single-pass iterator and is needed twice below
      val rootList = roots.toList

      //fill-up roots into refs
      val rootEntries = rootList.map(c => c.id -> c)
      val refs: Refs = oldRefs ++ rootEntries

      if (maxDepth == 1) {
        refs
      }
      else {
        //mapping from collections to the ids that need to be loaded
        val colls2ids =
          new collection.mutable.HashMap[AbstractCubbieCollection[Cubbie], collection.mutable.ListBuffer[Any]]

        //gather ids to load for each collection, skipping unset slots and ids that are already cached
        for (c <- rootList; slots <- neighbors.lift(c); slot <- slots; idRef <- slot.slot.opt) {
          if (!refs.contains(idRef)) {
            colls2ids.getOrElseUpdate(slot.coll, new collection.mutable.ListBuffer[Any]) += idRef
          }
        }

        //now do loading: one batched query per collection
        val loaded: List[Cubbie] =
          colls2ids.toList.flatMap { case (coll, ids) => coll.findByIds(ids.toList).toList }

        //the freshly loaded cubbies become the roots of the next level
        if (loaded.nonEmpty) load(loaded, neighbors, maxDepth - 1, refs) else refs
      }
    }

  }

  /**
   * Pairs an inverse slot with the collection that holds the cubbies referring back through it.
   * @param invSlot the inverse slot to resolve.
   * @param coll the collection that is queried for cubbies whose foreign slot has the inverse slot's target value.
   */
  case class InvSlotInCollection[+R <: Cubbie](invSlot: Cubbie#AbstractInverseSlot[R], coll: AbstractCubbieCollection[R])


  /**
   * Loads an index of cubbies by following inverse slots from the given roots.
   *
   * Every root and every newly loaded cubbie is registered under `(cubbieClass, Id.name, id)`. In addition, the
   * cubbies returned by a query are registered under `(foreign class, foreign attribute name, attribute value)`,
   * which is the key [[IndexBasedInverter]] uses to resolve an inverse slot. Targets for which a query returns
   * nothing get no entry in the index.
   *
   * The roots are materialized once, so a single-pass `Iterator` may safely be passed as `roots`.
   *
   * @param roots the cubbies to start with.
   * @param neighbors the neighborhood function from cubbies to (inverse slot, collection) pairs. Cubbies for which
   *                  the function is not defined are not expanded.
   * @param maxDepth how far from the roots to travel: 0 returns `oldIndex` unchanged, 1 only adds the roots,
   *                 2 additionally loads the cubbies referring to the roots, etc.
   * @param oldIndex an existing index. Keys that are already present are not queried again.
   * @return the index extended with the roots and everything reachable within `maxDepth`.
   */
  @tailrec
  def load2(roots: TraversableOnce[Cubbie],
            neighbors: PartialFunction[Cubbie, Seq[InvSlotInCollection[Cubbie]]],
            maxDepth: Int = Int.MaxValue,
            oldIndex: Index = Map.empty): Index = {

    if (maxDepth == 0) {
      oldIndex
    }
    else {
      //materialize once: roots may be a single-pass iterator and is needed twice below
      val rootList = roots.toList

      //fill-up roots into the index
      //todo: does this need to fill up inverse links too?
      val rootEntries = rootList.map(c => (c.cubbieClass, c.Id.name, c.id) -> Seq(c))
      val withRoots: Index = oldIndex ++ rootEntries

      if (maxDepth == 1) {
        withRoots
      }
      else {
        //mapping from collections and attributes to the values that need to be queried for.
        val collsAttr2ids =
          new collection.mutable.HashMap[(InvSlotInCollection[Cubbie], String), collection.mutable.ListBuffer[Any]]

        //gather queries to execute
        for (c <- rootList; slots <- neighbors.lift(c); slotAndColl <- slots) {
          val invSlot = slotAndColl.invSlot
          val foreignSlot = invSlot.foreignSlot(slotAndColl.coll.prototype)
          val foreignCubbieClass = foreignSlot.cubbie.getClass.asInstanceOf[Class[Cubbie]]
          for (target <- invSlot.target) {
            val attrName = foreignSlot.name
            //only query for values whose referrers are not in the index yet
            if (!withRoots.contains((foreignCubbieClass, attrName, target))) {
              collsAttr2ids.getOrElseUpdate(slotAndColl -> attrName, new collection.mutable.ListBuffer[Any]) += target
            }
          }
        }

        //now do the loading
        //todo: for every loaded cubbie we should also add the (cubbie.class,"_id", cubbie.id) -> cubbie mapping
        var graph: Index = withRoots
        val loaded = new collection.mutable.ListBuffer[Cubbie]
        for (((coll, attrName), targets) <- collsAttr2ids) {
          val prototype = coll.coll.prototype
          val foreignClass = prototype.cubbieClass
          val idName = prototype.Id.name
          val result = coll.coll.findByAttribute(attrName, targets.toList).toList
          //replace cubbies with already loaded cubbies with same id; remember the genuinely new ones
          val deduped = result.map(c => {
            val idKey = (foreignClass, idName, c.id)
            graph.get(idKey) match {
              case Some(existing) =>
                existing.head
              case None =>
                loaded += c
                graph = graph + (idKey -> List(c))
                c
            }
          })
          //index the results by the value of the attribute they were queried on
          val grouped = deduped.groupBy(c => (foreignClass, attrName, coll.invSlot.foreignSlot(c).value))
          graph = graph ++ grouped
        }

        //the freshly loaded cubbies become the roots of the next level
        if (loaded.nonEmpty) load2(loaded.toList, neighbors, maxDepth - 1, graph) else graph
      }
    }

  }

  /** Wraps the index in a function that resolves inverse slots by index lookup. */
  def toInverter(index:Index): IndexBasedInverter = new IndexBasedInverter(index)

  /**
   * Extracts an id-to-cubbie cache from the index: for every entry whose attribute name is "_id", maps the
   * attribute value to the first cubbie of the entry. Entries without cubbies are skipped.
   */
  def toRefs(index:Index): Refs =
    index.collect { case ((_, "_id", id), cubbies) if cubbies.nonEmpty => id -> cubbies.head }

  /**
   * Resolves inverse slots against an [[Index]].
   *
   * To find the class and attribute name under which referring cubbies are indexed, a prototype of the
   * referring cubbie class is instantiated reflectively through its no-argument constructor and cached
   * per manifest.
   */
  class IndexBasedInverter(index:Index) extends (Cubbie#InverseSlot[Cubbie] => Iterable[Cubbie]){
    val prototypeCache = new collection.mutable.HashMap[Manifest[Cubbie],Cubbie]

    /**
     * @param v1 the inverse slot to resolve.
     * @return the cubbies indexed under (referring class, referring attribute name, target of `v1`), or an empty
     *         collection if the slot has no target or the index has no entry for that key.
     */
    def apply(v1: Cubbie#InverseSlot[Cubbie]): Iterable[Cubbie] = {
      val prototype = prototypeCache.getOrElseUpdate(v1.manifest, v1.manifest.runtimeClass.newInstance().asInstanceOf[Cubbie])
      val prototypeClass = prototype.cubbieClass
      val attrName = v1.slot(prototype).name
      //a missing target or a missing index entry means "no referring cubbies", not an error
      v1.target.flatMap(target => index.get((prototypeClass, attrName, target))).getOrElse(Nil)
    }
  }



}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy