scray.querying.source.AbstractHashJoinSource.scala
// See the LICENCE.txt file distributed with this work for additional
// information regarding copyright ownership.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scray.querying.source

import com.twitter.concurrent.Spool
import com.twitter.util.{Await, Future}
import com.typesafe.scalalogging.slf4j.LazyLogging
import scala.annotation.tailrec
import scalax.collection.GraphEdge.DiEdge
import scalax.collection.GraphPredef._
import scalax.collection.immutable.Graph
import scray.querying.caching.{Cache, NullCache}
import scray.querying.description.{Column, ColumnOrdering, CompositeRow, EmptyRow, Row, TableIdentifier}
import scray.querying.description.internal.{CombinedIndexColumnMissingException, ComposedMultivalueDomain, Domain, RangeValueDomain, SingleValueDomain}
import scray.querying.planning.MergingResultSpool
import scray.querying.queries.{DomainQuery, KeyBasedQuery, KeySetBasedQuery}

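// A minimal sketch of a concrete subclass (hypothetical names, for illustration
// only; the actual contract is given by the abstract members of the class below):
//
//   class MyJoinSource(idx: LazySource[DomainQuery], kv: KeyedSource[Long, Row],
//       kvTable: TableIdentifier)
//     extends AbstractHashJoinSource[DomainQuery, Long, Long, Row](idx, kv, kvTable) {
//     // extract the reference keys from an index row, e.g. from a dedicated key column
//     override protected def getJoinablesFromIndexSource(index: Row): Array[Long] = ...
//     // state whether a transformed query preserves the original query's ordering
//     override protected def isOrderedAccordingToOrignalOrdering(
//         transformedQuery: DomainQuery, ordering: ColumnOrdering[_]): Boolean = false
//   }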
/**
 * This hash-join source provides a template for implementing hash joins. It
 * performs a relational lookup.
 * 
 * The query Q1 on "indexsource" yields a spool which can produce references of type R.
 * "lookupSource" is then queried with each R and the combined results are returned.
 * 
 * This is an inner join; results must still be filtered for EmptyRow.
 */
abstract class AbstractHashJoinSource[Q <: DomainQuery, M, R /* <: Product */, V](
    indexsource: LazySource[Q],
    lookupSource: KeyedSource[R, V],
    lookupSourceTable: TableIdentifier,
    lookupkeymapper: Option[M => R] = None,
    combinedIndexColumns: Set[Column] = Set(),
    sequencedmapper: Option[Int] = None)
  extends LazySource[Q] with LazyLogging {

  import SpoolUtils._

  /** Rebinds the column of a domain predicate to the given index table. */
  protected def tableTransformDomainColumn[T](domain: Domain[T], indexTable: TableIdentifier): Domain[T] = domain match {
    case single: SingleValueDomain[T] => single.copy(column = Column(single.column.columnName, indexTable))
    case range: RangeValueDomain[T] => range.copy(column = Column(range.column.columnName, indexTable))(range.ordering)
    case composed: ComposedMultivalueDomain[T] => composed.copy(column = Column(composed.column.columnName, indexTable))
  }

  /**
   * Extracts the equality predicates for all combined-index columns from the query
   * and rebinds them to the index table; throws if any of them is missing.
   */
  protected def getCombinedIndexColumns(query: DomainQuery, indexTable: TableIdentifier): List[Domain[_]] = {
    val combinedIndexDomains = query.getWhereAST.filter { domain => 
      combinedIndexColumns.contains(domain.column) &&
      domain.isInstanceOf[SingleValueDomain[_]] &&
      !domain.asInstanceOf[SingleValueDomain[_]].isNull
    }
    // check that predicates for all combined-index columns are present
    if(combinedIndexDomains.size != combinedIndexColumns.size) {
      throw new CombinedIndexColumnMissingException(query)
    }
    // now transform the predicates to reference the target table (i.e. the index table)
    combinedIndexDomains.map(tableTransformDomainColumn(_, indexTable))
  }
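  // Example for getCombinedIndexColumns (illustrative column names): with
  // combinedIndexColumns = Set(colA, colB), a query "colA = 1 AND colB = 2 AND time > t"
  // passes the completeness check, and the two equality domains are rebound
  // from the data table to the index table.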

  /**
   * Transforms the index spool, querying the lookup source for each reference and
   * combining index and lookup rows.
   */
  @inline protected def spoolTransform(spool: () => Spool[Row], query: Q): Future[Spool[Row]] = {
    // prepends the given rows to an existing spool
    @tailrec def insertRowsIntoSpool(rows: Array[Row], spoolElements: => Spool[Row]): Spool[Row] = {
      if(rows.isEmpty) {
        spoolElements
      } else {
        insertRowsIntoSpool(rows.tail, rows.head *:: Future.value(spoolElements))
      }
    }
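    // Strategy selection: (1) if sequencedmapper is set and the lookup source
    // supports set-based key queries, join in buffered batches; (2) otherwise,
    // for large or unbounded result windows, look up joinables synchronously,
    // switching to futures only for larger batches (more than 20 per index row);
    // (3) for small bounded windows, issue one asynchronous lookup per joinable.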
    sequencedmapper.flatMap { seqSize =>
      lookupSource match {
        case pls: ParallelizedKeyValueSource[R, V] => 
          Some(Future.value(spool().mapBuffered(seqSize, bufferedseq => {
            val joinables = bufferedseq.flatMap(in => getJoinablesFromIndexSource(in))
            val setquery = new KeySetBasedQuery[R](
              joinables.map(lookupTuple => lookupkeymapper.map(_(lookupTuple)).getOrElse(lookupTuple.asInstanceOf[R])).toSet,
              lookupSourceTable,
              lookupSource.getColumns,
              query.querySpace,
              query.querySpaceVersion,
              query.getQueryID)
            Await.result(pls.request(setquery))
          })))
        case _ => None
      }
    }.getOrElse {
      // if the result window is large (or unbounded) we mostly process sequentially,
      // parallelizing only larger batches of joinables
      val pos = query.range.map { range => range.skip.getOrElse(0L) + range.limit.getOrElse(0L) }.getOrElse(Long.MaxValue)
      if(pos > 1000L) {
        spool().flatMap { in => 
          val joinables = getJoinablesFromIndexSource(in)
          def joinableMapFunction: M => Option[Row] = { lookupTuple =>
            val keyValueSourceQuery = new KeyBasedQuery[R](
              lookupkeymapper.map(_(lookupTuple)).getOrElse(lookupTuple.asInstanceOf[R]),
              lookupSourceTable,
              lookupSource.getColumns,
              query.querySpace,
              query.querySpaceVersion,
              query.getQueryID)
            val seq = Await.result(lookupSource.request(keyValueSourceQuery))
            if(seq.size > 0) {
              val head = seq.head
              Some(new CompositeRow(List(head, in))) 
            } else { None }
          }
          // execute the lookups as futures if their number exceeds a limit of 20
          if(joinables.size > 20) {
            val parseq: Array[Future[Option[Row]]] = joinables.map(futureJoinableMapFunction(query, in)(_))
            spoolRows(parseq)
          } else {
            // if there are fewer entries we process them sequentially
            val parseq: Array[Option[Row]] = joinables.map { joinableMapFunction(_) }
            val results = parseq.filter(_.isDefined).map(_.get)
            if(results.size > 0) {
              // we can map the first one; the others must be inserted 
              // into the spool before we return it
              val sp = results(0) *:: Future.value(Spool.empty[Row])
              if(results.size > 1) {
                Future.value(insertRowsIntoSpool(results.tail, sp))
              } else {
                Future.value(sp)
              }
            } else {
              Future.value(in *:: Future.value(Spool.empty[Row]))
            }
          }
        }
      } else {
        // small bounded result window: issue one asynchronous lookup per joinable
        spool().flatMap { in =>
          val joinables = getJoinablesFromIndexSource(in)
          val parseq: Array[Future[Option[Row]]] = joinables.map(futureJoinableMapFunction(query, in)(_))
          spoolRows(parseq)
        }
      }
    }
  }

  /** Lazily turns an array of future optional rows into a spool, skipping empty lookup results. */
  private def spoolRows(futures: Array[Future[Option[Row]]]): Future[Spool[Row]] = {
    if(futures.isEmpty) {
      Future.value(Spool.empty[Row])
    } else {
      futures.head.flatMap { optRow => 
        optRow.map(row => Future.value(row *:: spoolRows(futures.tail))).getOrElse(spoolRows(futures.tail))
      }
    }
  }

  /** Creates an asynchronous lookup that joins a single reference with the given index row. */
  private def futureJoinableMapFunction(query: Q, in: => Row): M => Future[Option[Row]] = { (lookupTuple) => 
    val keyValueSourceQuery = new KeyBasedQuery[R](
      lookupkeymapper.map(_(lookupTuple)).getOrElse(lookupTuple.asInstanceOf[R]),
      lookupSourceTable,
      lookupSource.getColumns,
      query.querySpace,
      query.querySpaceVersion,
      query.getQueryID)
    val fseq = lookupSource.request(keyValueSourceQuery)
    fseq.map { seq =>  
      if(seq.size > 0) {
        val head = seq.head
        Some(new CompositeRow(List(head, in)))
      } else { None }
    }
  }

  /**
   * Implementors can override this method to transform the query before it is
   * passed to the index source. The default performs no transformation.
   */
  @inline protected def transformIndexQuery(query: Q): Set[Q] = Set(query)
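  // A hypothetical override (illustrative only): an index sharded by year could
  // split one incoming query into one query per year, e.g.
  //   override protected def transformIndexQuery(query: Q): Set[Q] =
  //     years.map(y => restrictQueryToYear(query, y)).toSet
  // where restrictQueryToYear is an assumed helper, not part of this class.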

  /**
   * Returns a list of references into the lookupSource for the given index row.
   * Each one will create a new row in the result spool.
   */
  protected def getJoinablesFromIndexSource(index: Row): Array[M]
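  // Typically (an assumption, not mandated by this class) an index row carries the
  // matching keys of the data table in a single column; an implementation reads
  // that column's value(s) and returns them as Array[M].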

  override def request(query: Q): Future[Spool[Row]] = {
    logger.debug(s"Joining in ${lookupSource.getDiscriminant} into ${indexsource.getDiscriminant} for ${query.getQueryID}")
    val queries = transformIndexQuery(query)
    val results = queries.par.map(mappedquery =>
      indexsource.request(mappedquery).flatMap(spool => spoolTransform(() => spool, mappedquery)))
    if(results.size > 1) {
      if(isOrdered(query)) {
        val rowComp = rowCompWithOrdering(query.ordering.get.column, query.ordering.get.ordering, query.ordering.get.descending)
        MergingResultSpool.mergeOrderedSpools(Seq(), results.seq.toSeq, rowComp, query.ordering.get.descending, Array[Int]())
      } else {
        MergingResultSpool.mergeUnorderedResults(Seq(), results.seq.toSeq, Array[Int]())
      }
    } else {
      results.head
    }
  }

  /**
   * Implementors provide information on whether their ordering is the same as,
   * or a transformation of, the ordering of the original query.
   */
  @inline protected def isOrderedAccordingToOrignalOrdering(transformedQuery: Q, ordering: ColumnOrdering[_]): Boolean

  /**
   * As lookupSource is always ordered, ordering depends only on the original source
   * or on the transformed queries. If any one of the transformed queries is
   * unordered, the whole result is unordered.
   */ 
  @inline override def isOrdered(query: Q): Boolean = {
    query.getOrdering match {
      case Some(ordering) => transformIndexQuery(query).find(
          q => !(indexsource.isOrdered(q) && isOrderedAccordingToOrignalOrdering(q, ordering))).isEmpty
      case None => false
    }
  }

  /**
   * Combines the graphs of both sub-sources, adding edges from each of them
   * into this join source.
   */
  override def getGraph: Graph[Source[DomainQuery, Spool[Row]], DiEdge] = (indexsource.getGraph + 
    DiEdge(indexsource.asInstanceOf[Source[DomainQuery, Spool[Row]]],
    this.asInstanceOf[Source[DomainQuery, Spool[Row]]])) ++ 
    (lookupSource.getGraph.asInstanceOf[Graph[Source[DomainQuery, Spool[Row]], DiEdge]] +
    DiEdge(lookupSource.asInstanceOf[Source[DomainQuery, Spool[Row]]],
    this.asInstanceOf[Source[DomainQuery, Spool[Row]]]))

  override def getDiscriminant: String = indexsource.getDiscriminant + lookupSource.getDiscriminant

  override def createCache: Cache[Nothing] = new NullCache
}

object SpoolUtils {
  implicit class SpoolBufferExtender(val spool: Spool[Row]) {
    /**
     * Consumes the spool in chunks of count elements, applies f to each chunk and
     * concatenates the transformed chunks into a new spool.
     */
    def mapBuffered(count: Int, f: Seq[Row] => Seq[Row]): Spool[Row] = {
      // eagerly fetches up to counter elements into seq and returns the remaining spool
      @tailrec def fillSubBuffer(counter: Int, seq: Seq[Row], spool: => Spool[Row]): (Seq[Row], Spool[Row]) = {
        if(counter <= 0 || spool.isEmpty) {
          (seq, spool)
        } else {
          fillSubBuffer(counter - 1, seq :+ spool.head, Await.result(spool.tail))
        }
      }
      if (spool.isEmpty) {
        Spool.empty[Row]
      } else {
        fillSubBuffer(count, Seq(), spool) match {
          case (seq, reducedSpool) => rowSeqToSpool(f(seq)) ++ reducedSpool.mapBuffered(count, f)
        }
      }
    }
  }

  /** Converts a sequence of rows into a spool. */
  def rowSeqToSpool(seq: Seq[Row]): Spool[Row] = 
    if (!seq.isEmpty)
      seq.head *:: Future.value(rowSeqToSpool(seq.tail))
    else
      Spool.empty
}
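// Usage sketch for mapBuffered (hypothetical spool value, for illustration only):
//   import SpoolUtils._
//   // rewrite each eagerly fetched chunk of 50 rows; here an identity transform:
//   val chunked: Spool[Row] = someRowSpool.mapBuffered(50, rows => rows)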