org.apache.spark.aliyun.odps.datasource.ODPSRelation.scala

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.aliyun.odps.datasource

import com.aliyun.odps.Odps
import com.aliyun.odps.account.AliyunAccount
import com.aliyun.odps.tunnel.TableTunnel
import org.apache.spark.aliyun.utils.OdpsUtils
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql._
import org.apache.spark.sql.sources.{BaseRelation, Filter, PrunedFilteredScan}

case class ODPSRelation(
    accessKeyId: String,
    accessKeySecret: String,
    odpsUrl: String,
    tunnelUrl: String,
    project: String,
    table: String,
    partitionSpec: String,
    numPartitions: Int)(@transient val sqlContext: SQLContext)
  extends BaseRelation
  with PrunedFilteredScan with Logging {

  // The ODPS account, client, and tunnel handles live only on the driver, hence @transient.
  @transient val account = new AliyunAccount(accessKeyId, accessKeySecret)
  @transient val odps = new Odps(account)

  odps.setEndpoint(odpsUrl)
  @transient val tunnel = new TableTunnel(odps)
  tunnel.setEndpoint(tunnelUrl)
  @transient val odpsUtils = new OdpsUtils(odps)

  // Rows are produced as InternalRow (see the cast in buildScan), so Spark's
  // Row conversion is skipped.
  override val needConversion: Boolean = false

  override val schema: StructType = {
    // Translate the ODPS table schema into a Catalyst StructType.
    val tableSchema = odpsUtils.getTableSchema(project, table, false)

    StructType(
      tableSchema.map(e => odpsUtils.getCatalystType(e._1, e._2, true))
    )
  }

  private val isPartitionTable = odpsUtils.isPartitionTable(table, project)

  override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = {
    // Filters are not pushed down to ODPS; Spark re-evaluates them after the scan.
    // Rely on a type erasure hack to pass RDD[InternalRow] back as RDD[Row].
    val parSpec =
      if (!isPartitionTable) "Non-Partitioned"
      else if (partitionSpec == null) "all"
      else partitionSpec

    val requiredSchema = StructType(requiredColumns.map(c => schema.fields(schema.fieldIndex(c))))
    val rdd = if (!parSpec.equals("all")) {
      new ODPSRDD(sqlContext.sparkContext,
        requiredSchema, accessKeyId,
        accessKeySecret,
        odpsUrl,
        tunnelUrl,
        project,
        table,
        parSpec,
        numPartitions)
    } else {
      // No partitionSpec was given: scan every partition and union the per-partition RDDs.
      val parts = odpsUtils.getAllPartitionSpecs(table, project)
      if (parts.nonEmpty) {
        parts.map {
          ptSpec =>
            new ODPSRDD(sqlContext.sparkContext,
              requiredSchema, accessKeyId,
              accessKeySecret,
              odpsUrl,
              tunnelUrl,
              project,
              table,
              ptSpec.toString,
              numPartitions)
        }.map(_.asInstanceOf[RDD[Row]]).reduce((r1, r2) => r1.union(r2))
      } else {
        // A partitioned table with no partitions yet yields an empty scan.
        sqlContext.sparkContext.emptyRDD
      }
    }

    rdd.asInstanceOf[RDD[Row]]
  }

  override def toString: String = {
    // credentials should not be included in the plan output, table information is sufficient.
    s"ODPSRelation($table)"
  }

}
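For reference, below is a minimal sketch of how this relation could be constructed and scanned directly. The endpoints, project, table, and credential lookups are placeholders rather than values from this file, and in practice the relation is normally created by the datasource's relation provider rather than by hand.

import org.apache.spark.sql.{SparkSession, SQLContext}
import org.apache.spark.sql.sources.Filter
import org.apache.spark.aliyun.odps.datasource.ODPSRelation

object ODPSRelationExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("odps-relation-example")
      .master("local[*]")
      .getOrCreate()
    val sqlContext: SQLContext = spark.sqlContext

    val relation = new ODPSRelation(
      accessKeyId = sys.env("ALIYUN_ACCESS_KEY_ID"),         // placeholder credential lookup
      accessKeySecret = sys.env("ALIYUN_ACCESS_KEY_SECRET"), // placeholder credential lookup
      odpsUrl = "http://service.odps.aliyun.com/api",        // placeholder endpoint
      tunnelUrl = "http://dt.odps.aliyun.com",               // placeholder endpoint
      project = "my_project",                                // placeholder project
      table = "my_table",                                    // placeholder table
      partitionSpec = null,                                  // null => scan all partitions
      numPartitions = 4)(sqlContext)

    // The schema is derived from the ODPS table metadata.
    println(relation.schema.treeString)

    // Column pruning only; passing no filters is fine because Spark
    // re-evaluates filters after the scan anyway.
    val rows = relation.buildScan(relation.schema.fieldNames, Array.empty[Filter])
    println(s"scan has ${rows.getNumPartitions} partitions")

    spark.stop()
  }
}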



