All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.alibaba.hologres.spark.sink.FieldWriter.scala Maven / Gradle / Ivy

The newest version!
package com.alibaba.hologres.spark.sink

import com.alibaba.hologres.client.model.Column
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types.StringType

import java.sql.{Date, Timestamp, Types}
import java.time.LocalDate

/**
 * Extracts the value of a single field from a Spark [[InternalRow]].
 *
 * Each implementation in this file reads the field through one specific accessor and narrows the
 * return type accordingly.
 */
trait FieldWriter {
  /**
   * Reads the field at position `idx` of `row`.
   *
   * @param row the row to read from
   * @param idx ordinal of the field within `row`
   * @return the field value; the concrete type is narrowed by each implementation
   */
  def writeValue(row: InternalRow, idx: Int): Any
}

/** Reads the field at `idx` via `InternalRow.getShort`. */
class ShortFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Short = row.getShort(idx)
}

/** Reads the field at `idx` via `InternalRow.getInt`. */
class IntFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Int = row.getInt(idx)
}

/** Reads the field at `idx` via `InternalRow.getLong`. */
class LongFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Long = row.getLong(idx)
}

/** Reads the field at `idx` via `InternalRow.getFloat`. */
class FloatFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Float = row.getFloat(idx)
}

/** Reads the field at `idx` via `InternalRow.getDouble`. */
class DoubleFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Double = row.getDouble(idx)
}

/**
 * Reads a decimal field and returns it as a `java.math.BigDecimal`.
 *
 * @param precision precision passed to `InternalRow.getDecimal`
 * @param scale     scale passed to `InternalRow.getDecimal`
 */
class DecimalFieldWriter(precision: Int, scale: Int) extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): java.math.BigDecimal = {
    val sparkDecimal = row.getDecimal(idx, precision, scale)
    sparkDecimal.toJavaBigDecimal
  }
}

/** Reads the field at `idx` via `InternalRow.getBoolean`. */
class BooleanFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Boolean = row.getBoolean(idx)
}

/**
 * Reads a string field, optionally stripping NUL (`\u0000`) characters from it.
 *
 * @param removeU0000 when true, every `\u0000` character is removed from the returned string
 */
class StringFieldWriter(removeU0000: Boolean) extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): String = {
    val text = row.getString(idx)
    if (removeU0000) text.replaceAll("\u0000", "") else text
  }
}

/**
 * Reads a date field and converts it to a `java.sql.Date`.
 *
 * Spark stores `DateType` values as an Int holding the number of days since 1970-01-01, so the
 * field is read with `getInt`. Reading it with `getLong` fails with a `ClassCastException` on
 * generic rows and misreads negative day counts (dates before 1970) on unsafe rows.
 */
class DateFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Date = {
    val epochDay = row.getInt(idx)
    Date.valueOf(LocalDate.ofEpochDay(epochDay.toLong))
  }
}

/**
 * Reads a timestamp field and converts it to a `java.sql.Timestamp`.
 *
 * Spark stores `TimestampType` values as a Long holding microseconds since the epoch. The value is
 * split with floor division so that pre-epoch (negative) values round toward the past rather than
 * toward zero, and the sub-second part is written through `setNanos` so microsecond precision is
 * kept instead of being truncated to milliseconds.
 */
class TimestampFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Timestamp = {
    val micros = row.getLong(idx)
    val epochSeconds = Math.floorDiv(micros, 1000000L)
    // Always in [0, 999999] thanks to floorMod, even for negative inputs.
    val microsOfSecond = Math.floorMod(micros, 1000000L)
    val timestamp = new Timestamp(epochSeconds * 1000L)
    // setNanos replaces the whole fractional-second part, so start from a whole-second instant.
    timestamp.setNanos((microsOfSecond * 1000L).toInt)
    timestamp
  }
}

/** Reads the field at `idx` via `InternalRow.getBinary`. */
class BinaryFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Array[Byte] = row.getBinary(idx)
}

/** Reads an array field and converts it with `toIntArray()`. */
class IntArrayFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Array[Int] = {
    val arrayData = row.getArray(idx)
    arrayData.toIntArray()
  }
}

/** Reads an array field and converts it with `toLongArray()`. */
class LongArrayFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Array[Long] = {
    val arrayData = row.getArray(idx)
    arrayData.toLongArray()
  }
}

/** Reads an array field and converts it with `toFloatArray()`. */
class FloatArrayFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Array[Float] = {
    val arrayData = row.getArray(idx)
    arrayData.toFloatArray()
  }
}

/** Reads an array field and converts it with `toDoubleArray()`. */
class DoubleArrayFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Array[Double] = {
    val arrayData = row.getArray(idx)
    arrayData.toDoubleArray()
  }
}

/** Reads an array field and converts it with `toBooleanArray()`. */
class BooleanArrayFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Array[Boolean] = {
    val arrayData = row.getArray(idx)
    arrayData.toBooleanArray()
  }
}

/**
 * Reads an array field as objects of `StringType` and returns their string forms.
 *
 * Null elements are replaced by the empty string.
 */
class StringArrayFieldWriter extends FieldWriter {
  override def writeValue(row: InternalRow, idx: Int): Array[String] = {
    val elements = row.getArray(idx).toObjectArray(StringType)
    // Consistent with how InternalRow array getters behave: when an array element is null,
    // the default value, the empty string "", is used.
    elements.map {
      case null => ""
      case element => element.toString
    }
  }
}

/** Factory for the [[FieldWriter]] matching a Hologres column. */
object FieldWriterUtils {
  /**
   * Picks the [[FieldWriter]] for a Hologres column based on its JDBC type code and, for
   * `Types.OTHER` and `Types.ARRAY`, on its type name.
   *
   * @param holoColumn  the target Hologres column
   * @param removeU0000 forwarded to [[StringFieldWriter]] for character and json/jsonb columns
   * @return a writer for the column's type
   * @throws IllegalArgumentException if the column type, or the type name of an
   *                                  `OTHER`/`ARRAY` column, is not supported
   */
  def createFieldWriter(holoColumn: Column, removeU0000: Boolean = false): FieldWriter = {
    holoColumn.getType match {
      case Types.TINYINT | Types.SMALLINT =>
        new ShortFieldWriter
      case Types.INTEGER =>
        new IntFieldWriter
      case Types.BIGINT =>
        new LongFieldWriter
      case Types.REAL | Types.FLOAT =>
        new FloatFieldWriter
      case Types.DOUBLE =>
        new DoubleFieldWriter
      case Types.NUMERIC | Types.DECIMAL =>
        new DecimalFieldWriter(holoColumn.getPrecision, holoColumn.getScale)
      case Types.BOOLEAN | Types.BIT =>
        new BooleanFieldWriter
      case Types.CHAR | Types.VARCHAR | Types.LONGVARCHAR =>
        new StringFieldWriter(removeU0000)
      case Types.DATE => new DateFieldWriter
      case Types.TIMESTAMP => new TimestampFieldWriter
      case Types.BINARY | Types.VARBINARY => new BinaryFieldWriter
      case Types.OTHER =>
        holoColumn.getTypeName match {
          case "json" | "jsonb" => new StringFieldWriter(removeU0000)
          case "roaringbitmap" => new BinaryFieldWriter
          // Without this fallback an unknown type name surfaces as scala.MatchError.
          case _ => throw unsupportedType(holoColumn)
        }
      case Types.ARRAY =>
        holoColumn.getTypeName match {
          case "_int4" => new IntArrayFieldWriter
          case "_int8" => new LongArrayFieldWriter
          case "_float4" => new FloatArrayFieldWriter
          case "_float8" => new DoubleArrayFieldWriter
          case "_bool" => new BooleanArrayFieldWriter
          case "_varchar" | "_text" => new StringArrayFieldWriter
          // Same as above: report unsupported element types with the regular error.
          case _ => throw unsupportedType(holoColumn)
        }
      case _ =>
        throw unsupportedType(holoColumn)
    }
  }

  /** Builds the exception reported for any column type the sink cannot write. */
  private def unsupportedType(holoColumn: Column): IllegalArgumentException =
    new IllegalArgumentException(String.format("Hologres sink does not support data type %s for now", holoColumn.getTypeName))

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy