// com.astrolabsoftware.sparkfits.FitsSchema.scala Maven / Gradle / Ivy
// The newest version!
/*
* Copyright 2018 AstroLab Software
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.astrolabsoftware.sparkfits
import org.apache.spark.sql.types._
import com.astrolabsoftware.sparkfits.FitsLib.Fits
/**
* Object to handle the conversion from a HDU header to a DataFrame Schema.
*/
object FitsSchema {
/**
 * Translate a FITS column format code into a DataFrame `StructField`.
 * FITS type nomenclature is explained here:
 * https://fits.gsfc.nasa.gov/standard30/fits_standard30.pdf
 *
 * The cases are tried in order and the first match wins:
 *  - `I`, `J`, `K`, `E`, `D` (alone or prefixed by `1`) give a scalar
 *    `ShortType`, `IntegerType`, `LongType`, `FloatType`, `DoubleType`;
 *  - any other code containing one of those letters gives an `ArrayType`
 *    of the corresponding element type;
 *  - a code containing `L` gives `BooleanType`, `B` gives `ByteType`,
 *    `X` gives `ArrayType(BinaryType)` and `A` gives `StringType`;
 *  - anything else prints a message on stdout and falls back to `StringType`.
 *
 * @param name : (String)
 *   The name of the future column in the DataFrame.
 * @param fitstype : (String)
 *   The type code of the column as found in the FITS header.
 * @param isNullable : (Boolean)
 *   Whether the column is nullable. Default is true.
 * @return a `StructField` carrying the name, the type and the nullability.
 *
 */
def ReadMyType(name : String, fitstype : String, isNullable : Boolean = true): StructField = {
  // Every branch shares the column name and nullability; only the Spark
  // data type changes from one FITS code to the next.
  def field(dataType: DataType): StructField = StructField(name, dataType, isNullable)

  fitstype match {
    // 16-bit integers: scalar first, then vectors (nI).
    case "I" | "1I" => field(ShortType)
    case code if code.contains("I") => field(ArrayType(ShortType))
    // 32-bit integers.
    case "J" | "1J" => field(IntegerType)
    case code if code.contains("J") => field(ArrayType(IntegerType))
    // 64-bit integers.
    case "K" | "1K" => field(LongType)
    case code if code.contains("K") => field(ArrayType(LongType))
    // Single precision floating point.
    case "E" | "1E" => field(FloatType)
    case code if code.contains("E") => field(ArrayType(FloatType))
    // Double precision floating point.
    case "D" | "1D" => field(DoubleType)
    case code if code.contains("D") => field(ArrayType(DoubleType))
    // Remaining codes are only recognised by the letter they contain.
    case code if code.contains("L") => field(BooleanType)
    case code if code.contains("B") => field(ByteType)
    case code if code.contains("X") => field(ArrayType(BinaryType))
    case code if code.contains("A") => field(StringType)
    case _ =>
      // Unknown code: report it and treat the column as a string.
      println(s"""FitsSchema.ReadMyType> Cannot infer type $fitstype from the header!
See com.astrolabsoftware.sparkfits.FitsSchema.scala
""")
      field(StringType)
  }
}
/**
 * Build the list of `StructField` from which a DataFrame Schema is made.
 *
 * The cursor of the `Fits` instance is reset at the header, the header is
 * read and validated with `checkAnyHeader`, and the fields are then taken
 * from the HDU itself. An HDU that is not flagged as implemented yields an
 * empty list.
 *
 * @param fits : (Fits)
 *   Fits instance.
 * @param col : (Int)
 *   Column index (default 0). It is not read by this method body.
 * @return a `List[StructField]` with the name and type of the columns,
 *   empty when the HDU is not implemented.
 */
def ListOfStruct(fits : Fits, col : Int = 0) : List[StructField] = {
  // The header must be read from its start, so rewind first.
  fits.resetCursorAtHeader

  // Fail early on a malformed header before asking the HDU for its fields.
  checkAnyHeader(fits.blockHeader)

  if (!fits.hdu.implemented) Nil else fits.hdu.listOfStruct
}
/**
 * Build the DataFrame Schema of an HDU from its header.
 *
 * @param fits : (Fits)
 *   Fits instance
 * @return a `StructType` wrapping the `StructField` list returned by
 *   `ListOfStruct` for this Fits instance.
 *
 */
def getSchema(fits : Fits) : StructType = {
  // One field per column, as described by the header.
  val fields = ListOfStruct(fits)
  StructType(fields)
}
/**
 * Schema to use for an empty DataFrame.
 *
 * @return a `StructType` holding a single nullable string field
 *   named "empty".
 *
 */
def getEmptySchema : StructType = {
  // Single placeholder column.
  val placeholder = StructField("empty", StringType, nullable = true)
  StructType(List(placeholder))
}
/**
 * A few checks on the header, valid for any header type.
 *
 * Two conditions are verified, in this order:
 *  - the first header line contains `XTENSION` (extension HDU) or `SIMPLE`
 *    (primary HDU, which has no XTENSION keyword);
 *  - the last header line contains `END`.
 *
 * An empty header fails the first check rather than triggering an
 * out-of-bounds access.
 *
 * @param header : (Array[String])
 *   The header of the HDU, one entry per header line.
 * @throws AssertionError if one of the two checks fails.
 */
def checkAnyHeader(header : Array[String]) : Unit = {
  // headOption keeps an empty header on the regular error path.
  val startsWithKnownKeyword = header.headOption.exists { firstLine =>
    firstLine.contains("XTENSION") || firstLine.contains("SIMPLE")
  }
  if (!startsWithKnownKeyword) {
    throw new AssertionError("""
Your header has no keywords called XTENSION.
Check that the HDU number you want to
access is correct: spark.readfits.option("HDU", <Int>).
""")
  }

  // lastOption reads the final line without copying the array.
  val endsWithEnd = header.lastOption.exists(_.contains("END"))
  if (!endsWithEnd) {
    throw new AssertionError("""
There is a problem with your HEADER. It should end with END.
Is it a standard header of size 2880 bytes? You should check it
using the option spark.readfits.option("verbose", true).
""")
  }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy