All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.ac.starlink.ecsv.EcsvEncoder Maven / Gradle / Ivy

There is a newer version: 4.3
Show newest version
package uk.ac.starlink.ecsv;

import java.lang.reflect.Array;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import uk.ac.starlink.table.ColumnInfo;
import uk.ac.starlink.table.Tables;

/**
 * Converts values for a given column into strings suitable for inclusion
 * in the body of an ECSV file.
 *
 * @author   Mark Taylor
 * @since    28 Apr 2020
 */
public abstract class EcsvEncoder {

    /** ECSV datatype name, as returned by getDatatype. */
    private final String datatype_;
    /** ECSV subtype string, as returned by getSubtype; may be null. */
    private final String subtype_;
    /** Text written for a true value by the scalar boolean encoder. */
    private static final String ECSV_TRUE = "True";
    /** Text written for a false value by the scalar boolean encoder. */
    private static final String ECSV_FALSE = "False";
    /** JSON null token; used for blank array elements and for the
     *  dimension part of the subtype of non-fixed-shape arrays. */
    private static final String JSON_BLANK = "null";
    /** JSON token written for a true element of a boolean array. */
    private static final String JSON_TRUE = "true";
    /** JSON token written for a false element of a boolean array. */
    private static final String JSON_FALSE = "false";

    /**
     * Constructs an encoder that declares the given datatype and subtype.
     *
     * @param  datatype  ECSV datatype name declared by this encoder
     * @param  subtype   ECSV subtype string declared by this encoder,
     *                   or null if it has none
     */
    protected EcsvEncoder( String datatype, String subtype ) {
        this.subtype_ = subtype;
        this.datatype_ = datatype;
    }

    /**
     * Constructs an encoder that declares the given datatype
     * and has a null subtype.
     *
     * @param  datatype  ECSV datatype name declared by this encoder
     */
    private EcsvEncoder( String datatype ) {
        this( datatype, (String) null );
    }

    /**
     * Formats a value of this encoder's datatype for insertion into
     * the body of an ECSV file.  The returned string must already carry
     * any quoting it requires: it may not include unescaped
     * delimiters, newlines or double quote characters.
     * As a special case, null may be returned instead if no conversion
     * can be performed.
     *
     * @param   value   typed value to encode
     * @return   formatted string representation, or null if the value
     *           cannot be converted
     */
    public abstract String encode( Object value );

    /**
     * Reports the ECSV datatype name that this encoder was
     * constructed with.
     *
     * @return   ECSV datatype string
     */
    public String getDatatype() {
        return this.datatype_;
    }

    /**
     * Reports the ECSV subtype string that this encoder was
     * constructed with.
     *
     * @return  ECSV subtype string, may be null
     */
    public String getSubtype() {
        return this.subtype_;
    }

    /**
     * Factory method that selects an encoder according to the content
     * class of a column.
     * Columns holding Boolean, Byte, Short, Integer, Long, Float, Double,
     * String or Character values get a scalar encoder; columns holding
     * primitive arrays or String arrays get an encoder that writes each
     * value as a JSON array.
     * Short and short[] columns whose metadata carries a true
     * {@link Tables#UBYTE_FLAG_INFO} flag are declared as uint8
     * rather than int16.
     *
     * @param  info  metadata of the column to be encoded
     * @param  delimiter   delimiter value, must be comma or space;
     *                     it determines how output strings are quoted
     * @return    encoder for the column, or null if the content class
     *            is not one of those listed above
     */
    public static EcsvEncoder createEncoder( ColumnInfo info,
                                             final char delimiter ) {
        final Class<?> contentClass = info.getContentClass();

        /* Scalar boolean: written using the ECSV True/False tokens. */
        if ( contentClass == Boolean.class ) {
            return new EcsvEncoder( "bool" ) {
                @Override
                public String encode( Object value ) {
                    if ( ! ( value instanceof Boolean ) ) {
                        return null;
                    }
                    return ((Boolean) value).booleanValue() ? ECSV_TRUE
                                                            : ECSV_FALSE;
                }
            };
        }

        /* Scalar integer types: anything of the wrong class, including
         * null, yields a null result. */
        if ( contentClass == Byte.class ) {
            return new EcsvEncoder( "int8" ) {
                @Override
                public String encode( Object value ) {
                    if ( value instanceof Byte ) {
                        return value.toString();
                    }
                    return null;
                }
            };
        }
        if ( contentClass == Short.class ) {
            final String shortType = hasUbyteFlag( info ) ? "uint8" : "int16";
            return new EcsvEncoder( shortType ) {
                @Override
                public String encode( Object value ) {
                    if ( value instanceof Short ) {
                        return value.toString();
                    }
                    return null;
                }
            };
        }
        if ( contentClass == Integer.class ) {
            return new EcsvEncoder( "int32" ) {
                @Override
                public String encode( Object value ) {
                    if ( value instanceof Integer ) {
                        return value.toString();
                    }
                    return null;
                }
            };
        }
        if ( contentClass == Long.class ) {
            return new EcsvEncoder( "int64" ) {
                @Override
                public String encode( Object value ) {
                    if ( value instanceof Long ) {
                        return value.toString();
                    }
                    return null;
                }
            };
        }

        /* Scalar floating point types: null and NaN are both written
         * as "nan", infinities as "inf"/"-inf". */
        if ( contentClass == Float.class ) {
            return new EcsvEncoder( "float32" ) {
                @Override
                public String encode( Object value ) {
                    if ( value == null ) {
                        return "nan";
                    }
                    if ( ! ( value instanceof Float ) ) {
                        return null;
                    }
                    float fv = ((Float) value).floatValue();
                    if ( Float.isNaN( fv ) ) {
                        return "nan";
                    }
                    if ( Float.isInfinite( fv ) ) {
                        return fv > 0 ? "inf" : "-inf";
                    }
                    return Float.toString( fv );
                }
            };
        }
        if ( contentClass == Double.class ) {
            return new EcsvEncoder( "float64" ) {
                @Override
                public String encode( Object value ) {
                    if ( value == null ) {
                        return "nan";
                    }
                    if ( ! ( value instanceof Double ) ) {
                        return null;
                    }
                    double dv = ((Double) value).doubleValue();
                    if ( Double.isNaN( dv ) ) {
                        return "nan";
                    }
                    if ( Double.isInfinite( dv ) ) {
                        return dv > 0 ? "inf" : "-inf";
                    }
                    return Double.toString( dv );
                }
            };
        }

        /* Scalar text types: quoted as required by the delimiter. */
        if ( contentClass == String.class ) {
            return new EcsvEncoder( "string" ) {
                @Override
                public String encode( Object value ) {
                    if ( value instanceof String ) {
                        return quoteString( (String) value, delimiter );
                    }
                    return null;
                }
            };
        }
        if ( contentClass == Character.class ) {
            return new EcsvEncoder( "string" ) {
                @Override
                public String encode( Object value ) {
                    if ( value instanceof Character ) {
                        return quoteString( value.toString(), delimiter );
                    }
                    return null;
                }
            };
        }

        /* Array types: each element is serialized as a JSON scalar. */
        if ( contentClass == boolean[].class ) {
            return createArrayEncoder(
                "bool", info.getShape(), boolean[].class, delimiter,
                ( flags, ix ) -> flags[ ix ] ? JSON_TRUE : JSON_FALSE );
        }
        if ( contentClass == byte[].class ) {
            return createArrayEncoder(
                "int8", info.getShape(), byte[].class, delimiter,
                ( bytes, ix ) -> Byte.toString( bytes[ ix ] ) );
        }
        if ( contentClass == short[].class ) {
            String shortType = hasUbyteFlag( info ) ? "uint8" : "int16";
            return createArrayEncoder(
                shortType, info.getShape(), short[].class, delimiter,
                ( shorts, ix ) -> Short.toString( shorts[ ix ] ) );
        }
        if ( contentClass == int[].class ) {
            return createArrayEncoder(
                "int32", info.getShape(), int[].class, delimiter,
                ( ints, ix ) -> Integer.toString( ints[ ix ] ) );
        }
        if ( contentClass == long[].class ) {
            return createArrayEncoder(
                "int64", info.getShape(), long[].class, delimiter,
                ( longs, ix ) -> Long.toString( longs[ ix ] ) );
        }

        /* Non-finite floating point elements are written as JSON null. */
        if ( contentClass == float[].class ) {
            return createArrayEncoder(
                "float32", info.getShape(), float[].class, delimiter,
                ( floats, ix ) -> {
                    float fv = floats[ ix ];
                    return Float.isFinite( fv ) ? Float.toString( fv )
                                                : JSON_BLANK;
                } );
        }
        if ( contentClass == double[].class ) {
            return createArrayEncoder(
                "float64", info.getShape(), double[].class, delimiter,
                ( doubles, ix ) -> {
                    double dv = doubles[ ix ];
                    return Double.isFinite( dv ) ? Double.toString( dv )
                                                 : JSON_BLANK;
                } );
        }
        if ( contentClass == String[].class ) {
            return createArrayEncoder(
                "string", info.getShape(), String[].class, delimiter,
                ( strings, ix ) -> toJsonString( strings[ ix ] ) );
        }

        /* Unsupported content class. */
        return null;
    }

    /**
     * Indicates whether column metadata carries a true value for the
     * {@link Tables#UBYTE_FLAG_INFO} auxiliary item.
     *
     * @param  info  column metadata
     * @return   true iff the flag value is Boolean.TRUE
     */
    private static boolean hasUbyteFlag( ColumnInfo info ) {
        Object flag = info.getAuxDatumValue( Tables.UBYTE_FLAG_INFO,
                                             Boolean.class );
        return Boolean.TRUE.equals( flag );
    }

    /**
     * Returns a suitably escaped value of a string, ensuring that no
     * ECSV control characters are present unquoted in the result.
     *
     * <p>The input is returned unchanged unless it contains the
     * delimiter character, a double quote, a newline or a carriage
     * return.  In those cases it is wrapped in double quotes, with any
     * embedded double quote doubled.  An empty string is written as
     * <code>""</code> when the delimiter is a space (so that the field
     * remains visible) and as an empty string otherwise.
     *
     * @param  txt  raw string, must not be null
     * @param  delimiter   delimiter character to be avoided,
     *                     must be comma or space
     * @return   sanitised version of the string
     * @throws  NullPointerException  if <code>txt</code> is null
     */
    public static String quoteString( String txt, char delimiter ) {
        int nc = txt.length();
        if ( nc == 0 ) {
            return delimiter == ' ' ? "\"\"" : "";
        }

        /* Scan once to find out whether quoting is required and how many
         * double quotes will need to be doubled. */
        boolean needsQuote = false;
        int ndq = 0;
        for ( int ic = 0; ic < nc; ic++ ) {
            char c = txt.charAt( ic );
            switch ( c ) {
                case ' ':
                case ',':
                    /* Only the delimiter actually in use is a problem. */
                    needsQuote |= ( c == delimiter );
                    break;
                case '"':
                    ndq++;
                    needsQuote = true;
                    break;
                case '\n':
                case '\r':
                    /* Both line-break characters would otherwise be
                     * liable to be read as the end of the row. */
                    needsQuote = true;
                    break;
                default:
            }
        }
        if ( ! needsQuote ) {
            return txt;
        }

        /* Output length: content, two enclosing quotes, and one extra
         * character for every doubled quote. */
        int nqc = 2 + nc + ndq;
        StringBuilder sbuf = new StringBuilder( nqc );
        sbuf.append( '"' );
        for ( int ic = 0; ic < nc; ic++ ) {
            char c = txt.charAt( ic );
            sbuf.append( c );
            if ( c == '"' ) {
                sbuf.append( c );
            }
        }
        sbuf.append( '"' );
        assert sbuf.length() == nqc;
        return sbuf.toString();
    }

    /**
     * Encodes a string value to its JSON representation.
     *
     * <p>A null input is written as the JSON null token.
     * Otherwise the text is enclosed in double quotes; double quote,
     * backslash, backspace, form feed, newline, carriage return and tab
     * are written using their two-character escapes, and every other
     * character outside the range U+0020 to U+007F is written as a
     * <code>&#92;uXXXX</code> escape.  That includes the remaining control
     * characters below U+0020, which JSON does not permit unescaped
     * inside a string.
     *
     * @param  txt  input string, may be null
     * @return   JSON serialization, including surrounding quote characters
     */
    private static String toJsonString( String txt ) {
        if ( txt == null ) {
            return JSON_BLANK;
        }
        int leng = txt.length();
        StringBuilder sbuf = new StringBuilder( leng + 2 );
        sbuf.append( '"' );
        for ( int i = 0; i < leng; i++ ) {
            char c = txt.charAt( i );
            switch ( c ) {
                case '"':
                    sbuf.append( "\\\"" );
                    break;
                case '\\':
                    sbuf.append( "\\\\" );
                    break;
                case '\b':
                    sbuf.append( "\\b" );
                    break;
                case '\f':
                    sbuf.append( "\\f" );
                    break;
                case '\n':
                    sbuf.append( "\\n" );
                    break;
                case '\r':
                    sbuf.append( "\\r" );
                    break;
                case '\t':
                    sbuf.append( "\\t" );
                    break;
                default:
                    if ( c >= 0x20 && c <= 0x7f ) {
                        sbuf.append( c );
                    }
                    else {

                        /* Control characters and non-ASCII characters
                         * are written as a zero-padded 4-digit
                         * hexadecimal escape. */
                        sbuf.append( "\\u" );
                        String hex = Integer.toHexString( c );
                        for ( int j = hex.length(); j < 4; j++ ) {
                            sbuf.append( '0' );
                        }
                        sbuf.append( hex );
                    }
            }
        }
        sbuf.append( '"' );
        return sbuf.toString();
    }

    /**
     * Returns an encoder for array-valued types.
     * The returned encoder declares ECSV datatype "string" and a subtype
     * of the form <code>elementType[dims]</code>, and writes each value
     * as a JSON array, quoted as required for the delimiter.
     *
     * <p>If every entry of the supplied shape is positive, the array is
     * treated as fixed-shape: the subtype lists the dimensions in the
     * reverse of the order in which the STIL shape gives them, and
     * a multi-dimensional value is written as nested JSON arrays.
     * Otherwise (null, empty, or any non-positive entry) the subtype
     * dimension is written as <code>null</code> and values are written
     * as a flat JSON array of whatever length they have.
     *
     * @param   <T>  array type of the values to be encoded
     * @param   elementType  ECSV name for element type
     * @param   stilShape  STIL-format shape array, may be null
     * @param   aclazz    array class of the values to be encoded
     * @param   delimiter   ECSV delimiter character
     * @param   elEnc   function for encoding a single element to JSON format
     * @return   array encoder instance
     */
    private static <T> EcsvEncoder
            createArrayEncoder( String elementType, int[] stilShape,
                                Class<T> aclazz, char delimiter,
                                final ElementEncoder<T> elEnc ) {
        final String dimsTxt;
        final int nel;
        final int ndim;
        if ( stilShape == null || stilShape.length == 0 ) {
            dimsTxt = JSON_BLANK;
            nel = -1;
            ndim = 1;
        }
        else {
            int nd = stilShape.length;
            boolean isFixed = true;
            int n = 1;
            for ( int i = 0; i < nd; i++ ) {
                int dim = stilShape[ i ];
                isFixed = isFixed && dim > 0;
                n *= Math.abs( dim );
            }
            if ( isFixed ) {
                nel = n;

                /* Dimensions are declared in reverse of the STIL order. */
                dimsTxt = IntStream
                         .range( 0, nd )
                         .map( i -> stilShape[ nd - 1 - i ] )
                         .mapToObj( Integer::toString )
                         .collect( Collectors.joining( "," ) );
                ndim = nd;
            }
            else {
                nel = -1;
                dimsTxt = JSON_BLANK;
                ndim = 1;
            }
        }
        String subtype = elementType + "[" + dimsTxt + "]";
        if ( ndim == 1 ) {
            return new EcsvEncoder( "string", subtype ) {
                @Override
                public String encode( Object value ) {
                    if ( aclazz.isInstance( value ) ) {
                        T tval = aclazz.cast( value );
                        int tleng = Array.getLength( tval );

                        /* For a fixed-length column write exactly the
                         * declared number of elements, padding a short
                         * value with nulls and truncating a long one. */
                        int leng = nel > 0 ? nel : tleng;
                        StringBuilder sbuf = new StringBuilder();
                        sbuf.append( '[' );
                        for ( int i = 0; i < leng; i++ ) {
                            if ( i > 0 ) {
                                sbuf.append( ',' );
                            }
                            sbuf.append( i < tleng
                                             ? elEnc.elementToJson( tval, i )
                                             : JSON_BLANK );
                        }
                        sbuf.append( ']' );
                        return quoteString( sbuf.toString(), delimiter );
                    }
                    else {
                        return null;
                    }
                }
            };
        }
        else {
            return new EcsvEncoder( "string", subtype ) {
                @Override
                public String encode( Object value ) {
                    if ( aclazz.isInstance( value ) ) {
                        T tval = aclazz.cast( value );

                        /* A multi-dimensional value is only written if
                         * it has exactly the declared element count. */
                        if ( Array.getLength( tval ) == nel ) {
                            StringBuilder sbuf = new StringBuilder();
                            appendElements( sbuf, tval, 0, stilShape, elEnc );
                            return quoteString( sbuf.toString(), delimiter );
                        }
                        else {
                            return null;
                        }
                    }
                    else {
                        return null;
                    }
                }
            };
        }
    }

    /**
     * Recursive routine for printing out elements of a multidimensional
     * array in JSON-friendly format.
     * The last entry of <code>dims</code> gives the number of items at
     * the current nesting level; each item is either a single element
     * (when only one dimension remains) or a nested array described by
     * the preceding entries.
     *
     * @param  <T>   array type
     * @param  sbuf  string builder accumulating result
     * @param  array   input full array value, values stored in one dimension
     * @param  pos    array index of next element to be appended
     * @param  dims   multidimensional shape of array
     * @param   elEnc   function for encoding a single element to JSON format
     * @return   updated array index of next element to be appended
     */
    private static <T> int appendElements( StringBuilder sbuf, T array,
                                           int pos, int[] dims,
                                           ElementEncoder<T> elEnc ) {
        sbuf.append( '[' );
        int ndim = dims.length;
        int limit = dims[ ndim - 1 ];
        if ( ndim == 1 ) {
            for ( int i = 0; i < limit; i++ ) {
                if ( i > 0 ) {
                    sbuf.append( ',' );
                }
                sbuf.append( elEnc.elementToJson( array, pos++ ) );
            }
        }
        else {

            /* Shape of each nested item: all but the last dimension. */
            int[] subdims = new int[ ndim - 1 ];
            System.arraycopy( dims, 0, subdims, 0, ndim - 1 );
            for ( int i = 0; i < limit; i++ ) {
                if ( i > 0 ) {
                    sbuf.append( ',' );
                }
                pos = appendElements( sbuf, array, pos, subdims, elEnc );
            }
        }
        sbuf.append( ']' );
        return pos;
    }

    /**
     * Defines how to write an array element.
     *
     * @param  <T>  array type
     */
    @FunctionalInterface
    private static interface ElementEncoder<T> {

        /**
         * Serializes a single element of an array to a JSON scalar
         * representation.
         *
         * @param  array  input array
         * @param  index  position of element to output
         * @return  JSON representation of element
         */
        String elementToJson( T array, int index );
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy