All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.ac.starlink.table.formats.TstStarTable Maven / Gradle / Ivy

package uk.ac.starlink.table.formats;

import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import uk.ac.starlink.table.ColumnInfo;
import uk.ac.starlink.table.DefaultValueInfo;
import uk.ac.starlink.table.DescribedValue;
import uk.ac.starlink.table.TableFormatException;
import uk.ac.starlink.table.ValueInfo;
import uk.ac.starlink.util.DataSource;


/**
 * StarTable implementation which reads tables in Tab-Separated Table format.
 * This is used by GAIA/SkyCat amongst other software.
 * Documentation of the format can be found in Starlink System Note 75
 * (SSN/75).
 *
 * <p>As parsed by this class, the input consists of a header, a
 * tab-separated line of column names, a "ruler" line made only of
 * dashes and tabs, and then tab-separated data rows which may be
 * terminated by an <code>[EOD]</code> line.
 * Within the header, lines starting with <code>#</code> are ignored,
 * lines of the form <code>name: value</code> are treated as parameters,
 * a first line which is none of these is used as the table name,
 * and remaining non-blank lines are collected into a description.
 *
 * @author   Mark Taylor
 * @since    1 Aug 2006
 */
class TstStarTable extends StreamStarTable {

    /** Number of columns; set when the header is parsed, and used to
     *  validate each data row. */
    private int ncol_;

    private static final Pattern COMMENT_REGEX =
        Pattern.compile( "^\\s*#.*" );
    private static final Pattern BLANK_REGEX =
        Pattern.compile( "^\\s*$" );
    private static final Pattern RULER_REGEX =
        Pattern.compile( "^[\\t\\-]*-[\\t\\-]*$" );
    private static final Pattern PARAM_REGEX =
        Pattern.compile( "^(\\S+):\\s*(.*)" );
    private static final Pattern EOD_REGEX =
        Pattern.compile( "^\\s*\\[EOD\\]\\s*$" );

    private static final Logger logger_ =
        Logger.getLogger( "uk.ac.starlink.table.formats" );

    /** Key for parameter composed of freeform description lines. */
    public static final ValueInfo DESCRIPTION_INFO =
        new DefaultValueInfo( "Description", String.class, 
                              "Free text description of the table" );

    /**
     * Constructor.
     *
     * @param  datsrc  data source for table text
     * @throws  TableFormatException  if the text does not look like TST
     * @throws  IOException  if there is an error reading the data
     */
    public TstStarTable( DataSource datsrc )
            throws IOException, TableFormatException {
        super();
        init( datsrc );
    }

    /**
     * Reads the whole input once to work out the table metadata:
     * table name, parameters, column names and column data types.
     * The stream opened for this purpose is closed before returning.
     *
     * @return  metadata describing the columns of this table
     * @throws  TableFormatException  if the header or a data row is
     *          not valid TST
     * @throws  IOException  if there is an error reading the data
     */
    protected RowEvaluator.Metadata obtainMetadata()
            throws TableFormatException, IOException {

        /* Use the superclass stream, since the header has to be parsed
         * here rather than skipped as this class's override does. */
        PushbackInputStream in = super.getInputStream();
        try {
            return readMetadata( in );
        }
        finally {
            in.close();
        }
    }

    /**
     * Parses the header and all the data rows from a stream positioned
     * at the start of the TST text.  As a side effect this sets the
     * column count, the table name and the table parameters.
     *
     * @param   in  input stream positioned at the start of the text
     * @return  metadata describing the columns of this table
     */
    private RowEvaluator.Metadata readMetadata( PushbackInputStream in )
            throws TableFormatException, IOException {

        /* Read all the text before the data itself. */
        List<String> lineList = readHeaderLines( in );

        /* The ruler must be preceded by a line of column names. */
        if ( lineList.size() < 2 ) {
            throw new TableFormatException( "No column name line "
                                          + "before ruler line" );
        }

        /* Acquire and validate the column names. */
        String ruler = lineList.remove( lineList.size() - 1 );
        String colsLine = lineList.remove( lineList.size() - 1 );
        assert RULER_REGEX.matcher( ruler ).matches();
        List<String> colNames = tabSplit( colsLine );

        /* SExtractor likes to add a trailing TAB to this line. */
        if ( colNames.get( colNames.size() - 1 ).length() == 0 ) {
            colNames.remove( colNames.size() - 1 );
        }

        /* The ruler must have one field per column; tolerate a single
         * trailing TAB on it in the same way as for data rows. */
        List<String> rulerFields = tabSplit( ruler );
        if ( rulerFields.size() == colNames.size() + 1 &&
             rulerFields.get( colNames.size() ).length() == 0 ) {
            rulerFields.remove( colNames.size() );
        }
        if ( rulerFields.size() != colNames.size() ) {
            throw new TableFormatException( "Ruler line and column name line "
                                          + "have different numbers of tabs" );
        }
        ncol_ = colNames.size();

        /* Get table title from first line. */
        if ( ! lineList.isEmpty() ) {
            String line0 = lineList.get( 0 );
            if ( ! COMMENT_REGEX.matcher( line0 ).matches() &&
                 ! BLANK_REGEX.matcher( line0 ).matches() &&
                 ! PARAM_REGEX.matcher( line0 ).matches() ) {
                setName( lineList.remove( 0 ).trim() );
            }
        }

        /* Set default values for special column indices. */
        int idIndex = ncol_ > 0 ? 0 : -1;
        int raIndex = ncol_ > 1 ? 1 : -1;
        int decIndex = ncol_ > 2 ? 2 : -1;
        int xIndex = -1;
        int yIndex = -1;

        /* Read table parameters.  Every parameter line is removed from
         * the list so that only free text is left for the description. */
        List<DescribedValue> paramList = new ArrayList<DescribedValue>();
        for ( Iterator<String> it = lineList.iterator(); it.hasNext(); ) {
            String line = it.next();
            Matcher pmatcher = PARAM_REGEX.matcher( line );
            if ( ! COMMENT_REGEX.matcher( line ).matches() &&
                 pmatcher.matches() ) {
                String pname = pmatcher.group( 1 );
                String pval = pmatcher.group( 2 );
                if ( "id_col".equalsIgnoreCase( pname ) ) {
                    idIndex = parseColumnIndex( pval, ncol_ );
                }
                else if ( "ra_col".equalsIgnoreCase( pname ) ) {
                    raIndex = parseColumnIndex( pval, ncol_ );
                }
                else if ( "dec_col".equalsIgnoreCase( pname ) ) {
                    decIndex = parseColumnIndex( pval, ncol_ );
                }
                else if ( "x_col".equalsIgnoreCase( pname ) ) {
                    xIndex = parseColumnIndex( pval, ncol_ );
                }
                else if ( "y_col".equalsIgnoreCase( pname ) ) {
                    yIndex = parseColumnIndex( pval, ncol_ );
                }
                else {
                    DescribedValue param = createDescribedValue( pname, pval );
                    if ( param != null ) {
                        paramList.add( param );
                    }
                }
                it.remove();
            }
        }

        /* Get table description from the remaining free-text lines,
         * that is those which are neither blank nor # comments. */
        StringBuilder descBuf = new StringBuilder();
        for ( String line : lineList ) {
            if ( ! COMMENT_REGEX.matcher( line ).matches() &&
                 ! BLANK_REGEX.matcher( line ).matches() ) {
                if ( descBuf.length() != 0 ) {
                    descBuf.append( '\n' );
                }
                descBuf.append( line );
            }
        }
        String description = descBuf.toString();
        if ( description.trim().length() > 0 ) {
            paramList.add( new DescribedValue( DESCRIPTION_INFO,
                                               description ) );
        }
        setParameters( paramList );

        /* Now read through all the data rows to find out what kind of
         * values each column contains. */
        RowEvaluator evaluator = new RowEvaluator( ncol_ );
        for ( List<String> row; ( row = readRow( in ) ) != null; ) {
            evaluator.submitRow( row );
        }
        RowEvaluator.Metadata metadata = evaluator.getMetadata();

        /* Doctor the column infos according to the information we have
         * acquired. */
        ColumnInfo[] colInfos = metadata.colInfos_;
        for ( int icol = 0; icol < ncol_; icol++ ) {
            colInfos[ icol ].setName( colNames.get( icol ) );
        }
        if ( raIndex >= 0 ) {
            markAngleColumn( colInfos[ raIndex ], "pos.eq.ra" );
        }
        if ( decIndex >= 0 ) {
            markAngleColumn( colInfos[ decIndex ], "pos.eq.dec" );
        }
        if ( idIndex >= 0 ) {
            colInfos[ idIndex ].setUCD( "meta.id" );
        }
        if ( xIndex >= 0 ) {
            colInfos[ xIndex ].setUCD( "pos.cartesian.x" );
        }
        if ( yIndex >= 0 ) {
            colInfos[ yIndex ].setUCD( "pos.cartesian.y" );
        }
        return metadata;
    }

    /**
     * Labels a column with the given UCD, and with units of degrees
     * if its content is numeric.
     *
     * @param  info  column metadata to modify
     * @param  ucd   UCD to assign
     */
    private static void markAngleColumn( ColumnInfo info, String ucd ) {
        info.setUCD( ucd );
        if ( Number.class.isAssignableFrom( info.getContentClass() ) ) {
            info.setUnitString( "deg" );
        }
    }

    /**
     * Returns a stream positioned at the first data row, by reading and
     * discarding the header from the superclass stream.
     *
     * @return  stream positioned just after the ruler line
     * @throws  IOException  if the header cannot be read; the stream is
     *          closed in that case
     */
    protected PushbackInputStream getInputStream() throws IOException {

        /* Skip the header lines before returning the superclass implementation
         * stream. */
        PushbackInputStream in = super.getInputStream();
        try {
            readHeaderLines( in );
        }
        catch ( IOException e ) {

            /* Nobody else will get hold of this stream, so release it;
             * a failure to close must not hide the original error. */
            try {
                in.close();
            }
            catch ( IOException e2 ) {
                // ignored in favour of the original exception
            }
            throw e;
        }
        return in;
    }

    /**
     * Reads the next non-empty line from the stream and splits it into
     * its tab-separated fields.  Empty lines are skipped.
     *
     * @param   in  input stream positioned within the data rows
     * @return  list of field strings, one per column, or null at the
     *          end of the stream or at an [EOD] marker line
     * @throws  TableFormatException  if the line has the wrong number
     *          of fields
     */
    @SuppressWarnings("fallthrough")
    protected List<String> readRow( PushbackInputStream in )
            throws TableFormatException, IOException {
        StringBuilder sbuf = new StringBuilder();
        String line = null;
        while( line == null ) {
            char c = (char) in.read();
            switch ( c ) {
                case END:
                    if ( sbuf.length() == 0 ) {
                        return null;
                    }
                    // fall through
                case '\r':
                case '\n':

                    /* A terminator with nothing before it is an empty
                     * line (or the second half of CR LF); keep going. */
                    if ( sbuf.length() > 0 ) {
                        line = sbuf.toString();
                    }
                    break;
                default:
                    sbuf.append( c );
            }
        }

        /* Check for End Of Data marker. */
        if ( EOD_REGEX.matcher( line ).matches() ) {
            return null;
        }

        /* Split the line into fields. */
        List<String> words = tabSplit( line );

        /* SExtractor likes to put a trailing tab at the end of each line. */
        if ( words.size() == ncol_ + 1 && words.get( ncol_ ).length() == 0 ) {
            words.remove( ncol_ );
        }

        /* Check the number of fields and return if OK. */
        if ( words.size() != ncol_ ) {
            throw new TableFormatException( "Wrong number of fields ("
                                          + words.size() + " != " + ncol_
                                          + ") for line: " + line );
        }
        return words;
    }

    /**
     * Reads all the header lines up to and including the last pre-data one
     * which consists only of ----'s and tabs.
     * A list of all the header lines is returned; its last element is
     * the ruler line.
     *
     * @param   in  input stream
     * @return  list of strings containing header lines
     * @throws  TableFormatException  if the input ends before a ruler line
     *          is found, or no ruler is found within 10000 lines
     */
    private static List<String> readHeaderLines( PushbackInputStream in ) 
            throws TableFormatException, IOException {
        List<String> lineList = new ArrayList<String>();
        while ( lineList.size() < 10000 ) {
            String line = readHeaderLine( in );
            lineList.add( line );
            if ( RULER_REGEX.matcher( line ).matches() ) {
                return lineList;
            }
        }
        throw new TableFormatException( "Header looks too long for TST" );
    }

    /**
     * Reads a line of text from an input stream.
     * The line may be terminated by LF, CR, CR LF or the end of the stream.
     *
     * @param  in  input stream
     * @return  line (excluding terminators)
     * @throws  TableFormatException  if the stream is already at its end,
     *          or the line is unreasonably long
     */
    private static String readHeaderLine( PushbackInputStream in )
            throws TableFormatException, IOException {
        StringBuilder sbuf = new StringBuilder();
        while ( sbuf.length() < 1024 * 1024 ) {
            char c = (char) in.read();
            switch ( c ) {
                case END:
                    if ( sbuf.length() == 0 ) {
                        throw new TableFormatException( "No TST rows" );
                    }
                    return sbuf.toString();
                case '\r': {

                    /* Swallow the LF of a CR LF pair, otherwise it would
                     * be reported as a spurious empty line, which matters
                     * between the column name line and the ruler. */
                    int next = in.read();
                    if ( next >= 0 && next != '\n' ) {
                        in.unread( next );
                    }
                    return sbuf.toString();
                }
                case '\n':
                    return sbuf.toString();
                default:
                    sbuf.append( c );
            }
        }
        throw new TableFormatException( "Too long for a line in a TST table" );
    }

    /**
     * Splits a line of text using tab characters as delimiters.
     * Empty fields are retained, so the result always has one more
     * element than the line has tabs.
     * 
     * @param   line  line of text
     * @return  mutable list of strings constituting the tab-separated tokens
     */
    private static List<String> tabSplit( String line ) {
        List<String> fields = new ArrayList<String>();
        int start = 0;
        for ( int end; ( end = line.indexOf( '\t', start ) ) >= 0;
              start = end + 1 ) {
            fields.add( line.substring( start, end ) );
        }
        fields.add( line.substring( start ) );
        return fields;
    }

    /**
     * Returns the column index represented by a string.
     * Valid values are integers from -1 up to one less than the column
     * count.
     * If it looks wrong, a warning will be logged and -1 will be returned.
     *
     * @param   txt  column index representation
     * @param   ncol  number of columns in table
     * @return  column index or -1
     */
    private static int parseColumnIndex( String txt, int ncol ) {
        try {
            int ix = Integer.parseInt( txt.trim() );
            if ( ix >= -1 && ix < ncol ) {
                return ix;
            }
        }
        catch ( NumberFormatException e ) {
            // not an integer: reported below along with out-of-range values
        }
        logger_.warning( "Bad value \"" + txt + "\" for column index - "
                       + "using -1" );
        return -1;
    }

    /**
     * Turns a name, value pair into a DescribedValue.  Makes a guess about
     * the data type of the value on the basis of what it looks like.
     * A null or blank string gives a parameter with a null value.
     *
     * @param  name  parameter name
     * @param  sval  parameter string value
     * @return  described value, or null if the value could not be parsed
     *          (a warning is logged in that case)
     */
    private static DescribedValue createDescribedValue( String name,
                                                        String sval ) {

        /* Treat the value as a one-cell table to reuse the type guessing. */
        RowEvaluator re1 = new RowEvaluator( 1 );
        try {
            re1.submitRow( Collections.singletonList( sval ) );
            RowEvaluator.Metadata meta1 = re1.getMetadata();
            DefaultValueInfo info =
                new DefaultValueInfo( meta1.colInfos_[ 0 ] );
            info.setName( name );
            Object value = sval == null || sval.trim().length() == 0 
                         ? null
                         : meta1.decoders_[ 0 ].decode( sval );
            return new DescribedValue( info, value );
        }
        catch ( TableFormatException e ) {   // unlikely
            logger_.warning( "Failed to parse parameter "
                           + name + ": " + sval );
            return null;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy