All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.ac.starlink.ecsv.EcsvTableBuilder Maven / Gradle / Ivy

There is a newer version: 4.3
Show newest version
package uk.ac.starlink.ecsv;

import java.awt.datatransfer.DataFlavor;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.logging.Logger;
import uk.ac.starlink.table.RowSequence;
import uk.ac.starlink.table.StarTable;
import uk.ac.starlink.table.StoragePolicy;
import uk.ac.starlink.table.TableFormatException;
import uk.ac.starlink.table.TableSink;
import uk.ac.starlink.table.formats.DocumentedTableBuilder;
import uk.ac.starlink.util.ConfigMethod;
import uk.ac.starlink.util.DataSource;
import uk.ac.starlink.util.IOUtils;

/**
 * TableBuilder implementation for ECSV tables.
 * The format currently supported is ECSV 1.0, as documented at
 * Astropy APE6
 * (V1.0 DOI:10.5281/zenodo.4792325).
 *
 * @author   Mark Taylor
 * @since    29 Apr 2020
 */
public class EcsvTableBuilder extends DocumentedTableBuilder {

    private final YamlParser yamlParser_;
    private String headerLoc_;
    private MessagePolicy colCheck_;
    private byte[] headerBuf_;

    private static final Logger logger_ =
        Logger.getLogger( "uk.ac.starlink.ecsv" );

    /**
     * Constructor.
     */
    public EcsvTableBuilder() {
        super( new String[] { "ecsv" } );
        yamlParser_ = new SnakeYamlParser();
        colCheck_ = MessagePolicy.WARN;
    }

    public String getFormatName() {
        return "ECSV";
    }

    public boolean canImport( DataFlavor flavor ) {
        return false;
    }

    /**
     * Sets the location of a header file which will be prepended to
     * any file read by this handler.  It can be ste to a file containing
     * an ECSV header, so that this handler can read plain CSV files
     * but include prepared YAML metadata.
     *
     * @param  headerLoc  filename or URL pointing to a header file
     */
    @ConfigMethod(
        property = "header",
        doc = "

Location of a file containing a header to be applied\n" + "to the start of the input file.\n" + "By using this you can apply your own ECSV-format metadata\n" + "to plain CSV files.\n" + "

", usage = "", example = "http://cdn.gea.esac.esa.int/Gaia/gedr3/ECSV_headers/" + "gaia_source.header", sequence = 1 ) public void setHeader( String headerLoc ) { headerLoc_ = headerLoc; } @ConfigMethod( property = "colcheck", doc = "

Determines the action taken if the columns named\n" + "in the YAML header differ from the columns named in the\n" + "first line of the CSV part of the file.\n" + "

", example = "FAIL", sequence = 2 ) public void setColcheck( MessagePolicy colCheck ) { colCheck_ = colCheck; } public void streamStarTable( InputStream in, TableSink sink, String pos ) throws IOException { try ( EcsvReader reader = createEcsvReader( in, colCheck_ ) ) { EcsvStarTable stMeta = new EcsvStarTable( reader.getMeta() ) { public RowSequence getRowSequence() { throw new UnsupportedOperationException(); } }; sink.acceptMetadata( stMeta ); while ( reader.next() ) { sink.acceptRow( reader.getRow() ); } sink.endRows(); } catch ( EcsvFormatException e ) { throw new TableFormatException( e.getMessage(), e ); } } public StarTable makeStarTable( final DataSource datsrc, boolean wantRandom, StoragePolicy storagePolicy ) throws IOException { if ( headerLoc_ == null && ! EcsvHeader.isMagic( datsrc.getIntro() ) ) { throw new TableFormatException( "No ECSV header" ); } EcsvMeta meta; try ( EcsvReader reader = createEcsvReader( datsrc.getInputStream(), colCheck_ ) ) { meta = reader.getMeta(); } EcsvColumn[] ecols = meta.getColumns(); for ( int ic = 0; ic < ecols.length; ic++ ) { EcsvColumn ecol = ecols[ ic ]; String msg = ecol.getDecoder().getWarning(); if ( msg != null ) { logger_.warning( "Column " + ecol.getName() + " (#" + ( ic + 1 ) + "): " + msg ); } } return new EcsvStarTable( meta ) { public RowSequence getRowSequence() throws IOException { final EcsvReader rdr = createEcsvReader( datsrc.getInputStream(), MessagePolicy.IGNORE ); return new RowSequence() { public boolean next() throws IOException { try { return rdr.next(); } catch ( EcsvFormatException e ) { throw new TableFormatException( e.getMessage(), e ); } } public Object[] getRow() { return rdr.getRow(); } public Object getCell( int icol ) { return rdr.getCell( icol ); } public void close() throws IOException { rdr.close(); } }; } }; } public String getXmlDescription() { return readText( "EcsvTableBuilder.xml" ); } public boolean canStream() { return true; } public boolean docIncludesExample() { return false; } /** * Returns an input stream which is based on the supplied stream, * but with the content of this handler's header file, if any, * inserted at the start. * * @param in base input stream * @return input stream with possible adjustment */ private InputStream applyHeader( InputStream in ) throws IOException { return headerLoc_ == null ? in : new SequenceInputStream( new ByteArrayInputStream( getHeaderBytes() ), in ); } /** * Returns the content of this handler's header file. * * @return lazily read header file content as a byte array */ private byte[] getHeaderBytes() throws IOException { if ( headerLoc_ == null ) { return new byte[ 0 ]; } else if ( headerBuf_ == null ) { ByteArrayOutputStream bout = new ByteArrayOutputStream(); IOUtils.copy( getInputStream( headerLoc_ ), bout ); headerBuf_ = bout.toByteArray(); } return headerBuf_; } /** * Returns an input stream specified by a string location. * * @param location filename or URL * @return input stream * @throws IOException if no such file */ private static InputStream getInputStream( String location ) throws IOException { File file = new File( location ); if ( file.exists() ) { return new FileInputStream( file ); } else { URL url; try { url = new URL( location ); } catch ( MalformedURLException e ) { String msg = "No file or URL \"" + location + "\""; throw new FileNotFoundException( msg ); } return url.openStream(); } } /** * Creates an EcsvReader given an input stream. * * @param in input stream * @param colCheck behviour for YAML/CSV column name mismatch * @return reader */ private EcsvReader createEcsvReader( InputStream in, MessagePolicy colCheck ) throws IOException { try { return new EcsvReader( applyHeader( in ), yamlParser_, colCheck ); } catch ( EcsvFormatException e ) { throw new TableFormatException( e.getMessage(), e ); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy