// gov.nasa.pds.objectAccess.example.ExtractTable Maven / Gradle / Ivy
//
// Show more of this group Show more artifacts with this name
// Show all versions of pds4-jparser Show documentation
// This is the parser library for the PDS4 planetary data standard.
// There is a newer version: 2.8.4
// Copyright 2019, California Institute of Technology ("Caltech").
// U.S. Government sponsorship acknowledged.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other
// materials provided with the distribution.
// * Neither the name of Caltech nor its operating division, the Jet Propulsion
// Laboratory, nor the names of its contributors may be used to endorse or
// promote products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package gov.nasa.pds.objectAccess.example;

import gov.nasa.arc.pds.xml.generated.FileAreaObservational;
import gov.nasa.arc.pds.xml.generated.ProductObservational;
import gov.nasa.arc.pds.xml.generated.TableCharacter;
import gov.nasa.arc.pds.xml.generated.TableDelimited;
import gov.nasa.pds.label.object.FieldDescription;
import gov.nasa.pds.label.object.TableRecord;
import gov.nasa.pds.objectAccess.ExporterFactory;
import gov.nasa.pds.objectAccess.ObjectAccess;
import gov.nasa.pds.objectAccess.ObjectProvider;
import gov.nasa.pds.objectAccess.TableReader;
import gov.nasa.pds.objectAccess.utility.Utility;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

import com.opencsv.exceptions.CsvValidationException;

/**
 * Implements a table extraction application. Uses the Apache
 * Jakarta Commons CLI library to parse the command line.
 */
public class ExtractTable {

	// Long names of the command-line options. Each one is registered
	// with a short alias in the constructor and queried in parseArguments().

	/** Long option name for showing the help text. */
	private static final String HELP_OPTION = "help";

	/** Long option name for extracting all tables. */
	private static final String EXTRACT_ALL = "all";
	
	/** Long option name for listing the tables instead of extracting one. */
	private static final String LIST_TABLES_OPTION = "list-tables";

	/** Long option name for the comma-separated list of fields to output. */
	private static final String FIELDS_OPTION = "fields";

	/** Long option name for the 1-based index of the table to extract. */
	private static final String INDEX_OPTION = "index";
	
	/** Long option name for choosing the data file by name. */
	private static final String DATA_FILE_OPTION = "data-file";

	/** Long option name for the output file. */
	private static final String OUTPUT_FILE_OPTION = "output-file";

	/** Long option name for selecting CSV output. */
	private static final String CSV_OPTION = "csv";

	/** Long option name for selecting fixed-width output. */
	private static final String FIXED_WIDTH_OPTION = "fixed-width";

	/** Long option name for the output field separator. */
	private static final String FIELD_SEPARATOR_OPTION = "field-separator";

	/** Long option name for the quote character used in CSV output. */
	private static final String QUOTE_CHARACTER_OPTION = "quote-character";

	/** Long option name for using the current platform's line separator. */
	private static final String PLATFORM_OPTION = "platform";

	/** Long option name for using the Unix line separator (LF). */
	private static final String UNIX_OPTION = "unix";

	/** Long option name for using the Windows line separator (CRLF). */
	private static final String WINDOWS_OPTION = "windows";

	/** A system property name for setting the program name in the
	 * usage message.
	 */
	private static final String PROGRAM_NAME = "pds4.tools.progname";

	/** The command-line options accepted by the program; built in the constructor. */
	private Options options;

	// The fields below hold the parsed command line; all are assigned in
	// parseArguments() except "out", which is opened in run().

	/** True if the list-tables option was given. */
	private boolean listTables;
	/** True if the extract-all option was given. */
	private boolean extractAll;
	/** URL of the label, built from the first non-option argument. */
	private URL labelUrl;
	/** Output file, or null to write to standard output. */
	private File outputFile;
	/** Data file named by the data-file option, or null if the option was not given. */
	private File dataFile;
	/** Writer that receives all listing and extraction output. */
	private PrintWriter out;
	/** Output format: CSV if the csv option was given, otherwise fixed-width. */
	private OutputFormat format;
	/** String written between adjacent fields. */
	private String fieldSeparator;
	/** String written at the end of each output line. */
	private String lineSeparator;
	/** Quote string used for CSV output; defaults to a double quote. */
	private String quoteCharacter;
	/** Pattern matching the quote string literally. */
	private Pattern quoteCharacterPattern;
	/** 1-based index of the table to extract; defaults to 1. */
	private int tableIndex;
	/** Field names or numbers from the fields option, or null for all fields. */
	private String[] requestedFields;

	/**
	 * Application entry point. Builds an extractor and hands it the
	 * command-line arguments.
	 *
	 * @param args the command-line arguments
	 * @throws CsvValidationException if it is thrown while the extractor runs
	 */
	public static void main(String[] args) throws CsvValidationException {
		ExtractTable extractor = new ExtractTable();
		extractor.run(args);
	}

	/**
	 * Builds the extractor and registers every command-line option it
	 * understands. Nothing is parsed here; see {@link #parseArguments(String[])}.
	 */
	public ExtractTable() {
		options = new Options();

		// Flags that select what the program does.
		options.addOption("h", HELP_OPTION, false, "show help text");
		options.addOption("l", LIST_TABLES_OPTION, false, "list tables present in the product (overrides all but output file options)");

		// Options that select which table and which fields to extract.
		Option indexOpt = new Option("n", INDEX_OPTION, true, "table index, if more than one table is present (1..N) (default is 1)");
		indexOpt.setArgName("NUMBER");
		options.addOption(indexOpt);

		Option dataFileOpt = new Option("d", DATA_FILE_OPTION, true, "data file name, if more than one data file is present (default is the first one listed)");
		dataFileOpt.setArgName("FILE");
		options.addOption(dataFileOpt);

		Option fieldsOpt = new Option("f", FIELDS_OPTION, true, "comma-separated list of field names or numbers (default is all fields)");
		fieldsOpt.setArgName("FIELD_LIST");
		fieldsOpt.setValueSeparator(',');
		options.addOption(fieldsOpt);

		// Options that control where and how the output is written.
		Option outputFileOpt = new Option("o", OUTPUT_FILE_OPTION, true, "output file name (default is stdout)");
		outputFileOpt.setArgName("FILE");
		options.addOption(outputFileOpt);

		Option separatorOpt = new Option("t", FIELD_SEPARATOR_OPTION, true, "output field separator (default is 1 space for fixed-width, or comma for CSV)");
		separatorOpt.setArgName("SEP");
		options.addOption(separatorOpt);

		options.addOption("c", CSV_OPTION, false, "output in CSV format");
		options.addOption("w", FIXED_WIDTH_OPTION, false, "output in fixed-width format (default)");

		Option quoteOpt = new Option("q", QUOTE_CHARACTER_OPTION, true, "quote character (for CSV output)");
		quoteOpt.setArgName("CHAR");
		options.addOption(quoteOpt);

		// Line-separator selection.
		options.addOption("W", WINDOWS_OPTION, false, "output using Windows line separator (CRLF)");
		options.addOption("U", UNIX_OPTION, false, "output using Unix line separator (LF)");
		options.addOption("P", PLATFORM_OPTION, false, "output using current platform line separator (default)");

		options.addOption("a", EXTRACT_ALL, false, "extract all tables");
	}

	/**
	 * Runs the extractor: parses the arguments, opens the output, loads
	 * the label, and then either lists the tables of every data file or
	 * extracts the selected table(s).
	 *
	 * <p>Selection rules when not listing:
	 * <ul>
	 *   <li>with the extract-all option, every table of every data file
	 *       is written, one after another;</li>
	 *   <li>otherwise the table whose 1-based position equals the
	 *       requested index is written. If no data file was named, only
	 *       the first data file in the label is searched; if one was
	 *       named, only files whose name matches (ignoring case) are
	 *       considered.</li>
	 * </ul>
	 *
	 * <p>Unrecoverable problems (unreadable label, unparsable label, bad
	 * URL) print a message and terminate the JVM with exit status 1.
	 *
	 * @param args the command-line arguments
	 * @throws CsvValidationException if it is thrown while reading table records
	 */
	private void run(String[] args) throws CsvValidationException {
		parseArguments(args);

		if (outputFile != null) {
			try {
				out = new PrintWriter(new FileWriter(outputFile));
			} catch (IOException e) {
				System.err.println("Cannot open output file: " + e.getMessage());
				System.exit(1);
			}
		} else {
			out = new PrintWriter(new OutputStreamWriter(System.out));
		}

		// Probe the label first so an unreadable label gives a short,
		// clear message rather than a parser stack trace.
		try {
			Utility.openConnection(labelUrl.openConnection()).close();
		} catch (IOException io) {
			System.err.println("Cannot read label file " + labelUrl.toString());
			System.exit(1);
		}

		ObjectProvider objectAccess = null;
		ProductObservational product = null;
		URL parent = null;
		try {
			objectAccess = new ObjectAccess();
			product = objectAccess.getProduct(labelUrl, ProductObservational.class);
			// Data file names in the label are resolved against the
			// location that contains the label.
			parent = labelUrl.toURI().getPath().endsWith("/")
					? labelUrl.toURI().resolve("..").toURL()
					: labelUrl.toURI().resolve(".").toURL();
		} catch (gov.nasa.pds.objectAccess.ParseException e) {
			System.err.println(e.getMessage());
			e.printStackTrace();
			System.exit(1);
		} catch (MalformedURLException mu) {
			System.err.println(mu.getMessage());
			mu.printStackTrace();
			System.exit(1);
		} catch (URISyntaxException ue) {
			System.err.println(ue.getMessage());
			ue.printStackTrace();
			System.exit(1);
		}

		boolean extractedAny = false;

		for (FileAreaObservational fileArea : product.getFileAreaObservationals()) {
			String fileName = fileArea.getFile().getFileName();
			URL dataFileUrl = null;
			try {
				dataFileUrl = new URL(parent, fileName);
			} catch (MalformedURLException mu) {
				mu.printStackTrace();
				System.exit(1);
			}
			if (listTables) {
				out.println("\nfile: " + dataFileUrl.toString());
			}

			// The file-name filter only applies when a data file was named.
			boolean fileMatches = this.dataFile == null
					|| this.dataFile.getName().equalsIgnoreCase(fileName);

			int currentIndex = 1;
			for (Object obj : objectAccess.getTableObjects(fileArea)) {
				TableType tableType = TableType.FIXED_BINARY;
				if (obj instanceof TableCharacter) {
					tableType = TableType.FIXED_TEXT;
				} else if (obj instanceof TableDelimited) {
					tableType = TableType.DELIMITED;
				}

				TableReader reader = null;
				try {
					reader = ExporterFactory.getTableReader(obj, dataFileUrl);
				} catch (Exception ex) {
					System.err.println("Cannot create a table reader for the table: " + ex.getMessage());
					ex.printStackTrace();
					out.close();
					return;
				}

				if (listTables) {
					out.println("  table " + currentIndex + ": " + tableType.getReadableType());
					listFields(reader.getFields());
				} else if (extractAll) {
					// Keep going: "all" means every table in the file, not
					// just the first one.
					extractTable(reader);
					extractedAny = true;
				} else if (currentIndex == tableIndex && fileMatches) {
					extractTable(reader);
					extractedAny = true;
					// Only one table was requested, so stop scanning this file.
					break;
				}
				++currentIndex;
			}

			// Without a named data file (and without list/all), only the
			// first data file in the label is considered.
			if (!listTables && (this.dataFile == null && !extractAll)) {
				break;
			}
		}

		if (!listTables && !extractedAny) {
			// Otherwise the user would just see empty output with no explanation.
			System.err.println("No matching table found in " + labelUrl.toString());
		}

		out.close();
	}

	/**
	 * Writes one line per field to the output, giving the field's
	 * 1-based position, its name, and its XML type.
	 *
	 * @param fields an array of field descriptions
	 */
	private void listFields(FieldDescription[] fields) {
		for (int idx = 0; idx < fields.length; idx++) {
			FieldDescription current = fields[idx];
			int position = idx + 1; // positions shown to the user start at 1
			out.println("    field " + position + ": " + current.getName() + " (" + current.getType().getXMLType() + ")");
		}
	}

	/**
	 * Writes one table to the output: a header line followed by one
	 * line per record, restricted to the selected fields.
	 *
	 * @param reader the table reader to use for reading data
	 * @throws CsvValidationException if it is thrown while reading records
	 */
	private void extractTable(TableReader reader) throws CsvValidationException {
		final FieldDescription[] allFields = reader.getFields();
		final int[] selected = getSelectedFields(allFields);
		final int[] widths = getFieldLengths(allFields, selected);

		displayHeaders(allFields, selected, widths);
		displayRows(reader, allFields, selected, widths);
	}

	/**
	 * Works out which fields to display, as 0-based indices into
	 * {@code fields}. If fields were requested on the command line they
	 * are resolved, in the order given, through
	 * {@link #findField(String, FieldDescription[])}; otherwise every
	 * field is selected in table order.
	 *
	 * @param fields the field descriptions for the table
	 * @return an array of 0-based field indices to display
	 */
	private int[] getSelectedFields(FieldDescription[] fields) {
		if (requestedFields != null) {
			int[] chosen = new int[requestedFields.length];
			int pos = 0;
			for (String request : requestedFields) {
				chosen[pos++] = findField(request, fields);
			}
			return chosen;
		}

		// No explicit request: select everything.
		int[] everything = new int[fields.length];
		for (int k = 0; k < everything.length; k++) {
			everything[k] = k;
		}
		return everything;
	}

	/**
	 * Converts a field name or 1-based field number into a 0-based
	 * field index. Prints an error message and exits with status 1 if
	 * the request does not identify a field of the table.
	 *
	 * <p>A numeric request is used as a field number only when it lies
	 * in {@code 1..fields.length}; a number outside that range is then
	 * tried as a field name before being rejected, so it can never reach
	 * the caller as an out-of-range array index.
	 *
	 * @param nameOrIndex the string form of the name or index requested
	 * @param fields the field descriptions for the table fields
	 * @return the 0-based index of the requested field
	 */
	private int findField(String nameOrIndex, FieldDescription[] fields) {
		// First try to interpret the request as a 1-based field number.
		try {
			int index = Integer.parseInt(nameOrIndex) - 1;
			if (index >= 0 && index < fields.length) {
				return index;
			}
			// Out of range: fall through to the name lookup below.
		} catch (NumberFormatException ignored) {
			// Not a number, so it must be a field name.
		}

		// Now try to find a matching field name, ignoring case.
		for (int i = 0; i < fields.length; ++i) {
			if (nameOrIndex.equalsIgnoreCase(fields[i].getName())) {
				return i;
			}
		}

		// If we get here, then we couldn't find a matching field.
		System.err.println("Requested field not present in table: " + nameOrIndex);
		System.exit(1);
		return -1; // Still have to return, because Java doesn't know that exit() doesn't return.
	}

	/**
	 * Computes the output width of each displayed field. For CSV output
	 * every width is 0 (no padding). Otherwise the width is the larger
	 * of the field name's length and the field's declared length.
	 *
	 * @param fields an array of field descriptions
	 * @param displayFields an array of field indices to display
	 * @return the output widths, parallel to {@code displayFields}
	 */
	private int[] getFieldLengths(FieldDescription[] fields, int[] displayFields) {
		// A new int array is zero-filled, which is already the CSV answer.
		int[] widths = new int[displayFields.length];

		if (format != OutputFormat.CSV) {
			for (int k = 0; k < widths.length; k++) {
				FieldDescription described = fields[displayFields[k]];
				widths[k] = Math.max(described.getName().length(), described.getLength());
			}
		}

		return widths;
	}

	/**
	 * Writes the header line: the names of the displayed fields,
	 * separated by the field separator and justified the same way as
	 * the field's values, followed by the line separator.
	 *
	 * @param fields an array of field descriptions
	 * @param displayFields an array of field indices to display
	 * @param fieldLengths an array of field lengths to use for output
	 */
	private void displayHeaders(FieldDescription[] fields, int[] displayFields, int[] fieldLengths) {
		int column = 0;
		for (int fieldIndex : displayFields) {
			if (column != 0) {
				out.append(fieldSeparator);
			}

			FieldDescription header = fields[fieldIndex];
			displayJustified(header.getName(), fieldLengths[column], header.getType().isRightJustified());
			column++;
		}
		out.append(lineSeparator);
	}

	/**
	 * Writes every remaining record of the table, one output line per
	 * record. Each displayed value is trimmed and then justified within
	 * its column. If reading a record fails with an I/O error, a message
	 * is printed and the program exits with status 1.
	 *
	 * @param reader the table reader for reading rows
	 * @param fields an array of field descriptions
	 * @param displayFields an array of field indices to display
	 * @param fieldLengths an array of field lengths to use for output
	 * @throws CsvValidationException if it is thrown while reading a record
	 */
	private void displayRows(TableReader reader, FieldDescription[] fields, int[] displayFields, int[] fieldLengths) throws CsvValidationException {
		try {
			for (TableRecord row = reader.readNext(); row != null; row = reader.readNext()) {
				int column = 0;
				for (int fieldIndex : displayFields) {
					if (column != 0) {
						out.append(fieldSeparator);
					}

					FieldDescription described = fields[fieldIndex];
					// Record fields are addressed starting at 1, hence the +1.
					String value = row.getString(fieldIndex + 1).trim();
					int width = fieldLengths[column];
					displayJustified(value, width, described.getType().isRightJustified());
					column++;
				}

				out.append(lineSeparator);
			}
		} catch (IOException e) {
			System.err.println("Cannot read the next table record: " + e.getMessage());
			System.exit(1);
		}
	}

	/**
	 * Writes a string, justified in a field of the given width.
	 *
	 * <p>For CSV output the value is first escaped: any occurrence of
	 * the quote string is doubled, and the value is enclosed in quotes
	 * if it is blank, or contains the field separator, the quote string,
	 * or a line break. Doubled quotes are only meaningful inside a
	 * quoted value, so a value containing a quote must itself be quoted.
	 *
	 * <p>If the (possibly escaped) value is at least as long as the
	 * width, it is written without padding and without truncation.
	 *
	 * @param s the string to display
	 * @param length the field length
	 * @param isRightJustified true, if the value should be right-justified, else left-justified
	 */
	private void displayJustified(String s, int length, boolean isRightJustified) {
		if (format == OutputFormat.CSV) {
			boolean hasQuote = s.contains(quoteCharacter);
			boolean needsQuoting = hasQuote
					|| s.trim().isEmpty()
					|| s.contains(fieldSeparator)
					|| s.indexOf('\n') >= 0
					|| s.indexOf('\r') >= 0;

			if (hasQuote) {
				// String.replace is a literal replacement, so a quote string
				// such as "$" or "\" is not misread as a regex group
				// reference or escape.
				s = s.replace(quoteCharacter, quoteCharacter + quoteCharacter);
			}

			if (needsQuoting) {
				s = quoteCharacter + s + quoteCharacter;
			}
		}

		// Zero or negative when the value fills or overflows the field; then no padding is written.
		int padding = length - s.length();

		if (isRightJustified) {
			displayPadding(padding);
		}
		out.append(s);
		if (!isRightJustified) {
			displayPadding(padding);
		}
	}

	/**
	 * Writes {@code n} space characters to the output. Writes nothing
	 * when {@code n} is zero or negative.
	 *
	 * @param n the number of spaces
	 */
	private void displayPadding(int n) {
		int remaining = n;
		while (remaining > 0) {
			out.append(' ');
			remaining--;
		}
	}

	/**
	 * Parses the command-line arguments and stores the results in the
	 * instance fields. On a parsing error, a missing label argument, or
	 * an invalid table index, the usage message is shown and the program
	 * exits with status 1. If help was requested, the usage message is
	 * shown and the program exits with status 0.
	 *
	 * <p>The label argument is first tried as a URL; if it is not a
	 * valid URL it is treated as a local file path.
	 *
	 * @param args the command-line arguments
	 */
	private void parseArguments(String[] args) {
		CommandLineParser parser = new GnuParser();
		CommandLine cmdLine = null;
		try {
			cmdLine = parser.parse(options, args);
		} catch (ParseException e) {
			showHelp("Error parsing command-line options: " + e.getMessage(), 1);
		}

		if (cmdLine.hasOption(HELP_OPTION)) {
			showHelp(null, 0);
		}

		listTables = (cmdLine.hasOption(LIST_TABLES_OPTION));
		extractAll = (cmdLine.hasOption(EXTRACT_ALL));

		tableIndex = 1;
		if (cmdLine.hasOption(INDEX_OPTION)) {
			String indexValue = cmdLine.getOptionValue(INDEX_OPTION);
			try {
				tableIndex = Integer.parseInt(indexValue.trim());
			} catch (NumberFormatException e) {
				// Report a usage error instead of crashing with a stack trace.
				showHelp("Table index must be an integer: " + indexValue, 1);
			}
			if (tableIndex < 1) {
				// Table indices are 1-based; anything lower could never match.
				showHelp("Table index must be 1 or greater: " + indexValue, 1);
			}
		}

		if (cmdLine.hasOption(DATA_FILE_OPTION)) {
			dataFile = new File(cmdLine.getOptionValue(DATA_FILE_OPTION));
		} else {
			dataFile = null;
		}

		String[] files = cmdLine.getArgs();
		if (files.length == 0) {
			showHelp("A label file is required", 1);
		}

		try {
			labelUrl = new URL(files[0]);
		} catch (MalformedURLException u) {
			// Not a URL, so treat the argument as a local file path.
			File file = new File(files[0]);
			file = file.getAbsoluteFile();
			try {
				labelUrl = file.toURI().toURL();
			} catch (MalformedURLException mu) {
				mu.printStackTrace();
				System.exit(1);
			}
		}

		if (cmdLine.hasOption(CSV_OPTION)) {
			format = OutputFormat.CSV;
		} else {
			format = OutputFormat.FIXED_WIDTH;
		}

		if (cmdLine.hasOption(FIELD_SEPARATOR_OPTION)) {
			fieldSeparator = cmdLine.getOptionValue(FIELD_SEPARATOR_OPTION);
		} else if (format == OutputFormat.FIXED_WIDTH) {
			fieldSeparator = " ";
		} else {
			fieldSeparator = ",";
		}

		if (cmdLine.hasOption(QUOTE_CHARACTER_OPTION)) {
			quoteCharacter = cmdLine.getOptionValue(QUOTE_CHARACTER_OPTION);
		} else {
			quoteCharacter = "\"";
		}
		// Pattern.quote stays correct even if the quote string itself
		// contains "\E", which hand-built \Q...\E quoting does not.
		quoteCharacterPattern = Pattern.compile(Pattern.quote(quoteCharacter));

		if (cmdLine.hasOption(WINDOWS_OPTION)) {
			lineSeparator = "\r\n";
		} else if (cmdLine.hasOption(UNIX_OPTION)) {
			lineSeparator = "\n";
		} else {
			lineSeparator = System.getProperty("line.separator");
		}

		if (!cmdLine.hasOption(FIELDS_OPTION)) {
			requestedFields = null;
		} else {
			// Trim first so spaces around the whole list do not end up
			// inside the first or last field name.
			requestedFields = cmdLine.getOptionValue(FIELDS_OPTION).trim().split(" *, *");
		}

		if (cmdLine.hasOption(OUTPUT_FILE_OPTION)) {
			outputFile = new File(cmdLine.getOptionValue(OUTPUT_FILE_OPTION));
		} else {
			outputFile = null;
		}
	}

	/**
	 * Prints the usage message, preceded by an error message if one is
	 * given, and then terminates the program. The program name shown is
	 * taken from the {@code pds4.tools.progname} system property when it
	 * is set, and is this class's name otherwise.
	 *
	 * @param errorMessage the error message, or null if there is no error message
	 * @param exitCode the exit code to use
	 */
	private void showHelp(String errorMessage, int exitCode) {
		if (errorMessage != null) {
			System.err.println(errorMessage);
			System.err.println();
		}

		String programName = System.getProperty(PROGRAM_NAME, getClass().getName());
		String usage = programName + " [-f field,...] [-o outputfile] [options] labelfile";

		new HelpFormatter().printHelp(usage, options);
		System.exit(exitCode);
	}

	/**
	 * Defines an enumeration for the different table types
	 * that can be extracted. Holds a readable description of
	 * the table type.
	 */
	private static enum TableType {

		/** A fixed-width binary table. */
		FIXED_BINARY("fixed-width binary table"),

		/** A fixed-width text table. */
		FIXED_TEXT("fixed-width character table"),

		/** A delimited table. */
		DELIMITED("delimited table");

		private String readableType;

		private TableType(String readableType) {
			this.readableType = readableType;
		}

		/**
		 * Gets the readable name for the table type.
		 *
		 * @return the name of the table type
		 */
		public String getReadableType() {
			return readableType;
		}

	}

	/**
	 * Defines an enumeration for the different output formats.
	 */
	private static enum OutputFormat {
		CSV, FIXED_WIDTH;
	}

}