/*
* Copyright (c) 1998-2018 John Caron and University Corporation for Atmospheric Research/Unidata
* See LICENSE for license information.
*/
package ucar.nc2;
import ucar.ma2.*;
import ucar.nc2.util.CancelTask;
import ucar.nc2.util.CancelTaskImpl;
import ucar.nc2.write.Nc4Chunking;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Utility class for copying a NetcdfFile object, or parts of one, to a netcdf-3 or netcdf-4 disk file.
* Uses NetcdfFileWriter.
* This handles the entire CDM model (groups, etc.) if you are writing to netcdf-4.
*
* The fileIn may be an NcML file with a referenced dataset in its location URL; in that case the underlying data
* (as modified by the NcML) is written to the new file. If the NcML does not have a referenced dataset,
* then the new file is filled with fill values, like ncgen.
*
* Use a NetcdfFileWriter object for a lower level API.
*
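* <p>
* For example, a whole-file copy (mirroring the main() program at the bottom of this class;
* file names are placeholders):
* <pre>{@code
* NetcdfFile ncIn = NetcdfFile.open("in.nc");
* FileWriter2 writer2 = new FileWriter2(ncIn, "out.nc", NetcdfFileWriter.Version.netcdf3, null);
* NetcdfFile ncOut = writer2.write(); // returns the open output file
* ncOut.close();
* ncIn.close();
* }</pre>
*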
* @see ucar.nc2.dt.grid.CFGridWriter2
* @see ucar.nc2.ft.point.writer.CFPointWriter
*
* @author caron
*/
public class FileWriter2 {
static private final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(FileWriter2.class);
static private final long maxSize = 50 * 1000 * 1000; // 50 Mbytes
static private boolean debug = false, debugWrite = false, debugChunk = false;
/**
* Set debugging flags
*
* @param debugFlags debug flags
*/
public static void setDebugFlags(ucar.nc2.util.DebugFlags debugFlags) {
debug = debugFlags.isSet("ncfileWriter2/debug");
debugWrite = debugFlags.isSet("ncfileWriter2/debugWrite");
debugChunk = debugFlags.isSet("ncfileWriter2/debugChunk");
}
//////////////////////////////////////////////////////////////////////////////////////
private final NetcdfFile fileIn;
private final NetcdfFileWriter writer;
private final NetcdfFileWriter.Version version;
private final Map<Variable, Variable> varMap = new HashMap<>(100); // oldVar -> newVar
private final List<Variable> varList = new ArrayList<>(100); // old Vars
private final Map<String, Dimension> gdimHash = new HashMap<>(33); // name -> newDim : global dimensions (classic mode)
/**
* Use this constructor to copy an entire file. Use this.write() to do the actual copy.
*
* @param fileIn copy this file
* @param fileOutName to this output file
* @param version output file version
* @param chunker chunking strategy (netcdf4 only)
* @throws IOException on read/write error
*/
public FileWriter2(NetcdfFile fileIn, String fileOutName, NetcdfFileWriter.Version version, Nc4Chunking chunker) throws IOException {
this.fileIn = fileIn;
this.writer = NetcdfFileWriter.createNew(version, fileOutName, chunker);
this.version = version;
}
public enum N3StructureStrategy {flatten, exclude}
private N3StructureStrategy n3StructureStrategy;
public void setN3StructureStrategy(N3StructureStrategy n3StructureStrategy) {
this.n3StructureStrategy = n3StructureStrategy;
}
public NetcdfFileWriter getNetcdfFileWriter() {
return writer;
}
/////////////////////////////////////////////////////////////////////////////////////////////
// might be better to push these next up into NetcdfCFWriter, but we want to use copyVarData
/**
* Use this constructor to copy specific variables to a new file.
* Only supports classic mode.
*
* Use addVariable() to load in variables; then create the file and copy the data (see the sketch below).
*
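* A sketch of this workflow (file and variable names are placeholders; the caller drives file
* creation and the data copy, since write() requires the whole-file constructor):
* <pre>{@code
* NetcdfFileWriter fw = NetcdfFileWriter.createNew(NetcdfFileWriter.Version.netcdf3, "out.nc");
* FileWriter2 writer2 = new FileWriter2(fw);
* List<Variable> copyList = new ArrayList<>();
* for (Variable v : selectedVariables) { // selectedVariables: whatever subset you choose
*   writer2.addVariable(v);
*   copyList.add(v);
* }
* fw.create(); // end define mode
* writer2.copyVarData(copyList, null, null);
* fw.close();
* }</pre>
*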
* @param fileWriter this encapsulates the new file.
* @throws IOException on read/write error
*/
public FileWriter2(NetcdfFileWriter fileWriter) throws IOException {
this.fileIn = null;
this.writer = fileWriter;
this.version = fileWriter.getVersion();
}
/**
* Specify which variable will get written
*
* @param oldVar add this variable, and all parent groups
* @return new Variable.
*/
public Variable addVariable(Variable oldVar) {
List<Dimension> newDims = getNewDimensions(oldVar);
Variable newVar;
if ((oldVar.getDataType().equals(DataType.STRING)) && (!version.isExtendedModel())) {
newVar = writer.addStringVariable(null, oldVar, newDims);
} else {
newVar = writer.addVariable(null, oldVar.getShortName(), oldVar.getDataType(), newDims);
}
varMap.put(oldVar, newVar);
varList.add(oldVar);
for (Attribute orgAtt : oldVar.getAttributes())
writer.addVariableAttribute(newVar, convertAttribute(orgAtt));
return newVar;
}
private List<Dimension> getNewDimensions(Variable oldVar) {
List<Dimension> result = new ArrayList<>(oldVar.getRank());
// dimensions
for (Dimension oldD : oldVar.getDimensions()) {
Dimension newD = gdimHash.get(oldD.getShortName());
if (newD == null) {
newD = writer.addDimension(null, oldD.getShortName(), oldD.isUnlimited() ? 0 : oldD.getLength(),
oldD.isUnlimited(), oldD.isVariableLength());
gdimHash.put(oldD.getShortName(), newD);
if (debug) System.out.println("add dim= " + newD);
}
result.add(newD);
}
return result;
}
////////////////////////////////////////////////////////////////////////////////////////////////
public NetcdfFile write() throws IOException {
return write(null);
}
/**
* Write the input file to the output file.
* @param cancel allow user to cancel; may be null.
* @return the open output file.
* @throws IOException on read/write error
*/
public NetcdfFile write(CancelTask cancel) throws IOException {
try {
if (version.isExtendedModel())
addGroupExtended(null, fileIn.getRootGroup());
else
addGroupClassic();
if (cancel != null && cancel.isCancel()) return null;
// create the file
writer.create();
if (cancel != null && cancel.isCancel()) return null;
double total = copyVarData(varList, null, cancel);
if (cancel != null && cancel.isCancel()) return null;
writer.flush();
if (debug) System.out.println("FileWriter done total bytes = " + total);
} catch (IOException ioe) {
ioe.printStackTrace();
writer.abort(); // clean up
throw ioe;
}
return writer.getNetcdfFile();
}
private void addGroupClassic() throws IOException {
if (fileIn.getRootGroup().getGroups().size() != 0) {
throw new IllegalStateException("Input file has nested groups: cannot write to netcdf-3 format");
}
// attributes
for (Attribute orgAtt : fileIn.getGlobalAttributes()) {
writer.addGroupAttribute(null, convertAttribute(orgAtt));
}
// dimensions
Map<String, Dimension> dimHash = new HashMap<>();
for (Dimension oldD : fileIn.getDimensions()) {
Dimension newD = writer.addDimension(null, oldD.getShortName(), oldD.isUnlimited() ? 0 : oldD.getLength(),
oldD.isUnlimited(), oldD.isVariableLength());
dimHash.put(oldD.getShortName(), newD);
if (debug) System.out.println("add dim= " + newD);
}
// Variables
int anonCount = 0;
for (Variable oldVar : fileIn.getVariables()) {
if (oldVar instanceof Structure) continue; // ignore for the moment
List<Dimension> dims = new ArrayList<>();
for (Dimension oldD : oldVar.getDimensions()) {
if (!oldD.isShared()) { // netcdf3 dimensions must be shared
String anonName = "anon" + anonCount;
anonCount++;
Dimension newD = writer.addDimension(null, anonName, oldD.getLength());
dims.add(newD);
} else {
Dimension dim = dimHash.get(oldD.getShortName());
if (dim != null)
dims.add(dim);
else
throw new IllegalStateException("Unknown dimension= " + oldD.getShortName());
}
}
DataType newType = oldVar.getDataType();
// convert STRING to CHAR
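// (netCDF-3 has no String type: e.g. a STRING variable "name(station)" whose longest value is
// 12 chars becomes "char name(station, name_strlen)" with name_strlen = 12, as built below)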
if (oldVar.getDataType() == DataType.STRING) {
Array data = oldVar.read();
IndexIterator ii = data.getIndexIterator();
int max_len = 0;
while (ii.hasNext()) {
String s = (String) ii.getObjectNext();
max_len = Math.max(max_len, s.length());
}
// add last dimension
String useName = oldVar.getShortName() + "_strlen";
Dimension newD = writer.addDimension(null, useName, max_len);
dims.add(newD);
newType = DataType.CHAR;
}
Variable v = writer.addVariable(null, oldVar.getShortName(), newType, dims);
if (debug) System.out.println("add var= " + v.getNameAndDimensions());
varMap.put(oldVar, v);
varList.add(oldVar);
// attributes
for (Attribute orgAtt : oldVar.getAttributes()) {
writer.addVariableAttribute(v, convertAttribute(orgAtt));
}
}
}
private void addGroupExtended(Group newParent, Group oldGroup) throws IOException {
Group newGroup = writer.addGroup(newParent, oldGroup.getShortName());
// attributes
for (Attribute att : oldGroup.getAttributes()) {
writer.addGroupAttribute(newGroup, att); // atts are immutable
if (debug) System.out.println("add gatt= " + att);
}
// typedefs
for (EnumTypedef td : oldGroup.getEnumTypedefs()) {
writer.addTypedef(newGroup, td); // td are immutable
if (debug) System.out.println("add td= " + td);
}
// dimensions
Map<String, Dimension> dimHash = new HashMap<>();
for (Dimension oldD : oldGroup.getDimensions()) {
Dimension newD = writer.addDimension(newGroup, oldD.getShortName(), oldD.isUnlimited() ? 0 : oldD.getLength(),
oldD.isUnlimited(), oldD.isVariableLength());
dimHash.put(oldD.getShortName(), newD);
if (debug) System.out.println("add dim= " + newD);
}
// Variables
for (Variable oldVar : oldGroup.getVariables()) {
List<Dimension> dims = new ArrayList<>();
for (Dimension oldD : oldVar.getDimensions()) {
// in case the name changed
Dimension newD = oldD.isShared() ? dimHash.get(oldD.getShortName()) : oldD;
if (newD == null)
newD = newParent.findDimension(oldD.getShortName());
if (newD == null)
throw new IllegalStateException("Cant find dimension " + oldD.getShortName());
dims.add(newD);
}
DataType newType = oldVar.getDataType();
Variable v;
if (newType == DataType.STRUCTURE) {
v = writer.addCopyOfStructure(newGroup, (Structure) oldVar, oldVar.getShortName(), dims);
} else if (newType.isEnum()) {
EnumTypedef en = oldVar.getEnumTypedef();
v = writer.addVariable(newGroup, oldVar.getShortName(), newType, dims);
v.setEnumTypedef(en);
} else {
v = writer.addVariable(newGroup, oldVar.getShortName(), newType, dims);
}
varMap.put(oldVar, v);
varList.add(oldVar);
if (debug) System.out.println("add var= " + v);
// attributes
for (Attribute att : oldVar.getAttributes())
writer.addVariableAttribute(v, att); // atts are immutable
}
// nested groups
for (Group nested : oldGroup.getGroups())
addGroupExtended(newGroup, nested);
}
// munge attribute if needed
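// netCDF-3 (the classic model) has no unsigned integer types, so an unsigned attribute's values
// are re-wrapped over the same primitive storage with the unsigned flag off (same bit pattern)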
private Attribute convertAttribute(Attribute org) {
if (version.isExtendedModel()) return org;
if (!org.getDataType().isUnsigned()) return org;
Array orgValues = org.getValues();
Array nc3Values = Array.makeFromJavaArray(orgValues.getStorage(), false);
return new Attribute(org.getShortName(), nc3Values);
}
/**
* Write data from varList into new file. Read/Write a maximum of maxSize bytes at a time.
* When there's a record variable, it's much more efficient to use it.
*
* @param oldVars list of variables from the original file, with data in them
* @param recordVar the record variable from the original file; null means don't use record variables
* @param cancel allow user to cancel, may be null.
* @return total number of bytes written
* @throws IOException if I/O error
*/
public double copyVarData(List<Variable> oldVars, Structure recordVar, CancelTask cancel) throws IOException {
boolean useRecordDimension = (recordVar != null);
// write non-record data
double total = 0;
int countVars = 0;
for (Variable oldVar : oldVars) {
if (useRecordDimension && oldVar.isUnlimited())
continue; // skip record variables
if (oldVar == recordVar)
continue;
if (debug)
System.out.println("write var= " + oldVar.getShortName() + " size = " + oldVar.getSize() + " type=" + oldVar.getDataType());
if (cancel != null)
cancel.setProgress("writing " + oldVar.getShortName(), countVars++);
long size = oldVar.getSize() * oldVar.getElementSize();
total += size;
if (size <= maxSize) {
copyAll(oldVar, varMap.get(oldVar));
} else {
copySome(oldVar, varMap.get(oldVar), maxSize, cancel);
}
if (cancel != null && cancel.isCancel()) return total;
}
// write record data
if (useRecordDimension) {
int[] origin = new int[]{0};
int[] size = new int[]{1};
int nrecs = (int) recordVar.getSize();
int sdataSize = recordVar.getElementSize();
Variable recordVarNew = varMap.get(recordVar);
double totalRecordBytes = 0;
for (int count = 0; count < nrecs; count++) {
origin[0] = count;
try {
Array recordData = recordVar.read(origin, size);
writer.write(recordVarNew, origin, recordData); // rather magic here - only writes the ones in ncfile !!
if (debug && (count == 0)) System.out.println("write record size = " + sdataSize);
} catch (InvalidRangeException e) {
e.printStackTrace();
break;
}
totalRecordBytes += sdataSize;
if (cancel != null && cancel.isCancel()) return total;
}
total += totalRecordBytes;
totalRecordBytes /= 1000 * 1000;
if (debug) System.out.println("write record var; total = " + totalRecordBytes + " Mbytes # recs=" + nrecs);
}
return total;
}
// copy all the data in oldVar to the newVar
void copyAll(Variable oldVar, Variable newVar) throws IOException {
Array data = oldVar.read();
try {
if (!version.isNetdf4format() && oldVar.getDataType() == DataType.STRING) {
data = convertToChar(newVar, data);
}
if (data.getSize() > 0) // zero when record dimension = 0
writer.write(newVar, data);
} catch (InvalidRangeException e) {
e.printStackTrace();
throw new IOException(e.getMessage() + " for Variable " + oldVar.getFullName());
}
}
/**
* Copies data from {@code oldVar} to {@code newVar}. The writes are done in a series of chunks no larger than
* {@code maxChunkSize} bytes.
*
* @param oldVar a variable from the original file to copy data from.
* @param newVar the corresponding variable in the new file to copy data to.
* @param maxChunkSize the size, in bytes, of the largest chunk to write.
* @param cancel allow user to cancel, may be null.
* @throws IOException if an I/O error occurs.
*/
private void copySome(Variable oldVar, Variable newVar, long maxChunkSize, CancelTask cancel) throws IOException {
long maxChunkElems = maxChunkSize / oldVar.getElementSize();
long byteWriteTotal = 0;
ChunkingIndex index = new ChunkingIndex(oldVar.getShape());
while (index.currentElement() < index.getSize()) {
try {
int[] chunkOrigin = index.getCurrentCounter();
int[] chunkShape = index.computeChunkShape(maxChunkElems);
if (cancel != null) cancel.setProgress("Reading chunk "+new Section(chunkOrigin, chunkShape)+" from variable: " + oldVar.getShortName(), -1);
/* writeProgressEvent.setWriteStatus("Reading chunk from variable: " + oldVar.getShortName());
if (progressListeners != null) {
for (FileWriterProgressListener listener : progressListeners) {
listener.writeProgress(writeProgressEvent);
}
} */
Array data = oldVar.read(chunkOrigin, chunkShape);
if (!version.isNetdf4format() && oldVar.getDataType() == DataType.STRING) {
data = convertToChar(newVar, data);
}
if (data.getSize() > 0) {// zero when record dimension = 0
if (cancel != null) cancel.setProgress("Writing chunk "+new Section(chunkOrigin, chunkShape)+" from variable: " + oldVar.getShortName(), -1);
writer.write(newVar, chunkOrigin, data);
if (debugWrite)
System.out.println(" write " + data.getSize() + " elements at " + new Section(chunkOrigin, chunkShape));
byteWriteTotal += data.getSize() * oldVar.getElementSize(); // getSize() counts elements, not bytes
}
index.setCurrentCounter(index.currentElement() + (int) Index.computeSize(chunkShape));
if (cancel != null && cancel.isCancel()) return;
} catch (InvalidRangeException e) {
e.printStackTrace();
throw new IOException(e.getMessage());
}
}
}
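// convert an Array of Strings into an ArrayChar with one extra (strlen) dimension:
// each String is written along the last dimension of the new CHAR variable's shape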
private Array convertToChar(Variable newVar, Array oldData) {
ArrayChar newData = (ArrayChar) Array.factory(DataType.CHAR, newVar.getShape());
Index ima = newData.getIndex();
IndexIterator ii = oldData.getIndexIterator();
while (ii.hasNext()) {
String s = (String) ii.getObjectNext();
int[] c = ii.getCurrentCounter();
for (int i = 0; i < c.length; i++)
ima.setDim(i, c[i]);
newData.setString(ima, s);
}
return newData;
}
/* private boolean hasRecordStructure(NetcdfFile file) {
Variable v = file.findVariable("record");
return (v != null) && (v.getDataType() == DataType.STRUCTURE);
} */
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// contributed by [email protected] 4/12/2010
/**
* An index that computes chunk shapes. It is intended to be used to compute the origins and shapes for a series
* of contiguous writes to a multidimensional array.
* It writes the first n elements (n < maxChunkElems), then the next, etc.
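*
* A minimal sketch of the intended loop (mirroring copySome() above):
* <pre>{@code
* ChunkingIndex index = new ChunkingIndex(v.getShape());
* while (index.currentElement() < index.getSize()) {
*   int[] chunkOrigin = index.getCurrentCounter();
*   int[] chunkShape = index.computeChunkShape(maxChunkElems);
*   // ... read/write the chunk at (chunkOrigin, chunkShape) ...
*   index.setCurrentCounter(index.currentElement() + (int) Index.computeSize(chunkShape));
* }
* }</pre>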
*/
public static class ChunkingIndex extends Index {
public ChunkingIndex(int[] shape) {
super(shape);
}
/**
* Computes the shape of the largest possible contiguous chunk, starting at {@link #getCurrentCounter()}
* and with {@code numElems <= maxChunkElems}.
*
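* For example (assumed values): for shape {10, 1000} with current counter {0, 0} and
* maxChunkElems = 2500, the strides are {1000, 1}, so this returns {2, 1000} (2000 elements).
*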
* @param maxChunkElems the maximum number of elements in the chunk shape. The actual element count of the shape
* returned is likely to be different, and can be found with {@link Index#computeSize}.
* @return the shape of the largest possible contiguous chunk.
*/
public int[] computeChunkShape(long maxChunkElems) {
int[] chunkShape = new int[rank];
for (int iDim = 0; iDim < rank; ++iDim) {
int size = (int) (maxChunkElems / stride[iDim]);
size = (size == 0) ? 1 : size;
size = Math.min(size, shape[iDim] - current[iDim]);
chunkShape[iDim] = size;
}
return chunkShape;
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
private static void usage() {
System.out.println("usage: ucar.nc2.FileWriter2 -in -out [-netcdf4]");
}
/**
* Better to use the ucar.nc2.dataset.NetcdfDataset main program instead.
* ucar.nc2.FileWriter2 -in fileIn -out fileOut
*
* where:
* - fileIn : path of any CDM readable file
* - fileOut : local pathname where the netcdf-3 file will be written
*
* @param arg -in fileIn -out fileOut [-netcdf4]
* @throws IOException on read or write error
*/
public static void main(String arg[]) throws IOException {
if (arg.length < 4) {
usage();
System.exit(0);
}
String datasetIn = null, datasetOut = null;
NetcdfFileWriter.Version version = NetcdfFileWriter.Version.netcdf3;
for (int i = 0; i < arg.length; i++) {
String s = arg[i];
if (s.equalsIgnoreCase("-in")) datasetIn = arg[i + 1];
if (s.equalsIgnoreCase("-out")) datasetOut = arg[i + 1];
if (s.equalsIgnoreCase("-netcdf4")) version = NetcdfFileWriter.Version.netcdf4;
}
if ((datasetIn == null) || (datasetOut == null)) {
usage();
System.exit(0);
}
System.out.printf("FileWriter2 copy %s to %s ", datasetIn, datasetOut);
CancelTaskImpl cancel = new CancelTaskImpl();
NetcdfFile ncfileIn = ucar.nc2.NetcdfFile.open(datasetIn, cancel);
if (cancel.isCancel()) return;
FileWriter2 writer2 = new FileWriter2(ncfileIn, datasetOut, version, null); // currently only the default chunker
NetcdfFile ncfileOut = writer2.write(cancel);
if (ncfileOut != null) ncfileOut.close();
ncfileIn.close();
System.out.printf("%s%n", cancel);
}
// Q:/cdmUnitTest/formats/netcdf4/tst/tst_groups.nc
}