All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ucar.nc2.internal.ncml.AggregationExisting Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 1998-2018 John Caron and University Corporation for Atmospheric Research/Unidata
 * See LICENSE for license information.
 */

package ucar.nc2.internal.ncml;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.jdom2.Element;
import thredds.client.catalog.Catalog;
import thredds.inventory.MFile;
import ucar.ma2.Array;
import ucar.ma2.DataType;
import ucar.nc2.Attribute;
import ucar.nc2.Dimension;
import ucar.nc2.Group;
import ucar.nc2.NetcdfFile;
import ucar.nc2.Variable;
import ucar.nc2.constants.CDM;
import ucar.nc2.constants.CF;
import ucar.nc2.constants._Coordinate;
import ucar.nc2.dataset.NetcdfDataset;
import ucar.nc2.dataset.VariableDS;
import ucar.nc2.util.CancelTask;

/**
 * "JoinExisting" Aggregation. Existing means that the dimension already exists in the component variables.
 * For every variable with that dimension, replace it with its "aggreation" whose dimensions lebgth is the
 * sum of the the component lengths. The dimensions must be the outer dimension.
 *
 * @author caron
 */
class AggregationExisting extends AggregationOuter {

  AggregationExisting(NetcdfDataset.Builder ncd, String dimName, String recheckS) {
    super(ncd, dimName, Type.joinExisting, recheckS);
  }

  protected void buildNetcdfDataset(CancelTask cancelTask) throws IOException {
    // open a "typical" nested dataset and copy it to newds
    AggDataset typicalDataset = getTypicalDataset();
    NetcdfFile typical = typicalDataset.acquireFile(null);
    BuilderHelper.transferDataset(typical, ncDataset, null);
    Group.Builder rootGroup = ncDataset.rootGroup;

    // LOOK not dealing with groups

    // a little tricky to get the coord var cached if we have to read through the datasets on the buildCoords()
    String dimName = getDimensionName();
    CacheVar coordCacheVar = null;
    if (type != Type.joinExistingOne) {
      Variable tcv = typical.findVariable(dimName);
      if (tcv != null) {
        coordCacheVar = new CoordValueVar(dimName, tcv.getDataType(), tcv.getUnitsString());
      } else {
        // docs claim we can add the coord variable in the outer ncml, so make a fake one for now, make
        // sure it gets removed if user adds it outside of aggregation.
        VariableDS.Builder fake = VariableDS.builder().setName(dimName).setDataType(DataType.INT)
            .setParentGroupBuilder(rootGroup).setDimensionsByName(dimName);
        fake.setAutoGen(0, 1);
        rootGroup.addVariable(fake);
        logger.warn("Adding a fake coord var for {}", dimName);
      }

    } else {
      coordCacheVar = new CoordValueVar(dimName, DataType.STRING, "");
    }
    if (coordCacheVar != null) {
      cacheList.add(coordCacheVar); // coordinate variable is always cached
    }

    // gotta check persistence info - before buildCoords - if its going to do any good
    persistRead();

    // now find out how many coordinates we have, caching values if needed
    buildCoords(cancelTask);

    // create aggregation dimension, now that we know the size
    Dimension aggDim = new Dimension(dimName, getTotalCoords());
    rootGroup.removeDimension(dimName); // remove previous declaration, if any
    rootGroup.addDimension(aggDim);

    promoteGlobalAttributes((AggDatasetOuter) typicalDataset);

    // now create the agg variables
    // all variables with the named aggregation dimension
    for (Variable v : typical.getVariables()) {
      if (v.getRank() < 1) {
        continue;
      }
      // TODO whats with the full name ?
      String outerName = v.getDimension(0).makeFullName();
      if (!dimName.equals(outerName)) {
        continue;
      }

      // LOOK was Group.Builder newGroup = BuilderHelper.findGroup(rootGroup, v.getParentGroup());
      VariableDS.Builder vagg = VariableDS.builder().setName(v.getShortName()).setDataType(v.getDataType())
          .setParentGroupBuilder(rootGroup).setDimensionsByName(v.getDimensionsString());
      vagg.setProxyReader(this);
      BuilderHelper.transferAttributes(v, vagg.getAttributeContainer());

      rootGroup.replaceVariable(vagg);
      aggVars.add(vagg);

      if (cancelTask != null && cancelTask.isCancel()) {
        return;
      }
    }

    // handle the agg coordinate variable
    Optional> joinAggCoordOpt = rootGroup.findVariableLocal(dimName); // long name of dimension,
                                                                                          // coord
    // variable
    if (!joinAggCoordOpt.isPresent() && (type == Type.joinExisting)) {
      typicalDataset.close(typical); // clean up
      throw new IllegalArgumentException("No existing coordinate variable for joinExisting on " + getLocation());
    }
    VariableDS.Builder joinAggCoord = (VariableDS.Builder) joinAggCoordOpt.orElse(null);

    if (type == Type.joinExistingOne) {
      // replace aggregation coordinate variable
      joinAggCoordOpt.ifPresent(joinAgg -> rootGroup.removeVariable(joinAgg.shortName));

      joinAggCoord = VariableDS.builder().setName(dimName).setDataType(DataType.STRING).setParentGroupBuilder(rootGroup)
          .setDimensionsByName(dimName);
      joinAggCoord.setProxyReader(this);
      rootGroup.addVariable(joinAggCoord);
      aggVars.add(joinAggCoord);

      joinAggCoord.addAttribute(new Attribute(_Coordinate.AxisType, "Time"));
      joinAggCoord.addAttribute(new Attribute(CDM.LONG_NAME, "time coordinate"));
      joinAggCoord.addAttribute(new Attribute(CF.STANDARD_NAME, "time"));
    }

    if (timeUnitsChange && joinAggCoord != null) {
      readTimeCoordinates(joinAggCoord, cancelTask);
    }

    // make it a cacheVar
    if (joinAggCoord != null) {
      joinAggCoord.setSPobject(coordCacheVar);
    }

    // check persistence info - may have cached values other than coordinate LOOK ????
    persistRead();

    setDatasetAcquireProxy(typicalDataset, ncDataset);
    typicalDataset.close(typical);

    if (debugInvocation) {
      System.out.println(ncDataset.location + " invocation count = " + AggregationOuter.invocation);
    }

    // LOOK ncDataset.finish();
  }

  /**
   * Persist info (ncoords, coordValues) from joinExisting, since that can be expensive to
   * recreate.
   */
  public void persistWrite() throws IOException {
    if (diskCache2 == null) {
      return;
    }

    String cacheName = getCacheName();
    if (cacheName == null) {
      return;
    }
    if (cacheName.startsWith("file:")) { // LOOK HACK
      cacheName = cacheName.substring(5);
    }
    File cacheFile = diskCache2.getCacheFile(cacheName);
    if (cacheFile == null) {
      throw new IllegalStateException();
    }

    // only write out if something changed after the cache file was last written, or if the file has been deleted
    if (!cacheDirty && cacheFile.exists()) {
      return;
    }

    File dir = cacheFile.getParentFile();
    if (!dir.exists()) {
      if (!dir.mkdirs()) {
        logger.error("Cant make cache directory= " + cacheFile);
      }
    }

    // Get a file channel for the file
    try (FileOutputStream fos = new FileOutputStream(cacheFile);
        FileChannel channel = fos.getChannel();
        PrintWriter out = new PrintWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8))) {

      // Try acquiring the lock without blocking. This method returns
      // null or throws an exception if the file is already locked.
      FileLock lock;
      try {
        lock = channel.tryLock();
      } catch (OverlappingFileLockException e) {
        // File is already locked in this thread or virtual machine
        return; // give up
      }
      if (lock == null) {
        return;
      }

      out.print("\n");
      out.print("\n");

      List nestedDatasets = getDatasets();
      for (AggDataset dataset : nestedDatasets) {
        AggDatasetOuter dod = (AggDatasetOuter) dataset;
        if (dod.getId() == null) {
          logger.warn("id is null");
        }

        out.print("  \n");

        for (CacheVar pv : cacheList) {
          Array data = pv.getData(dod.getId());
          if (data != null) {
            out.print("    ");
            while (data.hasNext()) {
              out.printf("%s ", data.next());
            }
            out.print("\n");
            if (logger.isDebugEnabled()) {
              logger.debug(
                  " wrote array = " + pv.varName + " nelems= " + data.getSize() + " for " + dataset.getLocation());
            }
          }
        }
        out.print("  \n");
      }

      out.print("\n");

      long time = datasetManager.getLastScanned();
      if (time == 0) {
        time = System.currentTimeMillis(); // no scans (eg all static) will have a 0
      }

      if (!cacheFile.setLastModified(time)) {
        logger.warn("FAIL to set lastModified on {}", cacheFile.getPath());
      }
      cacheDirty = false;

      if (logger.isDebugEnabled()) {
        logger.debug("Aggregation persisted = " + cacheFile.getPath() + " lastModified= "
            + new Date(datasetManager.getLastScanned()));
      }
    }
  }

  // read info from the persistent XML file, if it exists
  protected void persistRead() {
    if (diskCache2 == null) {
      return;
    }

    String cacheName = getCacheName();
    if (cacheName == null) {
      return;
    }
    if (cacheName.startsWith("file:")) // LOOK
    {
      cacheName = cacheName.substring(5);
    }

    File cacheFile = diskCache2.getCacheFile(cacheName);
    if (cacheFile == null) {
      throw new IllegalStateException();
    }
    if (!cacheFile.exists()) {
      return;
    }
    long lastWritten = cacheFile.lastModified();

    if (logger.isDebugEnabled()) {
      logger.debug(" Try to Read cache {} " + cacheFile.getPath());
    }

    Element aggElem;
    try {
      aggElem = ucar.nc2.util.xml.Parse.readRootElement("file:" + cacheFile.getPath());
    } catch (IOException e) {
      if (debugCache) {
        System.out.println(" No cache for " + cacheName + " - " + e.getMessage());
      }
      return;
    }

    String version = aggElem.getAttributeValue("version");
    if (!"3".equals(version)) {
      return; // dont read old cache files, recreate
    }

    // use a map to find datasets to avoid O(n**2) searching
    Map map = new HashMap<>();
    for (AggDataset ds : getDatasets()) {
      map.put(ds.getId(), ds);
    }

    List ncList = aggElem.getChildren("netcdf", Catalog.ncmlNS);
    for (Element netcdfElemNested : ncList) {
      String id = netcdfElemNested.getAttributeValue("id");
      AggDatasetOuter dod = (AggDatasetOuter) map.get(id);

      if (null == dod) {
        // this should mean that the dataset has been deleted. so not a problem
        if (logger.isDebugEnabled()) {
          logger.debug(" have cache but no dataset= {}", id);
        }
        continue;
      }
      if (logger.isDebugEnabled()) {
        logger.debug(" use cache for dataset= {}", id);
      }

      MFile mfile = dod.getMFile();
      if (mfile != null && mfile.getLastModified() > lastWritten) { // skip datasets that have changed
        if (logger.isDebugEnabled()) {
          logger.debug(" dataset was changed= {}", mfile);
        }
        continue;
      }

      if (dod.ncoord == 0) {
        String ncoordsS = netcdfElemNested.getAttributeValue("ncoords");
        try {
          dod.ncoord = Integer.parseInt(ncoordsS);
          if (logger.isDebugEnabled()) {
            logger.debug(" Read the cache; ncoords = {}", dod.ncoord);
          }
        } catch (NumberFormatException e) {
          logger.error("bad ncoord attribute on dataset=" + id);
        }
      }

      // if (dod.coordValue != null) continue; // allow ncml to override

      List cacheElemList = netcdfElemNested.getChildren("cache", Catalog.ncmlNS);
      for (Element cacheElemNested : cacheElemList) {
        String varName = cacheElemNested.getAttributeValue("varName");
        CacheVar pv = findCacheVariable(varName);
        if (pv != null) {
          String sdata = cacheElemNested.getText();
          if (sdata.isEmpty()) {
            continue;
          }
          if (logger.isDebugEnabled()) {
            logger.debug(" read data for var = " + varName + " size= " + sdata.length());
          }

          // long start = System.nanoTime();
          String[] vals = sdata.split(" ");

          try {
            Array data = Array.makeArray(pv.dtype, vals);
            pv.putData(id, data);
            countCacheUse++;

          } catch (Exception e) {
            logger.warn("Error reading cached data ", e);
          }

        } else {
          logger.warn("not a cache var=" + varName);
        }
      }
    }

  }

  // name to use in the DiskCache2 for the persistent XML info.
  // Document root is aggregation

  // has the name getCacheName()
  private String getCacheName() {
    String cacheName = ncDataset.location;
    // if (cacheName == null) { LOOK
    // cacheName = ncDataset.getCacheName();
    // }
    return cacheName;
  }

  //////////////////////////////////////////////////
  // back door for testing
  public static int countCacheUse;

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy