All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.ucr.cs.bdlab.beast.indexing.SpatialPartitioner Maven / Gradle / Ivy

There is a newer version: 0.10.1-RC2
Show newest version
/*
 * Copyright 2018 University of California, Riverside
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.ucr.cs.bdlab.beast.indexing;

import edu.ucr.cs.bdlab.beast.common.BeastOptions;
import edu.ucr.cs.bdlab.beast.io.SpatialInputFormat;
import edu.ucr.cs.bdlab.beast.synopses.AbstractHistogram;
import edu.ucr.cs.bdlab.beast.synopses.Summary;
import edu.ucr.cs.bdlab.beast.util.IntArray;
import edu.ucr.cs.bdlab.beast.geolite.EnvelopeND;
import edu.ucr.cs.bdlab.beast.geolite.EnvelopeNDLite;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.Externalizable;
import java.io.IOException;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.util.ArrayList;
import java.util.List;

/**
 * An interface for spatially partitioning data into partitions.
 * @author Ahmed Eldawy
 *
 */
public interface SpatialPartitioner extends Externalizable {
  Log LOG = LogFactory.getLog(SpatialPartitioner.class);

  @Target(ElementType.TYPE)
  @Retention(RetentionPolicy.RUNTIME)
  @interface Metadata {
    /**
     * Whether this global indexer supports disjoint partitions or not
     * @return {@code true} if disjoint partitioning is supported for this partitioner
     */
    boolean disjointSupported();

    /**
     * The extension used with the global index. Also used for choosing this partitioner from command line.
     * @return the extension including the dot
     */
    String extension();

    /**
     * A short description for this partitioner
     * @return the description of this partitioner
     */
    String description();
  }

  /**
   * Annotates the required parameters of the {@link #construct(Summary, double[][], AbstractHistogram, int)}
   * function in implementation of this interface.
   */
  @Target(ElementType.PARAMETER)
  @Retention(RetentionPolicy.RUNTIME)
  @interface Required {}

  /**
   * Annotates the parameters of the {@link #construct(Summary, double[][], AbstractHistogram, int)}
   * function that can be used but are not required.
   */
  @Target(ElementType.PARAMETER)
  @Retention(RetentionPolicy.RUNTIME)
  @interface Preferred {}

  /**
   * Initializes the partitioner before it is first constructed.
   * @param conf the environment configuration
   * @param disjoint set to true to produce disjoint partitions with probable replication of some features
   */
  default void setup(BeastOptions conf, boolean disjoint) {}


  /**
   * Constructs the histogram using the given parameters. By default, only the summary parameter is provided
   * and the other two parameters {@code sample} and {@code histogram} are {@code null}. If any of these are needed,
   * the parameter should be annotated with {@link Required}. If any of them can be used to improve the results but
   * are not necessarily required, they can be annotated with {@link Preferred}.
   * @param summary the summary of the input
   * @param sample a sample points from the input
   * @param histogram a histogram of the input
   * @param numPartitions the desired number of partitions. This is treated as a loose hint and not a strict value
   */
  void construct(Summary summary, double[][] sample, AbstractHistogram histogram, int numPartitions);

  /**
   * Find all the overlapping partitions in an envelope (MBR).
   * @param mbr the MBR to find overlapping partitions
   * @param matchedPartitions an array to use for matched partitions
   */
  void overlapPartitions(EnvelopeNDLite mbr, IntArray matchedPartitions);

  default void overlapPartitions(EnvelopeND mbr, IntArray matchedPartitions) {
    overlapPartitions(new EnvelopeNDLite(mbr), matchedPartitions);
  }
  
  /**
   * Returns the single partition that contains the center of the given envelope.
   * This method should return a value in the range [0, {@link #getPartitionCount()}[
   *
   * @param mbr the minimum bounding rectangle to find a partition
   * @return the ID of the best partition that the given MBR should be assigned to.
   */
  default int overlapPartition(EnvelopeND mbr) {
    return overlapPartition(new EnvelopeNDLite(mbr));
  }

  int overlapPartition(EnvelopeNDLite mbr);
  
  /**
   * Returns the minimum bounding rectangle (MBR) of the given partition.
   * @param partitionID the ID of the partition to get its MBR.
   * @param mbr output parameter for the MBR to avoid allocation of objects
   */
  void getPartitionMBR(int partitionID, EnvelopeNDLite mbr);

  default EnvelopeNDLite getPartitionMBR(int partitionID) {
    EnvelopeNDLite mbr = new EnvelopeNDLite(this.getCoordinateDimension());
    getPartitionMBR(partitionID, mbr);
    return mbr;
  }
  
  /**
   * Returns total number of partitions
   * @return the total number of partitions
   */
  int getPartitionCount();

  /**
   * Whether this partitioner is configured to produce disjoint partitions
   * @return {@code true} if this partitions will produce spatially disjoint partitions
   */
  boolean isDisjoint();

  /**
   * Number of dimensions for this partitioner
   * @return the dimensions of the partitioner
   */
  int getCoordinateDimension();

  default Metadata getMetadata() {
    return this.getClass().getAnnotation(Metadata.class);
  }

  /**
   * Reads the most recent master file from the given path.
   * @param fileSystem the file system that contains the index
   * @param indexPath the path to the index (not the master file)
   * @return an array of partitions from the file. If no master file is found, a {@code null} is returned.
   * @throws IOException if an error happened while reading the file
   */
  static PartitionInfo[] readMasterFile(FileSystem fileSystem, Path indexPath) throws IOException {
    Path masterFilePath = SpatialInputFormat.getMasterFilePath(fileSystem, indexPath);
    List partitions = new ArrayList<>();
    MasterFileReader reader = new MasterFileReader();
    try {
      Configuration conf = new Configuration();
      reader.initialize(masterFilePath, conf);
      while (reader.nextKeyValue()) {
        partitions.add(reader.getCurrentValue());
      }
      return partitions.toArray(new PartitionInfo[partitions.size()]);
    } finally {
      reader.close();
    }
  }

  /**
   * Returns the MBR of the underlying dataset on which the partitioner was created.
   * @return the envelope of the entire partitioner
   */
  EnvelopeNDLite getEnvelope();
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy