All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tutorial.javaapi.GeoIndexing Maven / Gradle / Ivy

/*
 * This file is part of ELKI:
 * Environment for Developing KDD-Applications Supported by Index-Structures
 *
 * Copyright (C) 2019
 * ELKI Development Team
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package tutorial.javaapi;

import java.util.Arrays;
import java.util.Random;

import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection;
import de.lmu.ifi.dbs.elki.datasource.DatabaseConnection;
import de.lmu.ifi.dbs.elki.distance.distancefunction.geo.LatLngDistanceFunction;
import de.lmu.ifi.dbs.elki.index.Index;
import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeFactory;
import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk.SortTileRecursiveBulkSplit;
import de.lmu.ifi.dbs.elki.logging.LoggingConfiguration;
import de.lmu.ifi.dbs.elki.math.geodesy.WGS84SpheroidEarthModel;
import de.lmu.ifi.dbs.elki.persistent.AbstractPageFileFactory;
import de.lmu.ifi.dbs.elki.utilities.ELKIBuilder;
import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.It;

/**
 * Example code for using the R-tree index of ELKI with a latitude/longitude
 * distance function on the WGS84 earth model.
 * <p>
 * The example generates random coordinates, bulk-loads them into an R*-tree,
 * runs two k-nearest-neighbor queries (one of them close to the date line),
 * and logs the index statistics before and after the queries.
 *
 * @author Erich Schubert
 * @since 0.7.5
 */
public class GeoIndexing {
  /**
   * Number of random points to generate.
   */
  private static final int NUM_POINTS = 100000;

  /**
   * Number of nearest neighbors to retrieve per query.
   */
  private static final int K = 10;

  /**
   * Run the example.
   *
   * @param args Command line arguments (unused)
   */
  public static void main(String[] args) {
    // Set the logging level to statistics:
    LoggingConfiguration.setStatistics();

    // Generate a random data set, with a fixed seed for reproducibility.
    // Note: ELKI has a nice data generator class, use that instead.
    Random rand = new Random(0L);
    double[][] data = new double[NUM_POINTS][];
    for(int i = 0; i < data.length; i++) {
      data[i] = randomLatitudeLongitude(rand);
    }

    // Adapter to load data from an existing array.
    DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);

    // Since the R-tree has so many options, it is a bit easier to configure it
    // using the parameterization API, which handles defaults, instantiation,
    // and additional constraint checks.
    RStarTreeFactory<?> indexfactory = new ELKIBuilder<>(RStarTreeFactory.class) //
        // If you have large query results, a larger page size can be better.
        .with(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 512) //
        // Use bulk loading, for better performance.
        .with(RStarTreeFactory.Parameterizer.BULK_SPLIT_ID, SortTileRecursiveBulkSplit.class) //
        .build();
    // Create the database, and initialize it.
    Database db = new StaticArrayDatabase(dbc, Arrays.asList(indexfactory));
    // This will build the index of the database.
    db.initialize();
    // Relation containing the number vectors we put in above:
    Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // We can use this to identify rows of the input data below.
    DBIDRange ids = (DBIDRange) rel.getDBIDs();

    // For all indexes, dump their statistics.
    logIndexStatistics(db);

    // We use the WGS84 earth model, and "latitude, longitude" coordinates:
    // This distance function returns meters.
    LatLngDistanceFunction df = new LatLngDistanceFunction(WGS84SpheroidEarthModel.STATIC);
    // k nearest neighbor query:
    KNNQuery<NumberVector> knnq = QueryUtil.getKNNQuery(rel, df);

    // Let's find the closest points to New York:
    DoubleVector newYork = DoubleVector.wrap(new double[] { 40.730610, -73.935242 });
    printNearest("Close to New York:", knnq, newYork, rel, ids);

    // Many other indexes will fail if we search close to the date line.
    DoubleVector tuvalu = DoubleVector.wrap(new double[] { -7.4784205, 178.679924 });
    printNearest("Close to Tuvalu:", knnq, tuvalu, rel, ids);
    // Note that the ELKI index does find points both at longitude +179 and -179

    // For all indexes, dump their statistics, again.
    // You will see some have increased their values.
    // But also that we only read a small part of the data, and only computed
    // the distances to a few points in the data set.
    logIndexStatistics(db);
  }

  /**
   * Log the statistics of every index attached to the database.
   *
   * @param db Database whose hierarchy is searched for indexes
   */
  private static void logIndexStatistics(Database db) {
    for(It<Index> it = db.getHierarchy().iterDescendants(db).filter(Index.class); it.valid(); it.advance()) {
      it.get().logStatistics();
    }
  }

  /**
   * Run a kNN query and print each neighbor with its distance (in km) and its
   * row offset in the input data.
   *
   * @param header Line printed before the results
   * @param knnq Nearest neighbor query to use
   * @param query Query point, as "latitude, longitude"
   * @param rel Relation to look up the neighbor vectors
   * @param ids DBID range of the relation, to map DBIDs to row offsets
   */
  private static void printNearest(String header, KNNQuery<NumberVector> knnq, NumberVector query, Relation<NumberVector> rel, DBIDRange ids) {
    KNNList knns = knnq.getKNNForObject(query, K);
    System.out.println(header);
    // Iterate over all results.
    for(DoubleDBIDListIter it = knns.iter(); it.valid(); it.advance()) {
      double km = it.doubleValue() / 1000; // To kilometers
      System.out.println(rel.get(it) + " distance: " + km + " km row: " + ids.getOffset(it));
    }
  }

  /**
   * Generate random coordinates.
   * <p>
   * Latitude is in (-90, 90] and concentrated around the equator; longitude
   * is uniform in (-180, 180].
   *
   * @param r Random generator
   * @return Latitude, Longitude array.
   */
  private static double[] randomLatitudeLongitude(Random r) {
    // Make marginally more realistic looking data by non-uniformly sampling
    // latitude, since Earth is a sphere, and there is not much at the poles.
    // u is in (-1, 1]; u * |u| squares the magnitude but keeps the sign, so
    // both hemispheres are covered (a plain square would only yield [0, 90]).
    double u = 1. - r.nextDouble() * 2.;
    double lat = u * Math.abs(u) * 90.;
    double lng = (.5 - r.nextDouble()) * 360.;
    return new double[] { lat, lng };
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy