All downloads are free. The search and download functionality uses the official Maven repository.

com.github.tsingjyujing.geo.algorithm.cluster.MongoDBScan.scala Maven / Gradle / Ivy

There is a newer version: 2.8.9-2.11
Show newest version
package com.github.tsingjyujing.geo.algorithm.cluster

import com.github.tsingjyujing.geo.algorithm.containers.{ClusterResult, LabeledPoint}
import com.github.tsingjyujing.geo.basic.IGeoPoint
import com.github.tsingjyujing.geo.basic.operations.GeoJSONable
import com.github.tsingjyujing.geo.element.GeoPolygon
import com.github.tsingjyujing.geo.element.immutable.{GeoPoint, Vector2}
import com.github.tsingjyujing.geo.util.mathematical.ConvexHull2
import com.mongodb.client.MongoCollection
import org.bson.Document
import org.bson.types.ObjectId

import scala.collection.JavaConverters._
import scala.util.control.NonFatal

/**
  * Incremental, DBSCAN-like clustering of geographic points that keeps its state in MongoDB.
  *
  * Every appended point is stored in `pointCollection` together with a class id. The class id is
  * taken from points already stored within `searchRadius` of the new point; when there are none,
  * a new class id is allocated. When a polygon collection is supplied, the convex hull of a class
  * can be stored there, either on demand or after every insert.
  *
  * NOTE: no locking or transaction is used here. The multi-step operations (lookup + insert,
  * relabel + insert, class id allocation) are therefore not atomic with respect to other writers
  * of the same collections.
  *
  * @author [email protected]
  * @param pointCollection   mongodb collection to save points
  * @param polygonCollection mongo collection to save polygons; `null` (the default) switches all
  *                          polygon handling off
  * @param searchRadius      db-scan algorithm parameter: radius used to search for stored points.
  *                          It is multiplied by 1000 before being sent as `$maxDistance`
  * @param needInit          whether to create the indexes on `pointCollection` during construction
  * @param isMergeClass      whether to merge classes when more than one class is found in searchRadius
  * @param updatePolygon     whether to refresh the polygon of a class automatically after a point
  *                          has been inserted into it
  * @param extendedIndexes   other indexes to set as Map: field name -> index type
  */
class MongoDBScan(
                     val pointCollection: MongoCollection[Document],
                     val polygonCollection: MongoCollection[Document] = null,
                     val searchRadius: Double = 0.5,
                     val needInit: Boolean = false,
                     val isMergeClass: Boolean = false,
                     val updatePolygon: Boolean = false,
                     val extendedIndexes: Map[String, Object] = Map.empty
                 ) {

    /** True when a polygon collection was supplied; every polygon operation is a no-op otherwise. */
    private val withPolygon: Boolean = polygonCollection != null

    // Best-effort index creation: a failure is reported on stdout/stderr but does not abort
    // construction. Only non-fatal errors are handled, so OutOfMemoryError, InterruptedException
    // and the like still propagate.
    if (needInit) {
        try {
            pointCollection.createIndex(new Document(MongoDBScan.pointFieldName, "2dsphere"))
            pointCollection.createIndex(new Document(MongoDBScan.classIdFieldName, "hashed"))
            extendedIndexes.foreach {
                case (fieldName, indexType) =>
                    pointCollection.createIndex(new Document(fieldName, indexType))
            }
        } catch {
            case NonFatal(ex) =>
                println("Init failed caused by:")
                ex.printStackTrace()
        }
    }

    /**
      * Insert one point into collection
      *
      * When `updatePolygon` is set and a polygon collection exists, the polygon of the affected
      * class is refreshed afterwards. A class that does not yet yield a polygon (fewer than three
      * points or hull vertices) simply ends up without one; this no longer raises an exception
      * after the point has already been stored.
      *
      * @param point      point to insert
      * @param appendInfo other field add to point
      * @return the class id assigned to the inserted point
      */
    def appendPoint(point: IGeoPoint, appendInfo: Document): Int = {
        val classId = if (isMergeClass) {
            appendPointWithMerge(point, appendInfo)
        } else {
            appendPointWithoutMerge(point, appendInfo)
        }
        if (updatePolygon && withPolygon) {
            replacePolygon(classId, failOnDegenerate = false)
        }
        classId
    }

    /**
      * Build the filter that selects the stored points lying within `searchRadius` of `point`.
      *
      * @param point centre of the search
      * @return filter document for `pointCollection`
      */
    private def nearbyFilter(point: IGeoPoint): Document = new Document(
        MongoDBScan.pointFieldName, MongoDBScan.getGeoSearchCondition(point, searchRadius)
    )

    /**
      * Insert one point into collection without merge
      *
      * The point receives the class id of the first stored point returned by the radius query, or
      * a new class id when the query returns nothing. A failing lookup is reported on stderr and
      * treated like "nothing found" (the previous behaviour, minus the silence). The point is
      * inserted exactly once; an insert failure propagates to the caller instead of triggering a
      * second insert under a different class id.
      *
      * @param point      point to insert
      * @param appendInfo other field add to point
      * @return the class id assigned to the inserted point
      */
    private def appendPointWithoutMerge(point: IGeoPoint, appendInfo: Document): Int = {
        val nearbyClassId: Option[Int] = try {
            // first() yields null when nothing matches; a document without a class id is ignored.
            Option(pointCollection.find(nearbyFilter(point)).first())
                .flatMap(doc => Option(doc.getInteger(MongoDBScan.classIdFieldName)))
                .map(_.intValue())
        } catch {
            case NonFatal(ex) =>
                System.err.println("Nearby point lookup failed, assigning a new class: " + ex)
                None
        }
        val classId = nearbyClassId.getOrElse(getNewClassId)
        insertPoint(point, classId, appendInfo)
        classId
    }

    /**
      * Insert one point into collection with merge
      *
      * All distinct class ids found within the search radius are collected. With none, a new class
      * id is allocated. Otherwise the smallest id survives and EVERY stored point belonging to one
      * of the other classes found is relabelled to it, so the classes are really merged (relabelling
      * only the points inside the radius would split the absorbed classes instead). With
      * `updatePolygon` set, the polygons of the absorbed classes are removed because those classes
      * no longer own any point.
      *
      * @param point      point to insert
      * @param appendInfo other field add to point
      * @return the class id assigned to the inserted point
      */
    private def appendPointWithMerge(point: IGeoPoint, appendInfo: Document): Int = {
        val nearbyClassIds: Set[Int] = pointCollection.distinct(
            MongoDBScan.classIdFieldName,
            nearbyFilter(point),
            classOf[java.lang.Integer]
        ).asScala.filter(_ != null).map(_.intValue()).toSet

        val classId = if (nearbyClassIds.isEmpty) {
            getNewClassId
        } else {
            val survivingId = nearbyClassIds.min
            val absorbedIds = nearbyClassIds - survivingId
            if (absorbedIds.nonEmpty) {
                pointCollection.updateMany(
                    new Document(
                        MongoDBScan.classIdFieldName,
                        new Document("$in", absorbedIds.toSeq.map(id => Int.box(id)).asJava)
                    ),
                    new Document("$set", new Document(MongoDBScan.classIdFieldName, survivingId))
                )
                if (updatePolygon) {
                    absorbedIds.foreach(removePolygon)
                }
            }
            survivingId
        }
        insertPoint(point, classId, appendInfo)
        classId
    }

    /**
      * Insert one point into collection
      *
      * The stored document is a copy of `appendInfo` (the caller's document is not modified) with
      * the class id and the GeoJSON form of the point added. A `null` `appendInfo` is treated as
      * "no extra fields".
      *
      * @param point      point to insert
      * @param classId    class of point
      * @param appendInfo other field add to point
      * @return the `_id` of the inserted document
      */
    private def insertPoint(point: IGeoPoint, classId: Int, appendInfo: Document): ObjectId = {
        val base: Document = if (appendInfo == null) new Document() else new Document(appendInfo)
        val document: Document = base.append(
            MongoDBScan.classIdFieldName, classId
        ).append(
            MongoDBScan.pointFieldName, Document.parse(point.toGeoJSONString)
        )
        pointCollection.insertOne(document)
        document.getObjectId("_id")
    }

    /**
      * Get an unused class id
      *
      * Computed as the largest stored class id plus one. When the aggregation yields no result
      * (empty collection) or no maximum (no document carries a class id), 0 is returned.
      *
      * @return a class id larger than every class id currently stored
      */
    private def getNewClassId: Int = {
        val maxClassIdStage = new Document(
            "$group", new Document(
                "_id", "id"
            ).append(
                "maxId", new Document("$max", "$" + MongoDBScan.classIdFieldName)
            )
        )
        // An empty collection produces no group at all, so first() is null and we start at 0;
        // no separate count() round trip is needed.
        Option(pointCollection.aggregate(Seq(maxClassIdStage).asJava).first())
            .flatMap(result => Option(result.getInteger("maxId")))
            .map(_.intValue() + 1)
            .getOrElse(0)
    }


    /**
      * Generate polygon of some class
      *
      * The stored polygon of the class is removed first and then rebuilt from the convex hull of
      * the class' points. Does nothing when no polygon collection was supplied.
      *
      * @param classId class id
      * @throws RuntimeException when the class has fewer than 3 points or its hull has fewer than
      *                          3 vertices (the old polygon has already been removed by then)
      */
    def generatePolygon(classId: Int): Unit = if (withPolygon) {
        replacePolygon(classId, failOnDegenerate = true)
    }

    /**
      * Remove the stored polygon of a class and store a freshly computed one when possible.
      * Callers must have checked `withPolygon`.
      *
      * @param classId          class id
      * @param failOnDegenerate when true, a class that yields no polygon raises a RuntimeException;
      *                         when false, the class is just left without a polygon
      */
    private def replacePolygon(classId: Int, failOnDegenerate: Boolean): Unit = {
        removePolygon(classId)

        val points: IndexedSeq[Vector2] = pointCollection.find(
            new Document(MongoDBScan.classIdFieldName, classId)
        ).asScala.map(doc => {
            val coordinates = doc.get(MongoDBScan.pointFieldName, classOf[Document]).get("coordinates", classOf[java.util.List[_]])
            Vector2(coordinateAt(coordinates, 0), coordinateAt(coordinates, 1))
        }).toIndexedSeq

        val degenerateReason: Option[String] =
            if (points.size < 3) {
                Some("Less than 3 points got in raw data")
            } else {
                val convexData = ConvexHull2(points).map(p => {
                    GeoPoint(p.getX, p.getY)
                })
                if (convexData.size < 3) {
                    Some("Less than 3 points got in polygon")
                } else {
                    val polygon = GeoPolygon(convexData)
                    polygonCollection.insertOne(new Document("_id", classId).append("area", Document.parse(polygon.toGeoJSONString)))
                    None
                }
            }

        if (failOnDegenerate) {
            degenerateReason.foreach(reason => throw new RuntimeException(reason))
        }
    }

    /**
      * Read one coordinate of a stored point as a Double, accepting any numeric BSON type
      * (a coordinate stored as an integer is not a java.lang.Double).
      *
      * @param coordinates the "coordinates" array of a stored point
      * @param index       position inside the array
      * @return the coordinate as a Double
      */
    private def coordinateAt(coordinates: java.util.List[_], index: Int): Double =
        (coordinates.get(index): Any) match {
            case number: java.lang.Number => number.doubleValue()
            case other => throw new ClassCastException("Coordinate %d is not numeric: %s".format(index, String.valueOf(other)))
        }

    /**
      * Delete polygon of some class
      *
      * Does nothing when no polygon collection was supplied.
      *
      * @param classId class id
      */
    def removePolygon(classId: Int): Unit = if (withPolygon) polygonCollection.deleteOne(new Document("_id", classId))

    /**
      * Regenerate all polygons
      *
      * Clears the polygon collection and rebuilds the polygon of every class id present in the
      * point collection. A class whose polygon cannot be built is reported on stderr and skipped.
      * Does nothing when no polygon collection was supplied.
      */
    def regenerateAllPolygons(): Unit = if (withPolygon) {
        polygonCollection.deleteMany(new Document())
        val classIds: Set[Int] = pointCollection.distinct(
            MongoDBScan.classIdFieldName, classOf[java.lang.Integer]
        ).asScala.filter(_ != null).map(_.intValue()).toSet
        classIds.foreach(classId => try {
            generatePolygon(classId)
        } catch {
            case NonFatal(ex) =>
                System.err.println("Error while processing polygon of class %d.".format(classId))
                ex.printStackTrace()
        })
    }

    /**
      * Read every stored point together with its class id.
      *
      * @return the cluster result built from all documents of the point collection
      */
    def getClusterResult: ClusterResult[Int, GeoPoint] = ClusterResult(
        pointCollection.find().asScala.map(
            doc => LabeledPoint(
                doc.getInteger(MongoDBScan.classIdFieldName).toInt,
                GeoJSONable.parseGeoPoint(doc.get(MongoDBScan.pointFieldName, classOf[Document]).toJson())
            )
        )
    )
}

object MongoDBScan {

    /** Name of the document field that stores the class id of a point. */
    private val classIdFieldName: String = "classId"

    /** Name of the document field that stores the GeoJSON form of a point. */
    private val pointFieldName: String = "point"

    /**
      * Build the `$nearSphere` condition matching everything between distance 0 and
      * `searchRadius * 1000` around `point`.
      *
      * NOTE(review): MongoDB reads `$maxDistance` in meters for GeoJSON geometry, so
      * `searchRadius` is presumably given in kilometres - confirm with callers.
      *
      * @param point        centre of the search, serialised through its GeoJSON string
      * @param searchRadius search radius, scaled by 1000 for the query
      * @return the condition to be used as the value of the point field in a filter
      */
    private def getGeoSearchCondition(point: IGeoPoint, searchRadius: Double): Document = {
        val centre = Document.parse(point.toGeoJSONString)
        val sphereClause = new Document("$geometry", centre)
        sphereClause.append("$minDistance", 0.0)
        sphereClause.append("$maxDistance", searchRadius * 1000.0)
        new Document("$nearSphere", sphereClause)
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy