* Copyright (c) 2013-2017 Commonwealth Computer Research, Inc.
* All rights reserved. This program and the accompanying materials are
* made available under the terms of the GNU GENERAL PUBLIC LICENSE,
* Version 2 which accompanies this distribution and is available at
import{BufferedOutputStream, OutputStream}
import java.util.concurrent.atomic.AtomicBoolean
import java.util.concurrent.{CountDownLatch, Executors}
import javax.xml.namespace.QName
import com.typesafe.scalalogging.LazyLogging
import net.opengis.wfs.{GetFeatureType => GetFeatureTypeV1, QueryType => QueryTypeV1}
import net.opengis.wfs20.{GetFeatureType => GetFeatureTypeV2, QueryType => QueryTypeV2}
import org.geoserver.config.GeoServer
import org.geoserver.ows.Response
import org.geoserver.platform.Operation
import org.geoserver.wfs.WFSGetFeatureOutputFormat
import org.geoserver.wfs.request.{FeatureCollectionResponse, GetFeatureRequest}
import org.geotools.util.Version
import org.locationtech.geomesa.index.conf.QueryHints._
import org.locationtech.geomesa.index.planning.QueryPlanner
import org.locationtech.geomesa.index.utils.bin.BinSorter
import org.locationtech.geomesa.utils.bin.BinaryOutputEncoder.EncodingOptions
import org.locationtech.geomesa.utils.bin.{AxisOrder, BinaryOutputEncoder}
import org.locationtech.geomesa.utils.collection.{CloseableIterator, SelfClosingIterator}
import scala.collection.JavaConversions._
/**
 * Output format for wfs requests that encodes features into a binary format.
 * To trigger, use outputFormat=application/vnd.binary-viewer in your wfs request
 *
 * Required flags:
 * format_options=trackId:<track attribute name>;
 *
 * Optional flags:
 * format_options=trackId:<track attribute name>;geom:<geometry attribute name>;dtg:<date attribute name>;label:<label attribute name>
 *
 * @param geoServer handle to geoserver
 */
class BinaryViewerOutputFormat(geoServer: GeoServer)
extends WFSGetFeatureOutputFormat(geoServer, Set("bin", BinaryViewerOutputFormat.MIME_TYPE)) with LazyLogging {
import BinaryViewerOutputFormat._
/**
 * Content type reported for responses produced by this format.
 *
 * @param value not consulted
 * @param operation not consulted
 * @return the binary viewer mime type constant from the companion object
 */
override def getMimeType(value: AnyRef, operation: Operation): String = {
  BinaryViewerOutputFormat.MIME_TYPE
}
/**
 * Content disposition for the response: always inline, regardless of the arguments.
 *
 * @param value not consulted
 * @param operation not consulted
 * @return `Response.DISPOSITION_INLINE`
 */
override def getPreferredDisposition(value: AnyRef, operation: Operation): String = {
  val disposition: String = Response.DISPOSITION_INLINE
  disposition
}
// Derives the pieces of the attachment file name from the GetFeature request.
// NOTE(review): the expression that produces the returned string and the closing brace are not
// visible in this view of the file - confirm how `name` and `size` are combined against the full source.
override def getAttachmentFileName(value: AnyRef, operation: Operation): String = {
// the request object is taken from the first operation parameter
val gfr = GetFeatureRequest.adapt(operation.getParameters()(0))
// prefer the request handle; when it is null fall back to the local part of the first type name
// of the first query
val name = Option(gfr.getHandle).getOrElse(gfr.getQueries.get(0).getTypeNames.get(0).getLocalPart)
// if they have requested a label, then it will be 24 byte encoding (assuming the field exists...)
val size = if (gfr.getFormatOptions.containsKey(LABEL_FIELD)) "24" else "16"
// Writes the requested feature collections to `output` as fixed-width binary records.
// The attribute names to encode are read from the request's format_options. Each feature collection
// is then handled in one of two ways:
//  - if its schema is BinaryOutputEncoder.BinEncodedSft, the records are treated as already encoded
//    and are written out, merge-sorted first when sorting is enabled
//  - otherwise the features are encoded here with a BinaryOutputEncoder
// NOTE(review): this view of the method is incomplete - closing braces, some right-hand sides and
// some branch bodies are missing. The comments below describe only the statements that are visible.
override def write(featureCollections: FeatureCollectionResponse,
output: OutputStream,
getFeature: Operation): Unit = {
// format_options flags for customizing the request
val request = GetFeatureRequest.adapt(getFeature.getParameters()(0))
// the track id option is mandatory - a missing (null) value raises IllegalArgumentException
val trackId = Option(request.getFormatOptions.get(TRACK_ID_FIELD).asInstanceOf[String]).getOrElse {
throw new IllegalArgumentException(s"$TRACK_ID_FIELD is a required format option")
// the remaining options are optional; null values become None
val geom = Option(request.getFormatOptions.get(GEOM_FIELD).asInstanceOf[String])
val dtg = Option(request.getFormatOptions.get(DATE_FIELD).asInstanceOf[String])
val label = Option(request.getFormatOptions.get(LABEL_FIELD).asInstanceOf[String])
// width in bytes of one output record: 24 when a label was requested, otherwise 16
val binSize = if (label.isDefined) 24 else 16
// depending on srs requested and wfs versions, axis order can be flipped
val axisOrder = checkAxisOrder(getFeature)
// NOTE(review): the right-hand side of requestedSort is not visible in this view
val requestedSort =
val bos = new BufferedOutputStream(output)
// a full sort happens when requested or when enabled through the SORT_SYS_PROP system property
val sort = requestedSort || sys.props.getOrElse(SORT_SYS_PROP, DEFAULT_SORT).toBoolean
// value passed as the BIN_SORT hint: implied by a full sort, otherwise taken from PARTIAL_SORT_SYS_PROP
val tserverSort = sort || sys.props.getOrElse(PARTIAL_SORT_SYS_PROP, DEFAULT_SORT).toBoolean
val batchSize = sys.props.getOrElse(BATCH_SIZE_SYS_PROP, DEFAULT_BATCH_SIZE).toInt
// set hints into thread local state - this prevents any wrapping feature collections from messing with
// the aggregation
// NOTE(review): the statement that actually applies `hints` is not visible in this view
val hints = {
// hints that are always present
val some = Map(BIN_TRACK -> trackId, BIN_SORT -> tserverSort, BIN_BATCH_SIZE -> batchSize)
// hints that are only added when the corresponding format option was supplied
val opts = Map(BIN_GEOM -> geom, BIN_DTG -> dtg, BIN_LABEL -> label).collect { case (k, Some(v)) => k -> v }
(some ++ opts).asInstanceOf[Map[AnyRef, AnyRef]]
try {
featureCollections.getFeatures.foreach { fc =>
val iter = CloseableIterator(fc.asInstanceOf[SimpleFeatureCollection].features())
// this check needs to be done *after* getting the feature iterator so that the return sft will be set
val schema = fc.asInstanceOf[SimpleFeatureCollection].getSchema
val aggregated = schema == BinaryOutputEncoder.BinEncodedSft
if (aggregated) {
// for accumulo, encodings have already been computed in the tservers
// NOTE(review): the start of this expression is not visible in this view
val aggregates =[Array[Byte]])
if (sort) {
// we do some asynchronous pre-merging while we are waiting for all the data to come in
// the pre-merging is expensive, as it merges in memory
// the final merge doesn't have to allocate space for merging, as it writes directly to the output
val numThreads = sys.props.getOrElse(SORT_THREADS_SYS_PROP, DEFAULT_SORT_THREADS).toInt
val executor = Executors.newFixedThreadPool(numThreads)
// access to this is manually synchronized so we can pull off 2 items at once
val mergeQueue = collection.mutable.PriorityQueue.empty[Array[Byte]](new Ordering[Array[Byte]] {
// shorter first - the comparison is reversed so that dequeue() hands back the shortest buffer
override def compare(x: Array[Byte], y: Array[Byte]): Int = y.length.compareTo(x.length)
// holds buffers we don't want to consider anymore due to their size - also manually synchronized
val doneMergeQueue = collection.mutable.ArrayBuffer.empty[Array[Byte]]
// size threshold in bytes, read from SORT_HEAP_SYS_PROP, above which buffers are set aside
val maxSizeToMerge = sys.props.getOrElse(SORT_HEAP_SYS_PROP, DEFAULT_SORT_HEAP).toInt
// one count per merge thread; awaited below before the final merge
val latch = new CountDownLatch(numThreads)
// loop condition polled by every merge thread
val keepMerging = new AtomicBoolean(true)
// submit one merge task per thread
var i = 0
while (i < numThreads) {
executor.submit(new Runnable() {
override def run(): Unit = {
while (keepMerging.get()) {
// pull out the 2 smallest items to merge
// the final merge has to compare the first item in each buffer
// so reducing the number of buffers helps
val (left, right) = mergeQueue.synchronized {
if (mergeQueue.length > 1) {
(mergeQueue.dequeue(), mergeQueue.dequeue())
} else {
(null, null)
if (left != null) { // right will also not be null
if (right.length > maxSizeToMerge) {
if (left.length > maxSizeToMerge) {
// both buffers are over the threshold: set both aside
doneMergeQueue.synchronized(doneMergeQueue.append(left, right))
// NOTE(review): the body of the next branch is not visible in this view
} else {
} else {
// NOTE(review): what happens to `result` after the merge is not visible in this view
val result = BinSorter.mergeSort(left, right, binSize)
} else {
// if we didn't find anything to merge, wait a bit before re-checking
latch.countDown() // indicate this thread is done
i += 1
// queue up the aggregates coming in so that they can be processed by the merging threads above
aggregates.foreach(a => mergeQueue.synchronized(mergeQueue.enqueue(a)))
// once all data is back from the tservers, stop pre-merging and start streaming back to the client
// NOTE(review): no visible statement sets keepMerging to false before the await below - confirm that
// the merge threads are told to stop, otherwise latch.await() would not return
executor.shutdown() // this won't stop the threads, but will cleanup once they're done
latch.await() // wait for the merge threads to finish
// get an iterator that returns in sorted order
val bins = BinSorter.mergeSort((doneMergeQueue ++ mergeQueue).iterator, binSize)
while (bins.hasNext) {
// NOTE(review): the right-hand side of this pattern is not visible in this view
val (aggregate, offset) =
// write a single record of binSize bytes starting at offset
bos.write(aggregate, offset, binSize)
} else {
// no sort, just write directly to the output
} else {
logger.warn(s"Server side bin aggregation is not enabled for feature collection '${fc.getClass}'")
// for non-accumulo fs we do the encoding here
// NOTE(review): `iter` opened above is not visibly used or closed on this path; features() is called
// again below - confirm against the full source
val sfc = fc.asInstanceOf[SimpleFeatureCollection]
// NOTE(review): the starts of the geomIndex/dtgIndex/labelIndex expressions are not visible in this view
val geomIndex = != -1)
val dtgIndex = != -1)
// index of the track attribute in the schema; None when indexOf returns -1
val trackIndex = Some(trackId).map(schema.indexOf).filter(_ != -1)
val labelIndex = != -1)
val options = EncodingOptions(geomIndex, dtgIndex, trackIndex, labelIndex, Some(axisOrder))
val encoder = BinaryOutputEncoder(schema, options)
encoder.encode(SelfClosingIterator(sfc.features()), bos, sort)
} finally {
// none of the implementations in geoserver call 'close' on the output stream
object BinaryViewerOutputFormat extends LazyLogging {
// mime type and file extension of the binary format
val MIME_TYPE: String = "application/vnd.binary-viewer"
val FILE_EXTENSION: String = "bin"

// names of the JVM system properties that tune sorting and batching
val SORT_SYS_PROP: String = "geomesa.output.bin.sort"
val PARTIAL_SORT_SYS_PROP: String = "geomesa.output.bin.sort.partial"
val SORT_THREADS_SYS_PROP: String = "geomesa.output.bin.sort.threads"
val SORT_HEAP_SYS_PROP: String = "geomesa.output.bin.sort.memory"
val BATCH_SIZE_SYS_PROP: String = "geomesa.output.bin.batch.size"

// fallbacks used when a property is not set; kept as strings because they are parsed the same way
// as the property values they stand in for
val DEFAULT_SORT: String = "false"
val DEFAULT_SORT_HEAP: String = "2097152" // 2MB
val DEFAULT_BATCH_SIZE: String = "65536" // 1MB for 16 byte bins
// constants used to determine axis order from geoserver

// WFS version that the service version is compared against when a request names no SRS
val wfsVersion1: Version = new Version("1.0.0")
// SRS name prefixes recognised by checkAxisOrder. They are written in lower case because the
// requested SRS name is lower-cased before it is compared against them.

// WFS 1.0.0 style, e.g. http://www.opengis.net/gml/srs/epsg.xml#4326 - longitude first.
// This must not be empty: every string starts with "", so an empty prefix would claim every SRS name
// and make the non-standard and invalid-format branches unreachable.
val srsVersionOnePrefix = "http://www.opengis.net/gml/srs/epsg.xml#"
// URN style used by WFS 1.1.0 and 2.0.0, e.g. urn:x-ogc:def:crs:EPSG:4326 - latitude first
val srsVersionOnePlusPrefix = "urn:x-ogc:def:crs:epsg:"
// short non-standard style, e.g. EPSG:4326 - longitude first
val srsNonStandardPrefix = "epsg:"
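/**
 * Determines the order of lat/lon in simple features returned by this request.
 *
 * See the "Axis ordering" section of the GeoServer WFS documentation for details
 * on how geoserver handles axis order.
 *
 * @param getFeature operation
 * @return axis order (lat/lon or lon/lat) of the coordinates returned for the request
 */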
def checkAxisOrder(getFeature: Operation): AxisOrder.AxisOrder =
  getSrs(getFeature) match {
    case None =>
      // no explicit SRS: wfs 1.0.0 stores x = lon y = lat, anything greater stores x = lat y = lon
      val newerThanV1 = getFeature.getService.getVersion.compareTo(wfsVersion1) > 0
      if (newerThanV1) AxisOrder.LatLon else AxisOrder.LonLat

    case Some(srs) =>
      // an explicitly requested SRS takes priority; prefixes are checked case-insensitively and in
      // this order, first match wins
      val lowered = srs.toLowerCase
      if (lowered.startsWith(srsVersionOnePlusPrefix)) {
        // SRS format associated with WFS 1.1.0 and 2.0.0 - lat is first
        AxisOrder.LatLon
      } else if (lowered.startsWith(srsVersionOnePrefix)) {
        // SRS format associated with WFS 1.0.0 - lon is first
        AxisOrder.LonLat
      } else if (lowered.startsWith(srsNonStandardPrefix)) {
        // non-standard SRS format - geoserver puts lon first
        AxisOrder.LonLat
      } else {
        // anything else is rejected, listing the accepted formats
        val valid = s"${srsVersionOnePrefix}xxxx, ${srsVersionOnePlusPrefix}xxxx, ${srsNonStandardPrefix}xxxx"
        throw new IllegalArgumentException(s"Invalid SRS format: '$srs'. Valid options are: $valid")
      }
  }
// Collects type names from the WFS 2.0 and WFS 1.x forms of the request and warns when more than one
// is present; per the warning text the first one is the one used.
// NOTE(review): this view of the method is incomplete - the expressions assigned to typeNamesV2 and
// typeNamesV1 appear to be cut short, and the closing braces and the returned value are not visible.
// Confirm against the full source.
def getTypeName(getFeature: Operation): Option[QName] = {
val typeNamesV2 = getFeatureTypeV2(getFeature)
val typeNamesV1 = getFeatureTypeV1(getFeature)
// WFS 2.0 results come before WFS 1.x results
val typeNames = typeNamesV2 ++ typeNamesV1
// lengthCompare(1) > 0 is true when there are at least two entries
if (typeNames.lengthCompare(1) > 0) {
logger.warn(s"Multiple TypeNames detected in binary format request (using first): $typeNames")
/**
 * Function to pull requested SRS out of a WFS request
 *
 * @param getFeature operation
 * @return the requested SRS, if any
 */
// Overload taking the whole operation; checkAxisOrder uses its result to decide the axis order.
// NOTE(review): nothing follows the `=` in this view of the file - confirm the body against the full source.
def getSrs(getFeature: Operation): Option[String] =
/**
 * Function to pull requested SRS out of WFS 1.0.0/1.1.0 request
 *
 * @param getFeatureType operation
 * @return the requested SRS, if any
 */
def getSrs(getFeatureType: GetFeatureTypeV1): Option[String] =
  for {
    query <- getQueryType(getFeatureType)
    // a null SRS name on the query yields None rather than a null result
    srsName <- Option(query.getSrsName)
  } yield srsName.toString
/**
 * Function to pull requested SRS out of WFS 2 request
 *
 * @param getFeatureType operation
 * @return the requested SRS, if any
 */
def getSrs(getFeatureType: GetFeatureTypeV2): Option[String] =
  for {
    query <- getQueryType(getFeatureType)
    // a null SRS name on the query yields None rather than a null result
    srsName <- Option(query.getSrsName)
  } yield srsName.toString
/**
 * @param getFeature operation
 * @return
 */
// Yields an Option of the WFS 2.0 request type (net.opengis.wfs20.GetFeatureType) for the operation.
// NOTE(review): nothing follows the `=` in this view of the file - confirm the body against the full source.
def getFeatureTypeV2(getFeature: Operation): Option[GetFeatureTypeV2] =
/**
 * @param getFeature operation
 * @return
 */
// Yields an Option of the WFS 1.x request type (net.opengis.wfs.GetFeatureType) for the operation.
// NOTE(review): nothing follows the `=` in this view of the file - confirm the body against the full source.
def getFeatureTypeV1(getFeature: Operation): Option[GetFeatureTypeV1] =
/**
 * Pull out query object from request
 *
 * @param getFeatureType operation
 * @return
 */
// WFS 1.x overload; its result feeds getSrs for the same request type.
// NOTE(review): nothing follows the `=` in this view of the file - confirm the body against the full source.
def getQueryType(getFeatureType: GetFeatureTypeV1): Option[QueryTypeV1] =
/**
 * Pull out query object from request
 *
 * @param getFeatureType operation
 * @return
 */
def getQueryType(getFeatureType: GetFeatureTypeV2): Option[QueryTypeV2] =