All downloads are free. Search and download functionality uses the official Maven repository.

org.geomesa.gs.wfs.output.ArrowOutputFormat.scala Maven / Gradle / Ivy

There is a newer version: 3.4.1
Show newest version
/***********************************************************************
 * Copyright (c) 2013-2017 Commonwealth Computer Research, Inc.
 * All rights reserved. This program and the accompanying materials are
 * made available under the terms of the GNU GENERAL PUBLIC LICENSE,
 * Version 2 which accompanies this distribution and is available at
 * https://opensource.org/licenses/GPL-2.0.
 ***********************************************************************/

package org.geomesa.gs.wfs.output

import java.io.{BufferedOutputStream, OutputStream}

import com.typesafe.scalalogging.LazyLogging
import org.geoserver.config.GeoServer
import org.geoserver.ows.Response
import org.geoserver.platform.Operation
import org.geoserver.wfs.WFSGetFeatureOutputFormat
import org.geoserver.wfs.request.{FeatureCollectionResponse, GetFeatureRequest}
import org.geotools.data.simple.SimpleFeatureCollection
import org.locationtech.geomesa.arrow.ArrowProperties
import org.locationtech.geomesa.arrow.vector.SimpleFeatureVector.SimpleFeatureEncoding
import org.locationtech.geomesa.index.conf.QueryHints._
import org.locationtech.geomesa.index.planning.QueryPlanner
import org.locationtech.geomesa.process.transform.ArrowConversionProcess.ArrowVisitor
import org.locationtech.geomesa.utils.collection.CloseableIterator
import org.locationtech.geomesa.utils.io.WithClose
import org.opengis.feature.simple.SimpleFeatureType
import org.opengis.filter.sort.SortOrder

import scala.collection.JavaConversions._

/**
  * Output format for wfs requests that encodes features into arrow vector format.
  * To trigger, use outputFormat=application/vnd.arrow in your wfs request
  *
  * Optional flags:
  *   format_options=includeFids:<Boolean>;proxyFids:<Boolean>;dictionaryFields:<field>,<field>;
  *     useCachedDictionaries:<Boolean>;sortField:<field>;sortReverse:<Boolean>;
  *     batchSize:<Integer>;doublePass:<Boolean>
  *
  * @param geoServer geoserver
  */
class ArrowOutputFormat(geoServer: GeoServer)
    extends WFSGetFeatureOutputFormat(geoServer, Set("arrow", ArrowOutputFormat.MimeType)) with LazyLogging {

  /** The mime type is fixed for this format, regardless of the value or the operation. */
  override def getMimeType(value: AnyRef, operation: Operation): String = ArrowOutputFormat.MimeType

  /** Always asks for inline disposition. */
  override def getPreferredDisposition(value: AnyRef, operation: Operation): String = Response.DISPOSITION_INLINE

  /**
    * Builds the attachment file name from the request handle, falling back to the local part of the
    * first type name of the first query when no handle is set.
    *
    * @param value response value (unused)
    * @param operation operation whose first parameter is adapted to a GetFeature request
    * @return file name with the arrow file extension appended
    */
  override def getAttachmentFileName(value: AnyRef, operation: Operation): String = {
    val gfr = GetFeatureRequest.adapt(operation.getParameters()(0))
    val name = Option(gfr.getHandle).getOrElse(gfr.getQueries.get(0).getTypeNames.get(0).getLocalPart)
    s"$name.${ArrowOutputFormat.FileExtension}"
  }

  /**
    * Writes each feature collection of the response to the output as arrow-encoded bytes.
    *
    * Collections whose schema is the arrow-encoded schema are assumed to already contain encoded bytes
    * in attribute 0 and are copied through as-is; any other collection is encoded here using an
    * `ArrowVisitor` configured from the request's format options.
    *
    * @param featureCollections response holding the feature collections to write
    * @param output stream to write to
    * @param getFeature operation whose first parameter is adapted to a GetFeature request
    */
  override def write(featureCollections: FeatureCollectionResponse,
                     output: OutputStream,
                     getFeature: Operation): Unit = {

    // format_options flags for customizing the request
    val request = GetFeatureRequest.adapt(getFeature.getParameters()(0))
    val hints = arrowHints(request)

    // set hints into thread local state - this prevents any wrapping feature collections from messing with
    // the aggregation
    QueryPlanner.setPerThreadQueryHints(hints)

    try {
      // NOTE(review): WithClose presumably also closes the wrapped response stream when the block exits -
      // confirm that this is acceptable to the caller
      WithClose(new BufferedOutputStream(output)) { bos =>
        import org.locationtech.geomesa.utils.conversions.ScalaImplicits.RichTraversableOnce

        import scala.collection.JavaConverters._
        featureCollections.getFeatures.asScala.foreachIndex { case (fc, i) =>
          WithClose(CloseableIterator(fc.asInstanceOf[SimpleFeatureCollection].features())) { iter =>
            // this check needs to be done *after* getting the feature iterator so that the return sft will be set
            val aggregated = fc.getSchema == org.locationtech.geomesa.arrow.ArrowEncodedSft
            if (aggregated) {
              // with distributed processing, encodings have already been computed in the servers
              iter.map(_.getAttribute(0).asInstanceOf[Array[Byte]]).foreach(bos.write(_))
            } else {
              // for non-encoded fs we do the encoding here
              logger.warn(s"Server side arrow aggregation is not enabled for feature collection '${fc.getClass}'")

              // fids are included unless explicitly disabled, and are not proxied unless explicitly enabled
              val includeFid = booleanHint(hints, ARROW_INCLUDE_FID).getOrElse(true)
              val proxyFid = booleanHint(hints, ARROW_PROXY_FID).getOrElse(false)
              val encoding = SimpleFeatureEncoding.min(includeFid, proxyFid)
              // trim the names and drop empty ones, so that an empty option does not produce a
              // dictionary for a field named ""
              val dictionaries: Seq[String] =
                stringHint(hints, ARROW_DICTIONARY_FIELDS).toSeq.flatMap(_.split(",").toSeq).map(_.trim).filter(_.nonEmpty)
              val cacheDictionaries = booleanHint(hints, ARROW_DICTIONARY_CACHED)
              val sortField = stringHint(hints, ARROW_SORT_FIELD)
              val sortReverse = booleanHint(hints, ARROW_SORT_REVERSE)
              val batchSize = hints.get(ARROW_BATCH_SIZE).collect { case n: java.lang.Integer => n.intValue }
                  .getOrElse(ArrowProperties.BatchSize.get.toInt)
              val doublePass = booleanHint(hints, ARROW_DOUBLE_PASS).getOrElse(false)

              // only meaningful when a sort field was requested; a missing reverse flag means ascending
              val preSorted = sortField.exists { field =>
                matchesRequestedSort(request, i, field, sortReverse.getOrElse(false))
              }

              val visitor = new ArrowVisitor(fc.getSchema.asInstanceOf[SimpleFeatureType], encoding, dictionaries,
                cacheDictionaries, sortField, sortReverse, preSorted, batchSize, doublePass)

              iter.foreach(visitor.visit)

              visitor.getResult().results.foreach(bos.write(_))
            }
          }
        }
      }
    } finally {
      QueryPlanner.clearPerThreadQueryHints()
    }
  }

  /**
    * Translates the request's format options into query hints. Arrow encoding is always enabled;
    * every other hint is only set when the corresponding format option is present.
    *
    * @param request adapted GetFeature request
    * @return hints keyed by the arrow query hint keys
    */
  private def arrowHints(request: GetFeatureRequest): Map[AnyRef, AnyRef] = {
    import ArrowOutputFormat.Fields

    // guard against a request that carries no format options map at all
    val options = Option(request.getFormatOptions.asInstanceOf[java.util.Map[String, String]])

    def option(key: String): Option[String] = options.flatMap(o => Option(o.get(key)))
    def flag(key: String): Option[java.lang.Boolean] = option(key).map(v => java.lang.Boolean.valueOf(v))

    val builder = Map.newBuilder[AnyRef, AnyRef]
    builder += ARROW_ENCODE -> Boolean.box(true)

    flag(Fields.IncludeFids).foreach { v => builder += ARROW_INCLUDE_FID -> v }
    flag(Fields.ProxyFids).foreach { v => builder += ARROW_PROXY_FID -> v }
    option(Fields.DictionaryFields).foreach { v => builder += ARROW_DICTIONARY_FIELDS -> v }
    flag(Fields.UseCachedDictionaries).foreach { v => builder += ARROW_DICTIONARY_CACHED -> v }
    option(Fields.SortField).foreach { v => builder += ARROW_SORT_FIELD -> v }
    flag(Fields.SortReverse).foreach { v => builder += ARROW_SORT_REVERSE -> v }
    option(Fields.BatchSize).foreach { v => builder += ARROW_BATCH_SIZE -> java.lang.Integer.valueOf(v) }
    flag(Fields.DoublePass).foreach { v => builder += ARROW_DOUBLE_PASS -> v }

    builder.result()
  }

  /** Reads a hint as a boolean; absent or non-boolean values yield None. */
  private def booleanHint(hints: Map[AnyRef, AnyRef], key: AnyRef): Option[Boolean] =
    hints.get(key).collect { case b: java.lang.Boolean => b.booleanValue }

  /** Reads a hint as a string; absent or non-string values yield None. */
  private def stringHint(hints: Map[AnyRef, AnyRef], key: AnyRef): Option[String] =
    hints.get(key).collect { case s: String => s }

  /**
    * Checks whether the query at the given index is already sorted exactly as requested, i.e. it has a
    * single sort clause on the given field in the given direction.
    *
    * A missing query list, an index with no matching query or a missing sort list is treated as
    * "not sorted" instead of failing.
    *
    * @param request adapted GetFeature request
    * @param index index of the feature collection, used as the index of its query
    * @param field requested sort field
    * @param reverse true if descending order was requested
    * @return true if the query's own sort matches the requested sort
    */
  private def matchesRequestedSort(request: GetFeatureRequest, index: Int, field: String, reverse: Boolean): Boolean = {
    import scala.collection.JavaConverters._

    val sortBy = Option(request.getQueries)
        .filter(_.size > index)
        .flatMap(queries => Option(queries.get(index)))
        .flatMap(query => Option(query.getSortBy))
        .map(_.asScala.toSeq)
        .getOrElse(Seq.empty)

    sortBy match {
      case Seq(sort) =>
        Option(sort.getPropertyName).exists(_.getPropertyName == field) &&
            (sort.getSortOrder == SortOrder.DESCENDING) == reverse
      case _ => false
    }
  }
}

/**
  * Constants used by the arrow WFS output format.
  */
object ArrowOutputFormat extends LazyLogging {

  /** Extension appended to the attachment file name. */
  val FileExtension: String = "arrow"

  /** Mime type reported for responses, also registered as an output format name. */
  val MimeType: String = "application/vnd.arrow"

  /**
    * Keys used to look up the optional flags in the request's format options.
    */
  object Fields {
    // note: format option keys are always upper-cased by geoserver
    val BatchSize: String             = "BATCHSIZE"
    val DictionaryFields: String      = "DICTIONARYFIELDS"
    val DoublePass: String            = "DOUBLEPASS"
    val IncludeFids: String           = "INCLUDEFIDS"
    val ProxyFids: String             = "PROXYFIDS"
    val SortField: String             = "SORTFIELD"
    val SortReverse: String           = "SORTREVERSE"
    val UseCachedDictionaries: String = "USECACHEDDICTIONARIES"
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy