package lightdb.lucene
import fabric._
import fabric.define.DefType
import{Asable, Convertible}
import lightdb.SortDirection.Ascending
import lightdb.aggregate.{AggregateQuery, AggregateType}
import lightdb.collection.Collection
import lightdb._
import lightdb.field.Field._
import lightdb.doc.{Document, DocumentModel, JsonConversion}
import lightdb.facet.{FacetResult, FacetResultValue}
import lightdb.field.{Field, IndexingState}
import lightdb.filter.{Condition, Filter}
import lightdb.lucene.index.Index
import lightdb.materialized.{MaterializedAggregate, MaterializedAndDoc, MaterializedIndex}
import lightdb.spatial.{DistanceAndDoc, Geo, Spatial}
import{Conversion, Store, StoreManager, StoreMode}
import lightdb.transaction.Transaction
import lightdb.util.Aggregator
import org.apache.lucene.document.{DoubleField, DoublePoint, IntField, IntPoint, LatLonDocValuesField, LatLonPoint, LatLonShape, LongField, LongPoint, NumericDocValuesField, SortedDocValuesField, SortedNumericDocValuesField, StoredField, StringField, TextField, Document => LuceneDocument, Field => LuceneField}
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts
import org.apache.lucene.geo.{Line, Polygon}
import{BooleanClause, BooleanQuery, BoostQuery, FieldExistsQuery, IndexSearcher, MatchAllDocsQuery, MultiCollectorManager, PrefixQuery, RegexpQuery, ScoreDoc, SearcherFactory, SearcherManager, SortField, SortedNumericSortField, TermQuery, TopFieldCollector, TopFieldCollectorManager, TopFieldDocs, TotalHitCountCollector, TotalHitCountCollectorManager, WildcardQuery, Query => LuceneQuery, Sort => LuceneSort}
import org.apache.lucene.index.{DirectoryReader, SegmentInfos, SegmentReader, StoredFields, Term}
import org.apache.lucene.queryparser.classic.QueryParser
import org.apache.lucene.util.{BytesRef, Version}
import org.apache.lucene.facet.{DrillDownQuery, FacetsCollector, FacetsCollectorManager, FacetsConfig, FacetField => LuceneFacetField}
import java.nio.file.{Files, Path}
import scala.language.implicitConversions
import scala.util.Try
/**
 * Store implementation that keeps documents in a Lucene index.
 *
 * @param directory optional filesystem location handed to Index; what an empty value means is
 *                  decided by Index and is not visible in this file
 * @param storeMode consulted when deciding whether a field value is stored in Lucene and whether
 *                  search hits are rebuilt from stored fields or re-read through the collection
 * @tparam Doc   document type held by this store
 * @tparam Model document model describing Doc
 */
class LuceneStore[Doc <: Document[Doc], Model <: DocumentModel[Doc]](directory: Option[Path], val storeMode: StoreMode) extends Store[Doc, Model] {
// Index wrapper for the configured directory, created on first use.
private lazy val index = Index(directory)
/**
 * Facet configuration derived from the declared fields. For every FacetField the hierarchical,
 * multi-valued and require-dim-count options are set only when the field asks for them.
 */
private lazy val facetsConfig: FacetsConfig = {
val c = new FacetsConfig
fields.foreach {
case ff: FacetField[_] =>
// NOTE(review): the first argument of each setter is missing in this copy of the file;
// presumably it is the facet field name - confirm against the upstream source.
if (ff.hierarchical) c.setHierarchical(, ff.hierarchical)
if (ff.multiValued) c.setMultiValued(, ff.multiValued)
if (ff.requireDimCount) c.setRequireDimCount(, ff.requireDimCount)
case _ => // Ignore
// True when at least one declared field is a FacetField.
private lazy val hasFacets: Boolean = fields.exists(_.isInstanceOf[FacetField[_]])
/**
 * Facet-aware preparation of a Lucene document before it is written.
 *
 * NOTE(review): the call in the hasFacets branch and the whole else branch are truncated in this
 * copy of the file; presumably the document is returned untouched when there are no facet
 * fields - confirm against the upstream source.
 *
 * @param doc the Lucene document about to be indexed
 * @return the document to hand to the index writer
 */
private def facetsPrepareDoc(doc: LuceneDocument): LuceneDocument = if (hasFacets) {, doc)
} else {
/**
 * Initialisation hook. When a directory is configured and already exists on disk, every leaf of
 * the opened reader is inspected and a value taken from its SegmentReader is compared with
 * Version.LATEST.
 *
 * NOTE(review): several right-hand sides are truncated in this copy (directory, reader and the
 * version extraction), and the mismatch branch shows only the TODO line below.
 *
 * @param collection the collection this store belongs to
 */
override def init(collection: Collection[Doc, Model]): Unit = {
directory.foreach { path =>
if (Files.exists(path)) {
val directory =
val reader =
reader.leaves().forEach { leaf =>
val dataVersion = leaf.reader().asInstanceOf[SegmentReader]
val latest = Version.LATEST
if (latest != dataVersion) {
// TODO: Support re-indexing"Data Version: $dataVersion, Latest Version: $latest")
/**
 * Registers the Lucene state for a new transaction: a LuceneState built from the index and the
 * hasFacets flag is stored in the transaction under StateKey[Doc].
 *
 * @param transaction the transaction being prepared
 */
override def prepareTransaction(transaction: Transaction[Doc]): Unit = transaction.put(
key = StateKey[Doc],
value = LuceneState[Doc](index, hasFacets)
/**
 * Adds a document through addDoc with upsert disabled.
 *
 * @param doc         the document to add
 * @param transaction the transaction the write belongs to
 */
override def insert(doc: Doc)(implicit transaction: Transaction[Doc]): Unit = {
addDoc(doc, upsert = false)
/**
 * Adds or replaces a document through addDoc with upsert enabled.
 *
 * @param doc         the document to write
 * @param transaction the transaction the write belongs to
 */
override def upsert(doc: Doc)(implicit transaction: Transaction[Doc]): Unit = {
addDoc(doc, upsert = true)
/**
 * Emits the Lucene fields for a spatial value.
 *
 * A field whose className is lightdb.spatial.Geo.Point gets a single LatLonPoint. Any other
 * className goes through the local helpers, which cover points, lines, polygons, their multi
 * variants and geometry collections (recursively). A StoredField holding the compact JSON is
 * added as well.
 *
 * NOTE(review): many arguments (field names, coordinate sources, json conversions) and all
 * closing braces are missing in this copy, so the exact nesting of the statements after the
 * helpers cannot be read from here - confirm against the upstream source.
 *
 * @param field the spatial field being indexed
 * @param json  the JSON value of the field
 * @param add   callback that receives each Lucene field produced
 * @throws RuntimeException wrapping any failure while creating a point, with the JSON in the message
 */
private def createGeoFields(field: Field[Doc, _],
json: Json,
add: LuceneField => Unit): Unit = {
field.className match {
case Some("lightdb.spatial.Geo.Point") =>
val p =[Geo.Point]
// Failures are rethrown with the coordinates and the source JSON in the message.
try {
add(new LatLonPoint(, p.latitude, p.longitude))
} catch {
case t: Throwable => throw new RuntimeException(s"Failed to add LatLonPoint(${}, ${p.latitude}, ${p.longitude}): ${JsonFormatter.Default(json)}", t)
case _ =>
// Shape indexing helpers, one per geometry kind.
def indexPoint(p: Geo.Point): Unit = try {
LatLonShape.createIndexableFields(, p.latitude, p.longitude)
} catch {
case t: Throwable => throw new RuntimeException(s"Failed to add LatLonPoint.createIndexableFields(${}, ${p.latitude}, ${p.longitude}): ${JsonFormatter.Default(json)}", t)
def indexLine(l: Geo.Line): Unit = {
val line = new Line(,
LatLonShape.createIndexableFields(, line)
def indexPolygon(p: Geo.Polygon): Unit = {
def convert(p: Geo.Polygon): Polygon =
new Polygon(,
val polygon = convert(p)
LatLonShape.createIndexableFields(, polygon)
// Dispatches on the geometry kind; multi geometries and collections fan out to the helpers above.
def indexGeo(geo: Geo): Unit = geo match {
case p: Geo.Point => indexPoint(p)
case Geo.MultiPoint(points) => points.foreach(indexPoint)
case l: Geo.Line => indexLine(l)
case Geo.MultiLine(lines) => lines.foreach(indexLine)
case p: Geo.Polygon => indexPolygon(p)
case Geo.MultiPolygon(polygons) => polygons.foreach(indexPolygon)
case Geo.GeometryCollection(geometries) => geometries.foreach(indexGeo)
// A JSON array yields several geometries, anything else a single one.
val list = json match {
case Arr(value, _) =>[Geo])
case _ => List([Geo])
list.foreach { geo =>
add(new LatLonPoint(,,
// With no geometry at all a point at 0.0, 0.0 is added instead.
if (list.isEmpty) {
add(new LatLonPoint(, 0.0, 0.0))
add(new StoredField(, JsonFormatter.Compact(json)))
/**
 * Converts one model field of a document into the Lucene fields that represent it.
 *
 * Facet fields become Lucene facet fields, tokenized fields become a text field, and every other
 * field is mapped from its JSON value according to its definition type. For the last group an
 * additional doc-values field named after the field with the suffix Sort is produced for string,
 * integer and spatial values.
 *
 * NOTE(review): field-name arguments, some statements and all closing braces are missing in this
 * copy, so the value finally returned cannot be read from here.
 *
 * @param field the model field to convert
 * @param doc   the document the value is read from
 * @param state indexing state passed through to the field accessors
 * @return the Lucene fields for this model field
 * @throws UnsupportedOperationException when the definition type has no mapping
 */
private def createLuceneFields(field: Field[Doc, _], doc: Doc, state: IndexingState): List[LuceneField] = {
// Values are stored when the store keeps everything or when the field is indexed.
def fs: LuceneField.Store = if (storeMode == StoreMode.All || field.indexed) LuceneField.Store.YES else LuceneField.Store.NO
val json = field.getJson(doc, state)
// Accumulator for the generic branch; add prepends to it.
var fields = List.empty[LuceneField]
def add(field: LuceneField): Unit = fields = field :: fields
field match {
case ff: FacetField[Doc] => ff.get(doc, ff, state).flatMap { value =>
if (value.path.nonEmpty || ff.hierarchical) {
// Hierarchical facets get the $ROOT$ marker appended to the path.
val path = if (ff.hierarchical) value.path ::: List("$ROOT$") else value.path
Some(new LuceneFacetField(, path: _*))
} else {
case t: Tokenized[Doc] =>
List(new LuceneField(, t.get(doc, t, state), if (fs == LuceneField.Store.YES) TextField.TYPE_STORED else TextField.TYPE_NOT_STORED))
case _ =>
// Maps a JSON value to Lucene fields according to its definition type; recurses for Opt and Arr.
def addJson(json: Json, d: DefType): Unit = {
if (field.isSpatial) {
if (json != Null) createGeoFields(field, json, add)
} else {
d match {
case DefType.Str => json match {
case Null => add(new StringField(, Field.NullString, fs))
case _ => add(new StringField(, json.asString, fs))
case DefType.Enum(_, _) => add(new StringField(, json.asString, fs))
case DefType.Opt(d) => addJson(json, d)
case DefType.Json | DefType.Obj(_, _) => add(new StringField(, JsonFormatter.Compact(json), fs))
case _ if json == Null => // Ignore null values
case DefType.Arr(d) => json.asVector.foreach(json => addJson(json, d))
case DefType.Bool => add(new IntField(, if (json.asBoolean) 1 else 0, fs))
case DefType.Int => add(new LongField(, json.asLong, fs))
case DefType.Dec => add(new DoubleField(, json.asDouble, fs))
case _ => throw new UnsupportedOperationException(s"Unsupported definition (field: ${}, className: ${field.className}): $d for $json")
// Companion doc-values field used for sorting.
val fieldSortName = s"${}Sort"
field.getJson(doc, state) match {
case Str(s, _) =>
val bytes = new BytesRef(s)
// NOTE(review): the statement that registers this value is not visible in this copy.
val sorted = new SortedDocValuesField(fieldSortName, bytes)
case NumInt(l, _) => add(new NumericDocValuesField(fieldSortName, l))
case j if field.isSpatial && j != Null =>
val list = j match {
case Arr(values, _) =>[Geo])
case _ => List([Geo])
list.foreach { g =>
add(new LatLonDocValuesField(fieldSortName,,
case _ => // Ignore
/**
 * Writes a document to the index. Nothing is done unless the model declares more than one field
 * (fields.tail.nonEmpty).
 *
 * NOTE(review): the id lookup, the population of the Lucene document and the non-upsert branch
 * are truncated in this copy of the file.
 *
 * @param doc    the document to write
 * @param upsert when true the write replaces any document with the same _id term
 */
private def addDoc(doc: Doc, upsert: Boolean): Unit = if (fields.tail.nonEmpty) {
val id =
val state = new IndexingState
// One IndexingState is shared across all fields of this document.
val luceneFields = fields.flatMap { field =>
createLuceneFields(field, doc, state)
val document = new LuceneDocument
if (upsert) {
index.indexWriter.updateDocument(new Term("_id", id.value), facetsPrepareDoc(document))
} else {
// A document exists when a lookup on the id field returns a result.
override def exists(id: Id[Doc])(implicit transaction: Transaction[Doc]): Boolean = get(idField, id).nonEmpty
/**
 * Looks up a single document by a unique index value. Implemented as an equality-filtered search
 * limited to one result.
 *
 * @param field       the unique index to match on
 * @param value       the value to look for
 * @param transaction the transaction the read belongs to
 * @return the first matching document, if any
 */
override def get[V](field: UniqueIndex[Doc, V], value: V)
(implicit transaction: Transaction[Doc]): Option[Doc] = {
val filter = Filter.Equals(field, value)
val query = Query[Doc, Model](collection, filter = Some(filter), limit = Some(1))
doSearch[Doc](query, Conversion.Doc()).list.headOption
/**
 * Deletes by unique index value. The equality filter is translated to a Lucene query.
 *
 * NOTE(review): the statements that use the query and produce the Boolean result are not visible
 * in this copy of the file.
 *
 * @param field       the unique index to match on
 * @param value       the value identifying the document
 * @param transaction the transaction the write belongs to
 */
override def delete[V](field: UniqueIndex[Doc, V], value: V)(implicit transaction: Transaction[Doc]): Boolean = {
val query = filter2Lucene(Some(field === value))
// Number of documents matched by a match-all query on the current index searcher.
override def count(implicit transaction: Transaction[Doc]): Int =
state.indexSearcher.count(new MatchAllDocsQuery)
// Iterates documents by running an unfiltered query through doSearch.
override def iterator(implicit transaction: Transaction[Doc]): Iterator[Doc] =
doSearch[Doc](Query[Doc, Model](collection), Conversion.Doc()).iterator
/**
 * Runs a query against the Lucene index and wraps the hits in SearchResults.
 *
 * The filter is translated with filter2Lucene, the sort list becomes a Lucene Sort (score order
 * when no sort is given), and each hit is converted according to the requested Conversion.
 *
 * NOTE(review): many expressions and all closing braces are truncated in this copy of the file;
 * the comments below describe only what the remaining text shows.
 *
 * @param query       query carrying filter, sort, offset, limit, facets and scoring options
 * @param conversion  how each hit is turned into a V
 * @param transaction the transaction the search runs in
 * @return results carrying the total, an iterator of value and score pairs, and facet results
 * @throws RuntimeException when the computed limit is not positive
 */
override def doSearch[V](query: Query[Doc, Model], conversion: Conversion[Doc, V])
(implicit transaction: Transaction[Doc]): SearchResults[Doc, Model, V] = {
val q: LuceneQuery = filter2Lucene(query.filter)
val sortFields = query.sort match {
case Nil => List(SortField.FIELD_SCORE)
case _ =>
val s = new LuceneSort(sortFields: _*)
val indexSearcher = state.indexSearcher
var facetsCollectorManager: Option[FacetsCollectorManager] = None
// Page size is capped at 100 (also the default) and widened by the offset.
// NOTE(review): the receiver of the mapping is truncated here; presumably query.limit - confirm.
val limit = => math.min(l, 100)).getOrElse(100) + query.offset
if (limit <= 0) throw new RuntimeException(s"Limit must be a positive value, but set to $limit")
var facetResults: Map[FacetField[Doc], FacetResult] = Map.empty
// Executes the search; total, when given, replaces limit as the number of hits to collect.
def search(total: Option[Int]): TopFieldDocs = {
val hitCountCollectorManager = new TotalHitCountCollectorManager(indexSearcher.getSlices)
val topFieldCollectorManager = new TopFieldCollectorManager(s, total.getOrElse(limit), Int.MaxValue)
// A facets collector is only registered when the query asks for facets.
if (query.facets.nonEmpty) {
facetsCollectorManager = Some(new FacetsCollectorManager(query.scoreDocs))
val collectors = List(
Some(hitCountCollectorManager), Some(topFieldCollectorManager), facetsCollectorManager
// TODO: Support exclusion of hitCountCollectorManager if countTotal is false
val manager = new MultiCollectorManager(collectors: _*)
val resultCollectors =, manager).toVector
// Results are read back by position: hit count, top docs, then the facets collector if registered.
val actualCount = resultCollectors(0).asInstanceOf[java.lang.Integer].intValue()
val topFieldDocs = resultCollectors(1).asInstanceOf[TopFieldDocs]
val facetsCollector = if (facetsCollectorManager.nonEmpty) Some(resultCollectors(2).asInstanceOf[FacetsCollector]) else None
facetResults = facetsCollector match {
case Some(fc) =>
val facets = new FastTaxonomyFacetCounts(state.taxonomyReader, facetsConfig, fc) { fq =>
// Top children when a children limit is set, all children otherwise; a null result maps to None.
Option(fq.childrenLimit match {
case Some(l) => facets.getTopChildren(l,, fq.path: _*)
case None => facets.getAllChildren(, fq.path: _*)
}) match {
case Some(facetResult) =>
val values = if (facetResult.childCount > 0) { { lv =>
FacetResultValue(lv.label, lv.value.intValue())
} else {
// The $ROOT$ marker value is dropped from the reported facet values.
val updatedValues = values.filterNot(_.value == "$ROOT$")
val totalCount =
fq.field -> FacetResult(updatedValues, facetResult.childCount, totalCount)
case None =>
fq.field -> FacetResult(Nil, 0, 0)
case None => Map.empty
val totalHits = total.getOrElse(actualCount)
// Search again with a larger collection size when more hits exist than were retrieved, no
// explicit total was supplied, and the requested limit (if any) reaches past the first pass.
if (totalHits > topFieldDocs.scoreDocs.length && total.isEmpty && query.limit.forall(l => l + query.offset > limit)) {
search(Some( => math.min(l, totalHits)).getOrElse(totalHits)))
} else {
val topFieldDocs: TopFieldDocs = search(None)
val scoreDocs: List[ScoreDoc] = {
val list = topFieldDocs
.map { scoreDoc =>
// With score computation enabled the score comes from the explanation of the query for that hit.
if (query.scoreDocs) {
val explanation = indexSearcher.explain(q, scoreDoc.doc)
// TODO: Add explanation info
new ScoreDoc(scoreDoc.doc, explanation.getValue.floatValue())
} else {
// Optional minimum score filter.
query.minDocScore match {
case Some(min) => list.filter(_.score.toDouble >= min)
case None => list
val total: Int = topFieldDocs.totalHits.value.toInt
val storedFields: StoredFields = indexSearcher.storedFields()
val idsAndScores = => Id[Doc](storedFields.document(doc.doc).get("_id")) -> doc.score.toDouble)
// Reads a field from the stored Lucene document and converts the stored strings to Json.
def jsonField[F](scoreDoc: ScoreDoc, field: Field[Doc, F]): Json = {
val values = storedFields.document(scoreDoc.doc).getValues(
.map(s => Field.string2Json(, s,
if (values.nonEmpty && values.head.isArr) {
} else {
if (values.length > 1) {
throw new RuntimeException(s"Failure: $values, ${values.head.getClass}")
def value[F](scoreDoc: ScoreDoc, field: Field[Doc, F]): F = jsonField[F](scoreDoc, field).as[F](
// With StoreMode.All the document is rebuilt from stored fields; otherwise only _id is read
// and the document is fetched through the collection.
def loadScoreDoc(scoreDoc: ScoreDoc): (Doc, Double) = if (storeMode == StoreMode.All) {
collection.model match {
case c: JsonConversion[Doc] =>
val o = obj( => -> jsonField(scoreDoc, f)): _*)
c.convertFromJson(o) -> scoreDoc.score.toDouble
case _ =>
val map = { field => -> value(scoreDoc, field)
collection.model.map2Doc(map) -> scoreDoc.score.toDouble
} else {
val docId = scoreDoc.doc
val id = Id[Doc](storedFields.document(docId).get("_id"))
val score = scoreDoc.score.toDouble
collection(id)(transaction) -> score
def docIterator(): Iterator[(Doc, Double)] =
def jsonIterator(fields: List[Field[Doc, _]]): Iterator[(ScoreDoc, Json, Double)] = { { scoreDoc =>
val json = obj( { field => -> jsonField(scoreDoc, field)
}: _*)
val score = scoreDoc.score.toDouble
(scoreDoc, json, score)
// Choose the hit conversion requested by the caller.
val iterator: Iterator[(V, Double)] = conversion match {
case Conversion.Value(field) => { scoreDoc =>
value(scoreDoc, field) -> scoreDoc.score.toDouble
case Conversion.Doc() => docIterator().asInstanceOf[Iterator[(V, Double)]]
case Conversion.Converted(c) => docIterator().map {
case (doc, score) => c(doc) -> score
case Conversion.Materialized(fields) => jsonIterator(fields).map {
case (_, json, score) => MaterializedIndex[Doc, Model](json, collection.model).asInstanceOf[V] -> score
case Conversion.DocAndIndexes() => jsonIterator(fields.filter(_.indexed)).map {
case (scoreDoc, json, score) => MaterializedAndDoc[Doc, Model](json, collection.model, loadScoreDoc(scoreDoc)._1).asInstanceOf[V] -> score
case Conversion.Json(fields) => jsonIterator(fields).map(t => t._2 -> t._3).asInstanceOf[Iterator[(V, Double)]]
// Distance conversion loads each document through the collection and measures every geo value against from.
case Conversion.Distance(field, from, sort, radius) => {
case (id, score) =>
val state = new IndexingState
val doc = collection(id)(transaction)
val distance = field.get(doc, field, state).map(d => Spatial.distance(from, d))
DistanceAndDoc(doc, distance) -> score
model = collection.model,
offset = query.offset,
limit = query.limit,
total = Some(total),
iteratorWithScore = iterator,
facetResults = facetResults,
transaction = transaction
/**
 * Translates an optional filter into a Lucene query. No filter yields a match-all query.
 *
 * NOTE(review): the body of the parsed helper, the build calls of the boolean builders and all
 * closing braces are missing in this copy of the file.
 *
 * @param filter the filter to translate, if any
 * @return the equivalent Lucene query
 */
private def filter2Lucene(filter: Option[Filter[Doc]]): LuceneQuery = filter match {
case Some(f) =>
val fields = f.fields(collection.model)
// Query-string parsing with the classic parser, defaulting to the first field name of the filter.
def parsed(q: String, allowLeading: Boolean = false): LuceneQuery = {
val parser = new QueryParser(f.fieldNames.head, this.index.analyzer)
f match {
case f: Filter.Equals[Doc, _] => exactQuery(f.field(collection.model), f.getJson(collection.model))
case f: Filter.NotEquals[Doc, _] =>
// Everything except the exact match.
val b = new BooleanQuery.Builder
b.add(new MatchAllDocsQuery, BooleanClause.Occur.MUST)
b.add(exactQuery(f.field(collection.model), f.getJson(collection.model)), BooleanClause.Occur.MUST_NOT)
case f: Filter.Regex[Doc, _] => new RegexpQuery(new Term(f.fieldName, f.expression))
case f: Filter.In[Doc, _] =>
// Any of the listed values may match.
val queries = f.getJson(collection.model).map(json => exactQuery(f.field(collection.model), json))
val b = new BooleanQuery.Builder
queries.foreach { q =>
b.add(q, BooleanClause.Occur.SHOULD)
// Open-ended ranges fall back to the minimum or maximum of the numeric type.
case Filter.RangeLong(fieldName, from, to) => LongField.newRangeQuery(fieldName, from.getOrElse(Long.MinValue), to.getOrElse(Long.MaxValue))
case Filter.RangeDouble(fieldName, from, to) => DoubleField.newRangeQuery(fieldName, from.getOrElse(Double.MinValue), to.getOrElse(Double.MaxValue))
// Tokenized fields go through the query parser; other fields use the wildcard term queries below.
case Filter.StartsWith(_, query) if fields.head.isTokenized => parsed(s"$query*")
case Filter.EndsWith(_, query) if fields.head.isTokenized => parsed(s"*$query", allowLeading = true)
case Filter.Contains(_, query) if fields.head.isTokenized => parsed(s"*$query*", allowLeading = true)
case Filter.Exact(_, query) if fields.head.isTokenized => parsed(query)
case Filter.StartsWith(fieldName, query) => new WildcardQuery(new Term(fieldName, s"$query*"))
case Filter.EndsWith(fieldName, query) => new WildcardQuery(new Term(fieldName, s"*$query"))
case Filter.Contains(fieldName, query) => new WildcardQuery(new Term(fieldName, s"*$query*"))
case Filter.Exact(fieldName, query) => new WildcardQuery(new Term(fieldName, query))
case Filter.Distance(fieldName, from, radius) =>
// Distance match, excluding points that sit exactly at 0.0, 0.0.
val b = new BooleanQuery.Builder
b.add(LatLonPoint.newDistanceQuery(fieldName, from.latitude, from.longitude, radius.toMeters), BooleanClause.Occur.MUST)
b.add(LatLonPoint.newBoxQuery(fieldName, 0.0, 0.0, 0.0, 0.0), BooleanClause.Occur.MUST_NOT)
case Filter.Multi(minShould, clauses) =>
val b = new BooleanQuery.Builder
// minShould is only honoured when a Should or Filter clause is present.
val hasShould = clauses.exists(c => c.condition == Condition.Should || c.condition == Condition.Filter)
val minShouldMatch = if (hasShould) minShould else 0
clauses.foreach { c =>
val q = filter2Lucene(Some(c.filter))
val query = c.boost match {
case Some(boost) => new BoostQuery(q, boost.toFloat)
case None => q
val occur = c.condition match {
case Condition.Must => BooleanClause.Occur.MUST
case Condition.MustNot => BooleanClause.Occur.MUST_NOT
case Condition.Filter => BooleanClause.Occur.FILTER
case Condition.Should => BooleanClause.Occur.SHOULD
b.add(query, occur)
// Without any Must clause and with minShouldMatch at 0, a match-all Must clause is added.
if (minShouldMatch == 0 && !clauses.exists(_.condition == Condition.Must)) {
b.add(new MatchAllDocsQuery, BooleanClause.Occur.MUST)
case Filter.DrillDownFacetFilter(fieldName, path, showOnlyThisLevel) =>
val indexedFieldName = facetsConfig.getDimConfig(fieldName).indexFieldName
// Restricting to this level appends the $ROOT$ marker to the path.
val exactPath = if (showOnlyThisLevel) {
path ::: List("$ROOT$")
} else {
new TermQuery(DrillDownQuery.term(indexedFieldName, fieldName, exactPath: _*))
case None => new MatchAllDocsQuery
/**
 * Builds an exact-match Lucene query for a field from a JSON value.
 *
 * NOTE(review): the field-name arguments, the build calls of the boolean builders and the
 * scrutinee of the Null branch are truncated in this copy of the file.
 *
 * @param field the field to match on
 * @param json  the value to match
 * @return a query matching documents whose field equals the value
 * @throws RuntimeException for JSON kinds without an equality mapping
 */
private def exactQuery(field: Field[Doc, _], json: Json): LuceneQuery = json match {
// Tokenized strings are split on whitespace and every token must match.
case Str(s, _) if field.isInstanceOf[Tokenized[_]] =>
val b = new BooleanQuery.Builder
s.split("\\s+").foreach(s => b.add(new TermQuery(new Term(, s)), BooleanClause.Occur.MUST))
case Str(s, _) => new TermQuery(new Term(, s))
// Booleans are matched as the integers 1 and 0.
case Bool(b, _) => IntPoint.newExactQuery(, if (b) 1 else 0)
case NumInt(l, _) => LongPoint.newExactQuery(, l)
case NumDec(bd, _) => DoublePoint.newExactQuery(, bd.toDouble)
// Arrays require every element to match.
case Arr(v, _) =>
val b = new BooleanQuery.Builder
v.foreach { json =>
val q = exactQuery(field, json)
b.add(q, BooleanClause.Occur.MUST)
// Null on an optional string matches Field.NullString; any other definition matches the text null.
case Null => match {
case DefType.Opt(DefType.Str) => new TermQuery(new Term(, Field.NullString))
case _ => new TermQuery(new Term(, "null"))
case json => throw new RuntimeException(s"Unsupported equality check: $json (${})")
/**
 * Converts a sort instruction into a Lucene SortField.
 *
 * Best-match and index-order map to the predefined score and doc sort fields. Field and distance
 * sorts address a field whose name carries the suffix Sort.
 *
 * NOTE(review): the field-name interpolations, the arguments passed to st and the final call of
 * sf are truncated in this copy of the file.
 *
 * @param sort the sort instruction
 * @return the matching Lucene sort field
 * @throws RuntimeException when the field definition cannot be sorted on
 */
private def sort2SortField(sort: Sort): SortField = {
sort match {
case Sort.BestMatch => SortField.FIELD_SCORE
case Sort.IndexOrder => SortField.FIELD_DOC
case Sort.ByField(field, dir) =>
val fieldSortName = s"${}Sort"
// Resolves the Lucene sort type, unwrapping Opt and Arr definitions.
def st(d: DefType): SortField.Type = d match {
case DefType.Str => SortField.Type.STRING
case DefType.Dec => SortField.Type.DOUBLE
case DefType.Int => SortField.Type.LONG
case DefType.Opt(t) => st(t)
case DefType.Arr(t) => st(t)
case _ => throw new RuntimeException(s"Unsupported sort type for ${}")
val sortType = st(
// Numeric definitions use a sorted-numeric sort field, strings a plain one; the flag is true for descending.
def sf(d: DefType): SortField = d match {
case DefType.Int | DefType.Dec => new SortedNumericSortField(fieldSortName, sortType, dir == SortDirection.Descending)
case DefType.Str => new SortField(fieldSortName, sortType, dir == SortDirection.Descending)
case DefType.Opt(t) => sf(t)
case DefType.Arr(t) => sf(t)
case d => throw new RuntimeException(s"Unsupported sort definition: $d")
case Sort.ByDistance(field, from, _) =>
// Distance sort from the given point; the third component of the pattern is ignored here.
val fieldSortName = s"${}Sort"
LatLonDocValuesField.newDistanceSort(fieldSortName, from.latitude, from.longitude)
/**
 * Runs an aggregate query by delegating to Aggregator with this collection.
 *
 * @param query       the aggregate query
 * @param transaction the transaction the read belongs to
 * @return iterator over the materialized aggregate rows
 */
override def aggregate(query: AggregateQuery[Doc, Model])
(implicit transaction: Transaction[Doc]): Iterator[MaterializedAggregate[Doc, Model]] =
Aggregator(query, collection)
// Count of rows for an aggregate query.
// NOTE(review): the body of this method is not visible in this copy of the file.
override def aggregateCount(query: AggregateQuery[Doc, Model])(implicit transaction: Transaction[Doc]): Int =
// Captures the current document count first.
// NOTE(review): the statements that clear the index and return the result are not visible in
// this copy of the file; presumably the captured count is returned - confirm.
override def truncate()(implicit transaction: Transaction[Doc]): Int = {
val count = this.count
// Disposal runs inside a Try.
// NOTE(review): the body of the Try and how its result is handled are not visible in this copy of the file.
override def dispose(): Unit = Try {
/**
 * StoreManager that creates LuceneStore instances.
 */
object LuceneStore extends StoreManager {
// NOTE(review): the directory argument is truncated in this copy; the remaining text shows a
// path component built from the store name followed by .lucene - confirm how it is resolved
// against the database.
override def create[Doc <: Document[Doc], Model <: DocumentModel[Doc]](db: LightDB, name: String, storeMode: StoreMode): Store[Doc, Model] =
new LuceneStore[Doc, Model]("$name.lucene")), storeMode)
