// org.opentripplanner.common.LuceneIndex Maven / Gradle / Ivy
package org.opentripplanner.common;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.onebusaway.gtfs.model.Stop;
import org.opentripplanner.profile.StopCluster;
import org.opentripplanner.routing.edgetype.StreetEdge;
import org.opentripplanner.routing.graph.GraphIndex;
import org.opentripplanner.routing.vertextype.StreetVertex;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Lucene based index of streets, stops, etc.
 * For reference see:
 * https://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
 *
 * The index is rebuilt from scratch (OpenMode.CREATE) every time an instance is constructed, either
 * synchronously or in a background thread. Until the build has finished, {@link #query} returns no results.
 */
public class LuceneIndex {

    private static final Logger LOG = LoggerFactory.getLogger(LuceneIndex.class);

    /** Maximum number of hits collected for a single query. */
    private static final int MAX_RESULTS = 10;

    /**
     * Splits a query string into tokens: either a run of non-whitespace characters that does not start
     * with a double quote, or a double-quoted phrase. Compiled once because it is used on every query.
     */
    private static final Pattern TOKEN_PATTERN = Pattern.compile("([^\"]\\S*|\".+?\")\\s*");

    private final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
    // Not referenced anywhere else in this class; queries are assembled by hand in query().
    private final QueryParser parser = new QueryParser(Version.LUCENE_47, "name", analyzer);
    private final GraphIndex graphIndex;
    private final File basePath;
    private Directory directory; // the Lucene Directory, not to be confused with a filesystem directory

    /**
     * Will be null until the index is built. Volatile because it is assigned by the indexing thread
     * (possibly a background thread) and read by whichever threads call {@link #query}.
     */
    private volatile IndexSearcher searcher;

    /**
     * @param graphIndex the graph index supplying the stops, stop clusters and street vertices to index
     * @param basePath the filesystem location under which to save indexes
     * @param background if true, perform the initial indexing in a background thread, if false block to index
     */
    public LuceneIndex(final GraphIndex graphIndex, File basePath, boolean background) {
        this.graphIndex = graphIndex;
        this.basePath = basePath;
        if (background) {
            new BackgroundIndexer().start();
        } else {
            // Calling run() directly executes the indexing on the caller's thread.
            new BackgroundIndexer().run();
        }
    }

    /**
     * Index stations, stops, intersections, streets, and addresses by name and location.
     *
     * @throws RuntimeException wrapping any failure that occurs while building or opening the index
     */
    private void index() {
        try {
            long startTime = System.currentTimeMillis();
            /* Create or re-open a disk-backed Lucene Directory under the OTP server base filesystem directory. */
            directory = FSDirectory.open(new File(basePath, "lucene"));
            // TODO reuse the index if it exists?
            //directory = new RAMDirectory(); // only a little faster
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer).setOpenMode(OpenMode.CREATE);
            final IndexWriter writer = new IndexWriter(directory, config);
            boolean committed = false;
            try {
                for (Stop stop : graphIndex.stopForId.values()) {
                    addStop(writer, stop);
                }
                graphIndex.clusterStopsAsNeeded();
                for (StopCluster stopCluster : graphIndex.stopClusterForId.values()) {
                    addCluster(writer, stopCluster);
                }
                for (StreetVertex sv : Iterables.filter(graphIndex.vertexForId.values(), StreetVertex.class)) {
                    addCorner(writer, sv);
                }
                writer.close();
                committed = true;
            } finally {
                if (!committed) {
                    // Do not leave the writer (and its write lock on the directory) open after a failure.
                    rollbackQuietly(writer);
                }
            }
            long elapsedTime = System.currentTimeMillis() - startTime;
            LOG.info("Built Lucene index in {} msec", elapsedTime);
            // Make the IndexSearcher necessary for querying.
            searcher = new IndexSearcher(DirectoryReader.open(directory));
        } catch (Exception ex) {
            throw new RuntimeException("Lucene indexing failed.", ex);
        }
    }

    /** Discard uncommitted changes and close the writer, logging (not throwing) so the original failure is kept. */
    private static void rollbackQuietly(IndexWriter writer) {
        try {
            writer.rollback();
        } catch (Exception ex) {
            LOG.warn("Could not roll back Lucene index writer after a failed indexing run", ex);
        }
    }

    /** Add one document of category STOP, carrying the stop's name, optional code, coordinates and id. */
    private void addStop(IndexWriter iwriter, Stop stop) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("name", stop.getName(), Field.Store.YES));
        if (stop.getCode() != null) {
            doc.add(new StringField("code", stop.getCode(), Field.Store.YES));
        }
        doc.add(new DoubleField("lat", stop.getLat(), Field.Store.YES));
        doc.add(new DoubleField("lon", stop.getLon(), Field.Store.YES));
        doc.add(new StringField("id", stop.getId().toString(), Field.Store.YES));
        doc.add(new StringField("category", Category.STOP.name(), Field.Store.YES));
        iwriter.addDocument(doc);
    }

    /** Add one document of category CLUSTER, carrying the cluster's name, coordinates and id. */
    private void addCluster(IndexWriter iwriter, StopCluster stopCluster) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("name", stopCluster.name, Field.Store.YES));
        doc.add(new DoubleField("lat", stopCluster.lat, Field.Store.YES));
        doc.add(new DoubleField("lon", stopCluster.lon, Field.Store.YES));
        doc.add(new StringField("id", stopCluster.id, Field.Store.YES));
        doc.add(new StringField("category", Category.CLUSTER.name(), Field.Store.YES));
        iwriter.addDocument(doc);
    }

    /**
     * Add one document of category CORNER named "mainStreet & crossStreet" for the given vertex.
     * Nothing is added unless the vertex's outgoing street edges yield two different names.
     * Corner documents have no "id" field.
     */
    private void addCorner(IndexWriter iwriter, StreetVertex sv) throws IOException {
        String mainStreet = null;
        String crossStreet = null;
        // TODO score based on OSM street type, using intersection nodes instead of vertices.
        // The first non-null edge name becomes the main street; the last remaining edge name wins as cross street.
        for (StreetEdge pse : Iterables.filter(sv.getOutgoing(), StreetEdge.class)) {
            if (mainStreet == null) {
                mainStreet = pse.getName();
            } else {
                crossStreet = pse.getName();
            }
        }
        if (mainStreet == null || crossStreet == null) {
            return;
        }
        if (mainStreet.equals(crossStreet)) {
            return;
        }
        Document doc = new Document();
        doc.add(new TextField("name", mainStreet + " & " + crossStreet, Field.Store.YES));
        doc.add(new DoubleField("lat", sv.getLat(), Field.Store.YES));
        doc.add(new DoubleField("lon", sv.getLon(), Field.Store.YES));
        doc.add(new StringField("category", Category.CORNER.name(), Field.Store.YES));
        iwriter.addDocument(doc);
    }

    /** Thread whose run() logs a start message and then builds the index of the enclosing instance. */
    private class BackgroundIndexer extends Thread {
        @Override
        public void run() {
            LOG.info("Starting background Lucene indexing.");
            index();
        }
    }

    /** Fetch results for the geocoder using the OTP graph for stops, clusters and street names
     *
     * @param queryString the user's search text; null yields an empty result
     * @param autocomplete Whether we should use the query string to do a prefix match
     * @param stops Search for stops, either by name or stop code
     * @param clusters Search for clusters by their name
     * @param corners Search for street corners using at least one of the street names
     * @return list of results in the format expected by GeocoderBuiltin.js in the OTP Leaflet client;
     *         empty (never null) when nothing matches, the index is not built yet, or the search fails
     */
    public List<LuceneResult> query (String queryString, boolean autocomplete,
                                     boolean stops, boolean clusters, boolean corners) {
        List<LuceneResult> result = Lists.newArrayList();
        if (queryString == null) {
            return result;
        }
        // Read the volatile field once so the null check and the search use the same searcher.
        IndexSearcher currentSearcher = searcher;
        if (currentSearcher == null) {
            LOG.warn("Lucene index has not been built yet, returning no results.");
            return result;
        }

        /* Turn the query string into a Lucene query.*/
        BooleanQuery query = new BooleanQuery();
        query.add(buildTermQuery(queryString, autocomplete), BooleanClause.Occur.MUST);
        // With no category selected, all categories are searched.
        if (stops || clusters || corners) {
            BooleanQuery typeQuery = new BooleanQuery();
            if (stops) {
                typeQuery.add(new TermQuery(new Term("category", Category.STOP.name())), BooleanClause.Occur.SHOULD);
            }
            if (clusters) {
                typeQuery.add(new TermQuery(new Term("category", Category.CLUSTER.name())), BooleanClause.Occur.SHOULD);
            }
            if (corners) {
                typeQuery.add(new TermQuery(new Term("category", Category.CORNER.name())), BooleanClause.Occur.SHOULD);
            }
            query.add(typeQuery, BooleanClause.Occur.MUST);
        }

        try {
            TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_RESULTS, true);
            currentSearcher.search(query, collector);
            for (ScoreDoc scoreDoc : collector.topDocs().scoreDocs) {
                result.add(toResult(currentSearcher.doc(scoreDoc.doc)));
            }
        } catch (Exception ex) {
            // Best effort: a failed search is logged and whatever was collected so far is returned.
            LOG.error("Error during Lucene search", ex);
        }
        return result;
    }

    /**
     * Build the text part of the query. The "name" field is indexed through the StandardAnalyzer, which
     * lower-cases terms, while PrefixQuery, FuzzyQuery and PhraseQuery do not analyze their input, so
     * the user's text is lower-cased here. The "code" field is a StringField and is matched verbatim.
     */
    private static BooleanQuery buildTermQuery(String queryString, boolean autocomplete) {
        BooleanQuery termQuery = new BooleanQuery();
        if (autocomplete) {
            termQuery.add(new PrefixQuery(new Term("name", queryString.toLowerCase(Locale.ROOT))),
                    BooleanClause.Occur.SHOULD);
            return termQuery;
        }
        Matcher m = TOKEN_PATTERN.matcher(queryString);
        while (m.find()) {
            String token = m.group(1);
            // if token is a quoted search phrase
            if (token.startsWith("\"") && token.endsWith("\"")) {
                PhraseQuery phraseQuery = new PhraseQuery();
                boolean hasTerms = false;
                for (String phraseToken : token.substring(1, token.length() - 1).split("\\s+")) {
                    // Leading or repeated whitespace inside the quotes would otherwise yield empty terms.
                    if (phraseToken.isEmpty()) {
                        continue;
                    }
                    phraseQuery.add(new Term("name", phraseToken.toLowerCase(Locale.ROOT)));
                    hasTerms = true;
                }
                if (hasTerms) {
                    termQuery.add(phraseQuery, BooleanClause.Occur.SHOULD);
                }
            } else { // a regular unquoted search term
                termQuery.add(new FuzzyQuery(new Term("name", token.toLowerCase(Locale.ROOT))),
                        BooleanClause.Occur.SHOULD);
                // This makes it possible to search for a stop code
                termQuery.add(new TermQuery(new Term("code", token)), BooleanClause.Occur.SHOULD);
            }
        }
        return termQuery;
    }

    /** Convert a stored index document into the result structure returned to the client. */
    private static LuceneResult toResult(Document doc) {
        LuceneResult lr = new LuceneResult();
        lr.lat = doc.getField("lat").numericValue().doubleValue();
        lr.lng = doc.getField("lon").numericValue().doubleValue();
        String category = doc.getField("category").stringValue();
        // Only stop and cluster documents are indexed with an "id" field.
        if (category.equals(Category.STOP.name()) || category.equals(Category.CLUSTER.name())) {
            lr.id = doc.getField("id").stringValue();
        }
        String code = "";
        if (doc.getField("code") != null) {
            code = "(" + doc.getField("code").stringValue() + ")";
        }
        String name = doc.getField("name").stringValue();
        lr.description = category.toLowerCase(Locale.ROOT) + " " + name + " " + code;
        return lr;
    }

    /** This class matches the structure of the Geocoder responses expected by the OTP client. */
    public static class LuceneResult {
        public double lat;
        public double lng;
        public String description;
        public String id;
    }

    /** The kinds of documents stored in the index; the constant name is the value of the "category" field. */
    public static enum Category { STOP, CORNER, CLUSTER; }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy