
org.apache.jena.query.text.TextIndexLucene Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.query.text ;
import java.io.IOException ;
import java.util.* ;
import java.util.Map.Entry ;
import org.apache.jena.datatypes.RDFDatatype ;
import org.apache.jena.datatypes.TypeMapper ;
import org.apache.jena.datatypes.xsd.XSDDatatype ;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.NodeFactory ;
import org.apache.jena.sparql.util.NodeFactoryExtra ;
import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.analysis.core.KeywordAnalyzer ;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper ;
import org.apache.lucene.analysis.standard.StandardAnalyzer ;
import org.apache.lucene.document.* ;
import org.apache.lucene.index.* ;
import org.apache.lucene.queryparser.analyzing.AnalyzingQueryParser ;
import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser ;
import org.apache.lucene.queryparser.classic.ParseException ;
import org.apache.lucene.queryparser.classic.QueryParser ;
import org.apache.lucene.queryparser.classic.QueryParserBase ;
import org.apache.lucene.search.IndexSearcher ;
import org.apache.lucene.search.Query ;
import org.apache.lucene.search.ScoreDoc ;
import org.apache.lucene.store.Directory ;
import org.apache.lucene.util.Version ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
public class TextIndexLucene implements TextIndex {
private static Logger log = LoggerFactory.getLogger(TextIndexLucene.class) ;
private static int MAX_N = 10000 ;
public static final Version VER = Version.LUCENE_4_9 ;
// prefix for storing datatype URIs in the index, to distinguish them from language tags
private static final String DATATYPE_PREFIX = "^^";
public static final FieldType ftIRI ;
static {
ftIRI = new FieldType() ;
ftIRI.setTokenized(false) ;
ftIRI.setStored(true) ;
ftIRI.setIndexed(true) ;
ftIRI.freeze() ;
}
public static final FieldType ftString = StringField.TYPE_NOT_STORED ;
private final EntityDefinition docDef ;
private final Directory directory ;
private final Analyzer analyzer ;
private final Analyzer queryAnalyzer ;
private final String queryParserType ;
private final FieldType ftText ;
// The IndexWriter can't be final because we may have to recreate it if rollback() is called.
// However, it needs to be volatile in case the next write transaction is on a different thread,
// but we do not need locking because we are assuming that there can only be one writer
// at a time (enforced elsewhere).
private volatile IndexWriter indexWriter ;
/**
* Constructs a new TextIndexLucene.
*
* @param directory The Lucene Directory for the index
* @param config The config definition for the index instantiation.
*/
public TextIndexLucene(Directory directory, TextIndexConfig config) {
this.directory = directory ;
this.docDef = config.getEntDef() ;
// create the analyzer as a wrapper that uses KeywordAnalyzer for
// entity and graph fields and StandardAnalyzer for all other
Map analyzerPerField = new HashMap<>() ;
analyzerPerField.put(docDef.getEntityField(), new KeywordAnalyzer()) ;
if ( docDef.getGraphField() != null )
analyzerPerField.put(docDef.getGraphField(), new KeywordAnalyzer()) ;
if ( docDef.getLangField() != null )
analyzerPerField.put(docDef.getLangField(), new KeywordAnalyzer()) ;
for (String field : docDef.fields()) {
Analyzer _analyzer = docDef.getAnalyzer(field);
if (_analyzer != null) {
analyzerPerField.put(field, _analyzer);
}
}
this.analyzer = new PerFieldAnalyzerWrapper(
(null != config.getAnalyzer()) ? config.getAnalyzer() : new StandardAnalyzer(VER), analyzerPerField) ;
this.queryAnalyzer = (null != config.getQueryAnalyzer()) ? config.getQueryAnalyzer() : this.analyzer ;
this.queryParserType = config.getQueryParser() ;
this.ftText = config.isValueStored() ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED ;
if (config.isValueStored() && docDef.getLangField() == null)
log.warn("Values stored but langField not set. Returned values will not have language tag or datatype.");
openIndexWriter();
}
private void openIndexWriter() {
IndexWriterConfig wConfig = new IndexWriterConfig(VER, analyzer) ;
try
{
indexWriter = new IndexWriter(directory, wConfig) ;
// Force a commit to create the index, otherwise querying before writing will cause an exception
indexWriter.commit();
}
catch (IOException e)
{
throw new TextIndexException(e) ;
}
}
public Directory getDirectory() {
return directory ;
}
public Analyzer getAnalyzer() {
return analyzer ;
}
public Analyzer getQueryAnalyzer() {
return queryAnalyzer ;
}
public IndexWriter getIndexWriter() {
return indexWriter;
}
@Override
public void prepareCommit() {
try {
indexWriter.prepareCommit();
}
catch (IOException e) {
throw new TextIndexException(e);
}
}
@Override
public void commit() {
try {
indexWriter.commit();
}
catch (IOException e) {
throw new TextIndexException(e);
}
}
@Override
public void rollback() {
IndexWriter idx = indexWriter;
indexWriter = null;
try {
idx.rollback();
}
catch (IOException e) {
throw new TextIndexException(e);
}
// The rollback will close the indexWriter, so we need to reopen it
openIndexWriter();
}
@Override
public void close() {
try {
indexWriter.close() ;
}
catch (IOException ex) {
throw new TextIndexException(ex) ;
}
}
@Override public void updateEntity(Entity entity) {
if ( log.isDebugEnabled() )
log.debug("Update entity: " + entity) ;
try {
updateDocument(entity);
} catch (IOException e) {
throw new TextIndexException(e) ;
}
}
protected void updateDocument(Entity entity) throws IOException {
Document doc = doc(entity);
Term term = new Term(docDef.getEntityField(), entity.getId());
indexWriter.updateDocument(term, doc);
}
@Override
public void addEntity(Entity entity) {
if ( log.isDebugEnabled() )
log.debug("Add entity: " + entity) ;
try {
addDocument(entity);
}
catch (IOException e) {
throw new TextIndexException(e) ;
}
}
protected void addDocument(Entity entity) throws IOException {
Document doc = doc(entity) ;
indexWriter.addDocument(doc) ;
}
@Override
public void deleteEntity(Entity entity) {
if (docDef.getUidField() == null)
return;
if ( log.isDebugEnabled() )
log.debug("Delete entity: "+entity) ;
try {
Map map = entity.getMap();
String property = map.keySet().iterator().next();
String value = (String)map.get(property);
String hash = entity.getChecksum(property, value);
Term uid = new Term(docDef.getUidField(), hash);
indexWriter.deleteDocuments(uid);
} catch (Exception e) {
throw new TextIndexException(e) ;
}
}
protected Document doc(Entity entity) {
Document doc = new Document() ;
Field entField = new Field(docDef.getEntityField(), entity.getId(), ftIRI) ;
doc.add(entField) ;
String graphField = docDef.getGraphField() ;
if ( graphField != null ) {
Field gField = new Field(graphField, entity.getGraph(), ftString) ;
doc.add(gField) ;
}
String langField = docDef.getLangField() ;
String uidField = docDef.getUidField() ;
for ( Entry e : entity.getMap().entrySet() ) {
doc.add( new Field(e.getKey(), (String) e.getValue(), ftText) );
if (langField != null) {
String lang = entity.getLanguage();
RDFDatatype datatype = entity.getDatatype();
if (lang != null && !"".equals(lang)) {
doc.add(new Field(langField, lang, StringField.TYPE_STORED));
} else if (datatype != null && !datatype.equals(XSDDatatype.XSDstring)) {
// for non-string and non-langString datatypes, store the datatype in langField
doc.add(new Field(langField, DATATYPE_PREFIX + datatype.getURI(), StringField.TYPE_STORED));
}
}
if (uidField != null) {
String hash = entity.getChecksum(e.getKey(), (String) e.getValue());
doc.add(new Field(uidField, hash, StringField.TYPE_STORED));
}
}
return doc ;
}
@Override
public Map get(String uri) {
try {
IndexReader indexReader = DirectoryReader.open(directory);
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy