org.apache.lucene.index.DocInverterPerField Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
/*
* COPIED FROM APACHE LUCENE 4.7.2
*
* Git URL: [email protected]:apache/lucene.git, tag: releases/lucene-solr/4.7.2, path: lucene/core/src/java
*
* (see https://issues.apache.org/jira/browse/OAK-10786 for details)
*/
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.IOUtils;
/**
* Holds state for inverting all occurrences of a single
* field in the document. This class doesn't do anything
* itself; instead, it forwards the tokens produced by
* analysis to its own consumer
* (InvertedDocConsumerPerField). It also interacts with an
* endConsumer (InvertedDocEndConsumerPerField).
*/
final class DocInverterPerField extends DocFieldConsumerPerField {
final FieldInfo fieldInfo;
final InvertedDocConsumerPerField consumer;
final InvertedDocEndConsumerPerField endConsumer;
final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
public DocInverterPerField(DocInverter parent, FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
docState = parent.docState;
fieldState = new FieldInvertState(fieldInfo.name);
this.consumer = parent.consumer.addField(this, fieldInfo);
this.endConsumer = parent.endConsumer.addField(this, fieldInfo);
}
@Override
void abort() {
try {
consumer.abort();
} finally {
endConsumer.abort();
}
}
@Override
public void processFields(final IndexableField[] fields,
final int count) throws IOException {
fieldState.reset();
final boolean doInvert = consumer.start(fields, count);
for(int i=0;i 0) {
fieldState.position += analyzed ? docState.analyzer.getPositionIncrementGap(fieldInfo.name) : 0;
}
/*
* To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
* when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
* but rather a finally that takes note of the problem.
*/
boolean succeededInProcessingField = false;
final TokenStream stream = field.tokenStream(docState.analyzer);
// reset the TokenStream to the first token
stream.reset();
try {
boolean hasMoreTokens = stream.incrementToken();
fieldState.attributeSource = stream;
OffsetAttribute offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
PositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);
if (hasMoreTokens) {
consumer.start(field);
do {
// If we hit an exception in stream.next below
// (which is fairly common, eg if analyzer
// chokes on a given document), then it's
// non-aborting and (above) this one document
// will be marked as deleted, but still
// consume a docID
final int posIncr = posIncrAttribute.getPositionIncrement();
if (posIncr < 0) {
throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.name() + "'");
}
if (fieldState.position == 0 && posIncr == 0) {
throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
}
int position = fieldState.position + posIncr;
if (position > 0) {
// NOTE: confusing: this "mirrors" the
// position++ we do below
position--;
} else if (position < 0) {
throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
}
// position is legal, we can safely place it in fieldState now.
// not sure if anything will use fieldState after non-aborting exc...
fieldState.position = position;
if (posIncr == 0)
fieldState.numOverlap++;
if (checkOffsets) {
int startOffset = fieldState.offset + offsetAttribute.startOffset();
int endOffset = fieldState.offset + offsetAttribute.endOffset();
if (startOffset < 0 || endOffset < startOffset) {
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
+ "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.name() + "'");
}
if (startOffset < lastStartOffset) {
throw new IllegalArgumentException("offsets must not go backwards startOffset="
+ startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'");
}
lastStartOffset = startOffset;
}
boolean success = false;
try {
// If we hit an exception in here, we abort
// all buffered documents since the last
// flush, on the likelihood that the
// internal state of the consumer is now
// corrupt and should not be flushed to a
// new segment:
consumer.add();
success = true;
} finally {
if (!success) {
docState.docWriter.setAborting();
}
}
fieldState.length++;
fieldState.position++;
} while (stream.incrementToken());
}
// trigger streams to perform end-of-stream operations
stream.end();
// TODO: maybe add some safety? then again, its already checked
// when we come back around to the field...
fieldState.position += posIncrAttribute.getPositionIncrement();
fieldState.offset += offsetAttribute.endOffset();
succeededInProcessingField = true;
/* if success was false above there is an exception coming through and we won't get here.*/
succeededInProcessingField = true;
} finally {
if (!succeededInProcessingField) {
IOUtils.closeWhileHandlingException(stream);
} else {
stream.close();
}
if (!succeededInProcessingField && docState.infoStream.isEnabled("DW")) {
docState.infoStream.message("DW", "An exception was thrown while processing field " + fieldInfo.name);
}
}
fieldState.offset += analyzed ? docState.analyzer.getOffsetGap(fieldInfo.name) : 0;
fieldState.boost *= field.boost();
}
// LUCENE-2387: don't hang onto the field, so GC can
// reclaim
fields[i] = null;
}
consumer.finish();
endConsumer.finish();
}
@Override
FieldInfo getFieldInfo() {
return fieldInfo;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy