// org.pageseeder.flint.ixml.IndexDocumentHandler_5_0 (Maven / Gradle / Ivy)
/*
* Copyright 2015 Allette Systems (Australia)
* http://www.allette.com.au
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.pageseeder.flint.ixml;
import org.pageseeder.flint.indexing.FlintDocument;
import org.pageseeder.flint.indexing.FlintField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
import java.util.ArrayList;
import java.util.List;
/**
 * The handler for the Flint Index Documents format version 5.
 *
 * <p>The handler reacts to two elements only: <code>document</code>, which opens and closes a
 * {@link FlintDocument}, and <code>field</code>, whose attributes and text content are used to
 * configure a {@link FlintField} that is then added to the current document. All other elements
 * are ignored.
 *
 * <p>Instances hold parsing state in plain fields and are therefore meant to be used by one
 * parse at a time.
 *
 * @see "Index Documents 5.0 Schema"
 *
 * @author Jean-Baptiste Reure
 * @version 27 June 2016
 */
final class IndexDocumentHandler_5_0 extends DefaultHandler implements IndexDocumentHandler {

  /**
   * The logger for this class.
   */
  private static final Logger LOGGER = LoggerFactory.getLogger(IndexDocumentHandler_5_0.class);

  // class attributes
  // -------------------------------------------------------------------------------------------

  /**
   * The catalog to associate with the fields.
   */
  private final String _catalog;

  /**
   * The list of documents produced by this handler; <code>null</code> until
   * {@link #startDocument()} has been invoked.
   */
  private List<FlintDocument> documents;

  // state variables for documents and fields
  // ----------------------------------------------------------------------------------------------

  /**
   * The current document being processed; <code>null</code> when outside a 'document' element.
   */
  private FlintDocument _document;

  /**
   * Flag to indicate whether a field is being processed (affects the behaviour of characters())
   */
  private boolean _isField;

  /**
   * The field builder, replaced by a fresh instance after each 'field' element.
   */
  private FlintField field = null;

  /**
   * The characters found within a field.
   */
  private final StringBuilder _value = new StringBuilder();

  // constructors
  // ----------------------------------------------------------------------------------------------

  /**
   * Creates a new handler.
   *
   * @param catalog The catalog passed to every {@link FlintField} created by this handler.
   */
  public IndexDocumentHandler_5_0(String catalog) {
    this._catalog = catalog;
  }

  /**
   * {@inheritDoc}
   *
   * @return the documents collected so far, or <code>null</code> if parsing has not started yet.
   */
  @Override
  public List<FlintDocument> getDocuments() {
    return this.documents;
  }

  // SAX Methods
  // ----------------------------------------------------------------------------------------------

  /**
   * Prepares the handler for a new parse: creates an empty list of documents, a fresh field
   * builder and clears any state left over from a previous (possibly aborted) parse.
   */
  @Override
  public void startDocument() {
    this.documents = new ArrayList<>();
    this.field = new FlintField(this._catalog);
    // make the handler safely reusable even if the previous parse stopped half-way
    this._document = null;
    this._isField = false;
    this._value.setLength(0);
  }

  /**
   * Dispatches the start of 'field' and 'document' elements; other elements are ignored.
   *
   * @param uri        The namespace URI (unused).
   * @param localName  The local name (unused).
   * @param qName      The qualified name used to identify the element.
   * @param attributes The attributes attached to the element.
   */
  @Override
  public void startElement(String uri, String localName, String qName, Attributes attributes) {
    if ("field".equals(qName)) {
      startFieldElement(attributes);
    } else if ("document".equals(qName)) {
      startDocumentElement();
    }
  }

  /**
   * Dispatches the end of 'field' and 'document' elements; other elements are ignored.
   *
   * @param uri       The namespace URI (unused).
   * @param localName The local name (unused).
   * @param qName     The qualified name used to identify the element.
   */
  @Override
  public void endElement(String uri, String localName, String qName) {
    if ("field".equals(qName)) {
      endFieldElement();
    } else if ("document".equals(qName)) {
      endDocumentElement();
    }
  }

  /**
   * Receives notification of character data inside an element.
   *
   * <p>While a field is being processed the characters are appended, unmodified, to the value
   * buffer; the parser may deliver the text of a single element in several chunks. Characters
   * found outside a field are ignored.
   *
   * @param ch     The characters
   * @param start  The start position in the character array.
   * @param length The number of characters to use from the character array.
   */
  @Override
  public void characters(char[] ch, int start, int length) {
    if (this._isField) {
      this._value.append(ch, start, length);
    }
  }

  // private helpers
  // -------------------------------------------------------------------------------------------

  /**
   * Handles the start of a 'document' element.
   */
  private void startDocumentElement() {
    this._document = new FlintDocument();
  }

  /**
   * Handles the end of a 'document' element: the current document is kept unless it is empty.
   */
  private void endDocumentElement() {
    // can only happen with nested 'document' elements, where the inner end tag already
    // consumed the current document
    if (this._document == null) {
      return;
    }
    if (this._document.isEmpty()) {
      LOGGER.warn("This document is empty - will not be stored");
    } else {
      this.documents.add(this._document);
    }
    this._document = null;
  }

  /**
   * Handles the start of a new 'field' element
   *
   * @param atts The attributes to handle.
   */
  private void startFieldElement(Attributes atts) {
    // required attributes backwards compatible -> compress = true
    String store = atts.getValue("store");
    this.field.name(atts.getValue("name"))
              .index(atts.getValue("index"))
              .store("compress".equals(store) ? "true" : store);
    // Numeric type
    String numType = atts.getValue("numeric-type");
    if (numType != null) {
      this.field.numeric(numType).precisionStep(atts.getValue("precision-step"));
    }
    // Optional attributes
    this.field.termVector(atts.getValue("term-vector"))
              .termVectorPositions(atts.getValue("term-vector-positions"))
              .termVectorOffsets(atts.getValue("term-vector-offsets"))
              .termVectorPayloads(atts.getValue("term-vector-payloads"))
              .tokenize(atts.getValue("tokenize"))
              .docValues(atts.getValue("doc-values"), numType != null);
    // boost is no longer applied, only reported
    if (atts.getValue("boost") != null) {
      LOGGER.warn("boost attribute is deprecated, boost should be used at query time, not at indexing time");
    }
    // Date handling
    this.field.dateFormat(atts.getValue("date-format"))
              .resolution(atts.getValue("date-resolution"));
    this._isField = true;
  }

  /**
   * Handles the end of a 'field' element: the buffered text becomes the field value and the
   * field is added to the current document. A field found outside of any document is reported
   * and dropped. The per-field state is always reset afterwards.
   */
  private void endFieldElement() {
    try {
      if (this._document == null) {
        LOGGER.warn("Ignoring field found outside of a document: {}", this.field.name());
      } else {
        // set the value
        this.field.value(this._value.toString());
        this._document.add(this.field);
      }
    } catch (IllegalStateException | IllegalArgumentException ex) {
      LOGGER.warn("Unable to create field: {}", this.field.name(), ex);
    } finally {
      // Reset the class attributes involved in this field, whatever happened above
      this.field = new FlintField(this._catalog);
      this._isField = false;
      this._value.setLength(0);
    }
  }

}