// com.jaeksoft.searchlib.parser.ParserFieldTarget Maven / Gradle / Ivy
// Show all versions of opensearchserver Show documentation
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2012-2016 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.parser;
import com.jaeksoft.searchlib.index.IndexDocument;
import com.jaeksoft.searchlib.schema.FieldValueItem;
import com.jaeksoft.searchlib.util.DomUtils;
import com.jaeksoft.searchlib.util.RegExpUtils;
import com.jaeksoft.searchlib.util.StringUtils;
import com.jaeksoft.searchlib.util.XmlWriter;
import com.jaeksoft.searchlib.util.map.TargetField;
import org.apache.commons.lang3.StringEscapeUtils;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
/**
 * A {@link TargetField} that can post-process values before they are added to
 * the target document.
 * <p>
 * Three optional steps are applied, in this order:
 * <ol>
 * <li>if a capture regular expression is set, each incoming value is replaced
 * by the strings returned by {@code RegExpUtils.getGroups} for that value;</li>
 * <li>if {@code removeTag} is set, each value is passed through
 * {@code StringUtils.removeTag};</li>
 * <li>if {@code convertHtmlEntities} is set, HTML 4 entities are unescaped.</li>
 * </ol>
 */
public class ParserFieldTarget extends TargetField {

    /** Capture expression source; {@code null} when no capture is configured. */
    private String captureRegexp;

    private boolean removeTag;

    private boolean convertHtmlEntities;

    /** Compiled form of {@link #captureRegexp}; {@code null} when it is {@code null}. */
    private Pattern captureRegexpPattern;

    /**
     * Creates a target from explicit settings. HTML entity conversion is left
     * disabled.
     *
     * @param name          the field name, passed to the superclass
     * @param captureRegexp the capture expression; {@code null} or blank means
     *                      "no capture"
     * @param analyzer      the analyzer name, passed to the superclass
     * @param removeTag     whether values go through {@code StringUtils.removeTag}
     * @throws java.util.regex.PatternSyntaxException if the expression is invalid
     */
    public ParserFieldTarget(String name, String captureRegexp, String analyzer, boolean removeTag) {
        super(name, analyzer, null, null);
        this.captureRegexp = captureRegexp;
        this.removeTag = removeTag;
        checkRegexpPattern();
    }

    /**
     * Creates a target from its XML configuration node. The child elements
     * {@code captureRegexp} (XML-unescaped text content), {@code removeTag} and
     * {@code convertHtmlEntities} (flags enabled by mere presence) are read.
     *
     * @param name the field name, passed to the superclass
     * @param node the configuration node
     * @throws java.util.regex.PatternSyntaxException if the expression is invalid
     */
    public ParserFieldTarget(String name, Node node) {
        super(name, node);
        List<Node> nl = DomUtils.getNodes(node, "captureRegexp");
        // Same null guard as the two flag lookups below.
        if (nl != null && !nl.isEmpty())
            captureRegexp = StringEscapeUtils.unescapeXml(nl.get(0).getTextContent());
        nl = DomUtils.getNodes(node, "removeTag");
        removeTag = nl != null && !nl.isEmpty();
        nl = DomUtils.getNodes(node, "convertHtmlEntities");
        convertHtmlEntities = nl != null && !nl.isEmpty();
        checkRegexpPattern();
    }

    /**
     * Writes the settings as child elements: {@code captureRegexp} (XML-escaped
     * text) when an expression is set, and the empty elements {@code removeTag}
     * / {@code convertHtmlEntities} when the matching flag is enabled.
     *
     * @param xmlWriter the writer to emit to
     * @throws SAXException if the writer fails
     */
    public void writeXml(XmlWriter xmlWriter) throws SAXException {
        if (captureRegexp != null) {
            xmlWriter.startElement("captureRegexp");
            xmlWriter.textNode(StringEscapeUtils.escapeXml(captureRegexp));
            xmlWriter.endElement();
        }
        if (removeTag) {
            xmlWriter.startElement("removeTag");
            xmlWriter.endElement();
        }
        if (convertHtmlEntities) {
            xmlWriter.startElement("convertHtmlEntities");
            xmlWriter.endElement();
        }
    }

    /**
     * @return {@code true} if a capture expression is configured
     */
    public final boolean hasRegexpPattern() {
        return captureRegexp != null;
    }

    /**
     * Normalizes a blank expression to {@code null} and keeps the compiled
     * pattern in sync with the expression string.
     */
    private void checkRegexpPattern() {
        if (captureRegexp != null && captureRegexp.trim().length() == 0)
            captureRegexp = null;
        captureRegexpPattern = captureRegexp == null ? null : Pattern.compile(captureRegexp);
    }

    /**
     * @return the captureRegexp
     */
    public String getCaptureRegexp() {
        return captureRegexp;
    }

    /**
     * Sets the capture expression and recompiles the pattern, so that
     * {@link #addFieldValueItems(List, IndexDocument)} uses the new expression.
     *
     * @param captureRegexp the captureRegexp to set; {@code null} or blank
     *                      disables the capture
     * @throws java.util.regex.PatternSyntaxException if the expression is invalid
     */
    public void setCaptureRegexp(String captureRegexp) {
        this.captureRegexp = captureRegexp;
        // Without this the compiled pattern would still be the previous one.
        checkRegexpPattern();
    }

    /**
     * Collects the values of the given items, applies the configured capture /
     * tag removal / entity conversion steps, and hands the result to
     * {@code addValues}.
     *
     * @param fieldValueItems the source items; nothing happens when {@code null}
     * @param targetDocument  the document receiving the values
     * @throws IOException propagated from {@code addValues}
     */
    @Override
    public final void addFieldValueItems(List<FieldValueItem> fieldValueItems, IndexDocument targetDocument)
            throws IOException {
        if (fieldValueItems == null)
            return;
        // Snapshot the field: the null check and the lock must see the same object.
        final Pattern pattern = captureRegexpPattern;
        final List<String> values = new ArrayList<String>(fieldValueItems.size());
        if (pattern == null) {
            for (FieldValueItem fieldValueItem : fieldValueItems)
                values.add(fieldValueItem.getValue());
        } else {
            // Locking kept from the previous implementation.
            // NOTE(review): Pattern itself is thread-safe; confirm whether
            // RegExpUtils.getGroups needs this before removing it.
            synchronized (pattern) {
                for (FieldValueItem fieldValueItem : fieldValueItems)
                    for (String value : RegExpUtils.getGroups(pattern, fieldValueItem.getValue()))
                        values.add(value);
            }
        }
        if (removeTag || convertHtmlEntities) {
            for (int i = 0; i < values.size(); i++) {
                String value = values.get(i);
                if (removeTag)
                    value = StringUtils.removeTag(value);
                if (convertHtmlEntities)
                    value = StringEscapeUtils.unescapeHtml4(value);
                values.set(i, value);
            }
        }
        addValues(values, targetDocument);
    }

    /**
     * @return the removeTag
     */
    public boolean isRemoveTag() {
        return removeTag;
    }

    /**
     * @param removeTag the removeTag to set
     */
    public void setRemoveTag(boolean removeTag) {
        this.removeTag = removeTag;
    }

    /**
     * @param convertHtmlEntities the convertHtmlEntities to set
     */
    public void setConvertHtmlEntities(boolean convertHtmlEntities) {
        this.convertHtmlEntities = convertHtmlEntities;
    }

    /**
     * @return the convertHtmlEntities
     */
    public boolean isConvertHtmlEntities() {
        return convertHtmlEntities;
    }
}