All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.jaeksoft.searchlib.parser.ParserFieldTarget Maven / Gradle / Ivy

/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2012-2014 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see .
 **/

package com.jaeksoft.searchlib.parser;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringEscapeUtils;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

import com.jaeksoft.searchlib.index.IndexDocument;
import com.jaeksoft.searchlib.schema.FieldValueItem;
import com.jaeksoft.searchlib.util.DomUtils;
import com.jaeksoft.searchlib.util.RegExpUtils;
import com.jaeksoft.searchlib.util.StringUtils;
import com.jaeksoft.searchlib.util.XmlWriter;
import com.jaeksoft.searchlib.util.map.TargetField;

public class ParserFieldTarget extends TargetField {

	private String captureRegexp;

	private boolean removeTag;

	private boolean convertHtmlEntities;

	private Pattern captureRegexpPattern;

	public ParserFieldTarget(String name, String captureRegexp,
			String analyzer, boolean removeTag) {
		super(name, analyzer, null, null);
		this.captureRegexp = captureRegexp;
		this.removeTag = removeTag;
		this.checkRegexpPattern();
	}

	public ParserFieldTarget(String name, Node node) {
		super(name, node);
		List nl = DomUtils.getNodes(node, "captureRegexp");
		if (nl.size() > 0)
			captureRegexp = StringEscapeUtils.unescapeXml(nl.get(0)
					.getTextContent());
		nl = DomUtils.getNodes(node, "removeTag");
		removeTag = nl != null && nl.size() > 0;
		nl = DomUtils.getNodes(node, "convertHtmlEntities");
		convertHtmlEntities = nl != null && nl.size() > 0;
		checkRegexpPattern();
	}

	public void writeXml(XmlWriter xmlWriter) throws SAXException {
		if (captureRegexp != null) {
			xmlWriter.startElement("captureRegexp");
			xmlWriter.textNode(StringEscapeUtils.escapeXml(captureRegexp));
			xmlWriter.endElement();
		}
		if (removeTag) {
			xmlWriter.startElement("removeTag");
			xmlWriter.endElement();
		}
		if (convertHtmlEntities) {
			xmlWriter.startElement("convertHtmlEntities");
			xmlWriter.endElement();
		}
	}

	public final boolean hasRegexpPattern() {
		return (captureRegexp != null);
	}

	private void checkRegexpPattern() {
		if (captureRegexp != null)
			if (captureRegexp.trim().length() == 0)
				captureRegexp = null;
		captureRegexpPattern = captureRegexp == null ? null : Pattern
				.compile(captureRegexp);
	}

	/**
	 * @return the captureRegexp
	 */
	public String getCaptureRegexp() {
		return captureRegexp;
	}

	/**
	 * @param replaceRegexpTag
	 *            the replaceRegexpTag to set
	 */
	public void setCaptureRegexp(String captureRegexp) {
		this.captureRegexp = captureRegexp;
	}

	@Override
	final public void addFieldValueItems(List fieldValueItems,
			IndexDocument targetDocument) throws IOException {
		if (fieldValueItems == null)
			return;
		List values = new ArrayList(0);
		if (captureRegexpPattern == null) {
			for (FieldValueItem fieldValueItem : fieldValueItems)
				values.add(fieldValueItem.getValue());
		} else {
			synchronized (captureRegexpPattern) {
				for (FieldValueItem fieldValueItem : fieldValueItems)
					for (String value : RegExpUtils.getGroups(
							captureRegexpPattern, fieldValueItem.getValue()))
						values.add(value);
			}
		}

		if (removeTag || convertHtmlEntities) {
			int pos = 0;
			for (String value : values) {
				if (removeTag)
					value = StringUtils.removeTag(value);
				if (convertHtmlEntities)
					value = StringEscapeUtils.unescapeHtml(value);
				values.set(pos++, value);
			}
		}
		addValues(values, targetDocument);
	}

	/**
	 * @return the removeTag
	 */
	public boolean isRemoveTag() {
		return removeTag;
	}

	/**
	 * @param removeTag
	 *            the removeTag to set
	 */
	public void setRemoveTag(boolean removeTag) {
		this.removeTag = removeTag;
	}

	/**
	 * @param convertHtmlEntities
	 *            the convertHtmlEntities to set
	 */
	public void setConvertHtmlEntities(boolean convertHtmlEntities) {
		this.convertHtmlEntities = convertHtmlEntities;
	}

	/**
	 * @return the convertHtmlEntities
	 */
	public boolean isConvertHtmlEntities() {
		return convertHtmlEntities;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy