// org.carrot2.source.xml.XmlDocumentSourceHelper (Maven / Gradle / Ivy)
/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.source.xml;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.URIResolver;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.carrot2.core.IDocumentSource;
import org.carrot2.core.ProcessingResult;
import org.carrot2.core.attribute.Processing;
import org.carrot2.source.SimpleSearchEngine;
import org.carrot2.util.CloseableUtils;
import org.carrot2.util.StreamUtils;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.constraint.IntRange;
import org.carrot2.util.resource.IResource;
import org.carrot2.util.xslt.NopURIResolver;
import org.carrot2.util.xslt.TemplatesPool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
/**
 * Exposes the common functionality a {@link IDocumentSource} based on XML/XSLT is likely
 * to need. Apart from the data transfer {@link #timeout}, this helper does not expose any
 * attributes, so that different implementations can decide which attributes they expose.
 */
@Bindable(prefix = "XmlDocumentSourceHelper")
public class XmlDocumentSourceHelper
{
    /**
     * Data transfer timeout. Specifies the data transfer timeout, in seconds. A timeout value of
     * zero is interpreted as an infinite timeout.
     */
    @Input
    @Processing
    @Attribute
    @IntRange(min = 0, max = 5 * 60)
    @Label("Data transfer timeout")
    @Level(AttributeLevel.ADVANCED)
    @Group(SimpleSearchEngine.SERVICE)
    public int timeout = 8;

    /** Precompiled XSLT templates. */
    private final TemplatesPool pool;

    /**
     * URI resolver. Does nothing.
     */
    private static final URIResolver uriResolver = new NopURIResolver();

    private static final Logger log = LoggerFactory.getLogger(XmlDocumentSourceHelper.class);

    /**
     * Creates a helper backed by its own, non-caching {@link TemplatesPool} and installs
     * the no-op {@link URIResolver} on that pool's transformer factory.
     *
     * @throws RuntimeException wrapping any exception raised while setting up the pool
     */
    public XmlDocumentSourceHelper()
    {
        try
        {
            // No template caching.
            this.pool = new TemplatesPool(false);
            this.pool.tFactory.setURIResolver(uriResolver);
        }
        catch (Exception e)
        {
            throw new RuntimeException(e);
        }
    }

    /**
     * Loads a {@link ProcessingResult} from the provided {@link InputStream}, applying
     * XSLT transform if specified. The provided {@link InputStream} will be closed.
     *
     * @param xml stream with the XML to load
     * @param stylesheet compiled stylesheet to apply to <code>xml</code> before
     *        deserialization, or <code>null</code> to deserialize <code>xml</code> as is
     * @param xsltParameters parameters to set on the transformer, keyed by parameter name;
     *        may be <code>null</code>, ignored when <code>stylesheet</code> is
     *        <code>null</code>
     * @return the deserialized processing result
     * @throws Exception if the transformation or the deserialization fails
     */
    public ProcessingResult loadProcessingResult(InputStream xml, Templates stylesheet,
        Map<String, ?> xsltParameters) throws Exception
    {
        InputStream carrot2XmlStream = null;
        try
        {
            carrot2XmlStream = getCarrot2XmlStream(xml, stylesheet, xsltParameters);
            return ProcessingResult.deserialize(carrot2XmlStream);
        }
        finally
        {
            // Without a stylesheet both references point to the same stream.
            CloseableUtils.close(carrot2XmlStream, xml);
        }
    }

    /**
     * Returns a Carrot2 XML stream, applying an XSLT transformation if the stylesheet is
     * provided. When a transformation is performed, the input stream is closed and the
     * returned stream is an in-memory copy of the transformation output; otherwise the
     * input stream itself is returned, still open.
     */
    private InputStream getCarrot2XmlStream(InputStream xmlInputStream,
        Templates stylesheet, Map<String, ?> xsltParameters)
        throws TransformerConfigurationException, IOException, TransformerException
    {
        // No stylesheet, nothing to transform.
        if (stylesheet == null)
        {
            return xmlInputStream;
        }

        byte [] debugInput = null;
        try
        {
            // Initialize transformer
            final Transformer transformer = pool.newTransformer(stylesheet);

            // Set XSLT parameters, if any
            if (xsltParameters != null)
            {
                for (Map.Entry<String, ?> entry : xsltParameters.entrySet())
                {
                    transformer.setParameter(entry.getKey(), entry.getValue());
                }
            }

            // When debugging, buffer the whole input so that it can be logged should
            // the transformation fail.
            if (log.isDebugEnabled())
            {
                debugInput = StreamUtils.readFullyAndClose(xmlInputStream);
                xmlInputStream = new ByteArrayInputStream(debugInput);
            }

            // Perform transformation
            final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            transformer.transform(new StreamSource(xmlInputStream),
                new StreamResult(outputStream));
            return new ByteArrayInputStream(outputStream.toByteArray());
        }
        catch (TransformerException e)
        {
            // Non-null only if debug logging was enabled when the input was read.
            if (debugInput != null)
            {
                log.debug("Transformer input: {}", new String(debugInput, "UTF-8"));
            }
            throw e;
        }
        finally
        {
            CloseableUtils.close(xmlInputStream);
        }
    }

    /**
     * Loads the XSLT stylesheet from the provided {@link IResource}. The stream opened
     * on the resource is closed before this method returns.
     *
     * @param xslt resource to read the stylesheet from
     * @return the compiled stylesheet
     * @throws RuntimeException wrapping the {@link IOException} or {@link SAXException}
     *         raised while opening or compiling the stylesheet
     */
    public Templates loadXslt(IResource xslt)
    {
        InputStream is = null;
        try
        {
            is = xslt.open();
            return pool.compileTemplate(is);
        }
        catch (IOException e)
        {
            throw new RuntimeException(e);
        }
        catch (SAXException e)
        {
            throw new RuntimeException(e);
        }
        finally
        {
            CloseableUtils.close(is);
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy