All downloads are free. The search and download functionality uses the official Maven repository.

org.carrot2.source.xml.XmlDocumentSourceHelper Maven / Gradle / Ivy

Go to download

Carrot2 search results clustering framework. Minimal functional subset (core algorithms and infrastructure, no document sources).

There is a newer version: 3.16.3
Show newest version

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2012, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.source.xml;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;

import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.URIResolver;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.apache.http.HttpStatus;
import org.carrot2.core.IDocumentSource;
import org.carrot2.core.ProcessingResult;
import org.carrot2.core.attribute.Processing;
import org.carrot2.source.SearchEngineResponse;
import org.carrot2.source.SimpleSearchEngine;
import org.carrot2.util.CloseableUtils;
import org.carrot2.util.StreamUtils;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.constraint.IntRange;
import org.carrot2.util.httpclient.HttpUtils;
import org.carrot2.util.resource.IResource;
import org.carrot2.util.xslt.NopURIResolver;
import org.carrot2.util.xslt.TemplatesPool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;

/**
 * Exposes the common functionality a {@link IDocumentSource} based on XML/XSLT is likely
 * to need. Apart from the data transfer {@link #timeout}, this helper does not expose any
 * attributes, so that different implementations can decide which attributes they expose.
 */
@Bindable(prefix = "XmlDocumentSourceHelper")
public class XmlDocumentSourceHelper
{
    /**
     * Data transfer timeout. Specifies the data transfer timeout, in seconds. A timeout value of
     * zero is interpreted as an infinite timeout.
     */
    @Input
    @Processing
    @Attribute
    @IntRange(min = 0, max = 5 * 60)
    @Label("Data transfer timeout")
    @Level(AttributeLevel.ADVANCED)
    @Group(SimpleSearchEngine.SERVICE)
    public int timeout = 8;

    /** Precompiled XSLT templates. */
    private final TemplatesPool pool;

    /**
     * URI resolver. Does nothing.
     */
    private static final URIResolver uriResolver = new NopURIResolver();

    private static final Logger log = LoggerFactory.getLogger(XmlDocumentSourceHelper.class);

    /**
     * Creates a helper backed by a non-caching {@link TemplatesPool} whose transformer
     * factory uses the shared no-op {@link URIResolver}.
     *
     * @throws RuntimeException wrapping any exception raised while setting up the pool
     */
    public XmlDocumentSourceHelper()
    {
        try
        {
            // No template caching.
            this.pool = new TemplatesPool(false);
            this.pool.tFactory.setURIResolver(uriResolver);
        }
        catch (Exception e)
        {
            throw new RuntimeException(e);
        }
    }

    /**
     * Loads a {@link ProcessingResult} from the provided remote URL, applying XSLT
     * transform if specified. This method can handle gzip-compressed streams if supported
     * by the data source.
     *
     * @param url the URL to issue the HTTP GET request to
     * @param stylesheet the stylesheet to apply to the response, or <code>null</code> if
     *            the response should be deserialized as is
     * @param xsltParameters parameters to pass to the transformer, may be
     *            <code>null</code>
     * @param metadata if a non-null map is provided, request metadata will
     *            be put into the map.
     * @param user if not null, the user name to use for HTTP Basic
     *            Authentication
     * @param password if not null, the password to use for HTTP Basic
     *            Authentication
     * @return the deserialized processing result
     * @throws IOException if the server responds with a status other than 200 (OK)
     */
    public ProcessingResult loadProcessingResult(String url, Templates stylesheet,
        Map<String, String> xsltParameters, Map<String, Object> metadata, String user,
        String password) throws Exception
    {
        // The timeout attribute is expressed in seconds, the HTTP call gets it multiplied
        // by 1000.
        final HttpUtils.Response response = HttpUtils.doGET(url, null, null, user,
            password, timeout * 1000);

        // Check the status before opening the payload stream, so that an error response
        // neither leaves an unclosed stream behind nor gets masked by a payload
        // decoding failure.
        final int statusCode = response.status;
        if (statusCode != HttpStatus.SC_OK)
        {
            throw new IOException("HTTP error, status code: " + statusCode);
        }

        // The metadata map is optional.
        if (metadata != null)
        {
            metadata.put(SearchEngineResponse.COMPRESSION_KEY, response.compression);
        }

        return loadProcessingResult(response.getPayloadAsStream(), stylesheet,
            xsltParameters);
    }

    /**
     * Loads a {@link ProcessingResult} from the provided {@link InputStream}, applying
     * XSLT transform if specified. The provided {@link InputStream} will be closed.
     *
     * @param xml the stream to read the XML from; closed before this method returns
     * @param stylesheet the stylesheet to apply to the stream, or <code>null</code> if
     *            the stream should be deserialized as is
     * @param xsltParameters parameters to pass to the transformer, may be
     *            <code>null</code>
     * @return the deserialized processing result
     */
    public ProcessingResult loadProcessingResult(InputStream xml, Templates stylesheet,
        Map<String, String> xsltParameters) throws Exception
    {
        InputStream carrot2XmlStream = null;
        try
        {
            carrot2XmlStream = getCarrot2XmlStream(xml, stylesheet, xsltParameters);
            return ProcessingResult.deserialize(carrot2XmlStream);
        }
        finally
        {
            // Both the (possibly transformed) stream and the original input are closed,
            // also when the transformation or deserialization fails.
            CloseableUtils.close(carrot2XmlStream, xml);
        }
    }

    /**
     * Returns a Carrot2 XML stream, applying an XSLT transformation if the stylesheet is
     * provided. Without a stylesheet the input stream is returned untouched; with a
     * stylesheet the input stream is consumed and closed, and the result of the
     * transformation is returned as an in-memory stream.
     */
    private InputStream getCarrot2XmlStream(InputStream xmlInputStream,
        Templates stylesheet, Map<String, String> xsltParameters)
        throws TransformerConfigurationException, IOException, TransformerException
    {
        // Nothing to transform, pass the input through.
        if (stylesheet == null)
        {
            return xmlInputStream;
        }

        byte [] debugInput = null;
        try
        {
            // Initialize transformer
            final Transformer transformer = pool.newTransformer(stylesheet);
            final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

            // Set XSLT parameters, if any
            if (xsltParameters != null)
            {
                for (Map.Entry<String, String> entry : xsltParameters.entrySet())
                {
                    transformer.setParameter(entry.getKey(), entry.getValue());
                }
            }

            // Buffer the whole input only when debug logging is enabled, so that it
            // can be logged should the transformation fail.
            if (log.isDebugEnabled())
            {
                debugInput = StreamUtils.readFullyAndClose(xmlInputStream);
                xmlInputStream = new ByteArrayInputStream(debugInput);
            }

            // Perform transformation
            transformer.transform(new StreamSource(xmlInputStream), new StreamResult(
                outputStream));
            return new ByteArrayInputStream(outputStream.toByteArray());
        }
        catch (TransformerException e)
        {
            if (debugInput != null)
            {
                log.debug("Transformer input: " + new String(debugInput, "UTF-8"));
            }
            throw e;
        }
        finally
        {
            CloseableUtils.close(xmlInputStream);
        }
    }

    /**
     * Loads the XSLT stylesheet from the provided {@link IResource}. The stream opened
     * on the resource is closed before this method returns.
     *
     * @param xslt the resource to read the stylesheet from
     * @return the compiled stylesheet
     * @throws RuntimeException wrapping the {@link IOException} or {@link SAXException}
     *             raised while opening or compiling the stylesheet
     */
    public Templates loadXslt(IResource xslt)
    {
        InputStream is = null;
        try
        {
            is = xslt.open();
            return pool.compileTemplate(is);
        }
        catch (IOException e)
        {
            throw new RuntimeException(e);
        }
        catch (SAXException e)
        {
            throw new RuntimeException(e);
        }
        finally
        {
            CloseableUtils.close(is);
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy