All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jetspeed.search.handlers.URLToDocHandler Maven / Gradle / Ivy

There is a newer version: 2.3.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jetspeed.search.handlers;

// Java imports
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.io.IOUtils;
import org.apache.jetspeed.search.AbstractObjectHandler;
import org.apache.jetspeed.search.BaseParsedObject;

/**
 * This object handler deals with URLs.
 * 
 * @author Mark Orciuch
 * @version $Id: URLToDocHandler.java 966940 2010-07-23 02:56:13Z woonsan $
 */
public class URLToDocHandler extends AbstractObjectHandler
{
    /**
     * Static initialization of the logger for this class
     */    
    //private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(URLToDocHandler.class.getName());
    
    /**
     * Parses a specific object into a document suitable for index placement
     * 
     * @param o
     * @return 
     */
    public org.apache.jetspeed.search.ParsedObject parseObject(Object o)
    {
        org.apache.jetspeed.search.ParsedObject result = new BaseParsedObject();

        if ((o instanceof URL) == false)
        {
            //logger.error("URLToDocHandler: invalid object type: " + o);
            return null;
        }

        URL pageToAdd = (URL) o;
        String content = getContentFromURL(pageToAdd);
        
        if (content != null)
        {
            try
            {
                result.setKey(java.net.URLEncoder.encode(pageToAdd.toString(),"UTF-8"));
                result.setType(org.apache.jetspeed.search.ParsedObject.OBJECT_TYPE_URL);
                // TODO: We should extract the  tag here.
                result.setTitle(pageToAdd.toString());
                result.setContent(content);
                result.setDescription("");
                result.setLanguage("");
                result.setURL(pageToAdd);
                result.setClassName(o.getClass().getName());
                //logger.info("Parsed '" + pageToAdd.toString() + "'");
            }
            catch (Exception e)
            {
                e.printStackTrace();
                //logger.error("Adding document to index", e);
            }
        }
        
        return result;

    }
    
    private String getContentFromURL(final URL url)
    {
        if ("file".equals(url.getProtocol()))
        {
            return readContentFromURL(url);
        }
        
        String content = null;
        
        HttpClient client = new HttpClient();
        GetMethod method = new GetMethod(url.toString());
        method.setFollowRedirects(true);
        int statusCode = -1;
        int attempt = 0;

        try
        {
            // We will retry up to 3 times.
            while (statusCode == -1 && attempt < 3)
            {
                try
                {
                    // execute the method.
                    client.executeMethod(method);
                    statusCode = method.getStatusCode();
                    //if (logger.isDebugEnabled())
                    {
                        //logger.debug("URL = " + pageToAdd.toString() + "Status code = " + statusCode);
                    }
                }
                catch (HttpException e)
                {
                    // We will retry
                    attempt++;
                }
                catch (IOException e)
                {
                    return null;
                }
            }
            // Check that we didn't run out of retries.
            if (statusCode != -1)
            {
                try
                {
                    content = method.getResponseBodyAsString();
                }
                catch (Exception ioe)
                {
                    //logger.error("Getting content for " + pageToAdd.toString(), ioe);
                }
            }
        }
        finally
        {
            method.releaseConnection();
        }

        return content;
    }
    
    private String readContentFromURL(final URL url)
    {
        String content = null;
        InputStream input = null;
        
        try
        {
            input = url.openStream();
            content = IOUtils.toString(input);
        }
        catch (Exception e)
        {
        }
        finally
        {
            if (input != null)
            {
                try
                {
                    input.close();
                }
                catch (Exception ce)
                {
                }
            }
        }
        
        return content;
    }
}

</code></pre>    <br/>
    <br/>
    <div id="right-banner">
            </div>
    <div id="left-banner">
            </div>
<div class='clear'></div>
    <aside class="related-items">
        <section>
            <div class="panel panel-primary">
                <div class="panel-heading margin-bottom">Related Artifacts</div>
                <div class="">
                    <a title='This artifact is from the group mysql' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/mysql/mysql-connector-java' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> mysql-connector-java <small class='group-info' >mysql</small></a><br/><a title='This artifact is from the group com.github.codedrinker' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.github.codedrinker/facebook-messenger' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> facebook-messenger <small class='group-info' >com.github.codedrinker</small></a><br/><a title='This artifact is from the group org.seleniumhq.selenium' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.seleniumhq.selenium/selenium-java' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> selenium-java <small class='group-info' >org.seleniumhq.selenium</small></a><br/><a title='This artifact is from the group com.github.sola92' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.github.sola92/instagram-java' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> instagram-java <small class='group-info' >com.github.sola92</small></a><br/><a title='This artifact is from the group com.google.code.gson' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.google.code.gson/gson' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> gson <small class='group-info' >com.google.code.gson</small></a><br/><a title='This artifact is from the group org.apache.poi' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.poi/poi' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> poi <small class='group-info' >org.apache.poi</small></a><br/><a title='This artifact is from the group org.apache.httpcomponents' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.httpcomponents/httpclient' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> httpclient <small class='group-info' >org.apache.httpcomponents</small></a><br/><a title='This artifact is from the group org.json' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.json/json' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> json <small class='group-info' >org.json</small></a><br/><a title='This artifact is from the group com.google.code.facebook-java-api' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.google.code.facebook-java-api/facebook-java-api' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> facebook-java-api <small class='group-info' >com.google.code.facebook-java-api</small></a><br/><a title='This artifact is from the group org.apache.poi' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.poi/poi-ooxml' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> poi-ooxml <small class='group-info' >org.apache.poi</small></a><br/><a title='This artifact is from the group com.fasterxml.jackson.core' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.fasterxml.jackson.core/jackson-databind' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> jackson-databind <small class='group-info' >com.fasterxml.jackson.core</small></a><br/><a title='This artifact is from the group junit' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/junit/junit' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> junit <small class='group-info' >junit</small></a><br/><a title='This artifact is from the group org.primefaces' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.primefaces/primefaces' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> primefaces <small class='group-info' >org.primefaces</small></a><br/><a title='This artifact is from the group com.github.noraui' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.github.noraui/ojdbc7' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> ojdbc7 <small class='group-info' >com.github.noraui</small></a><br/><a title='This artifact is from the group com.jfoenix' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.jfoenix/jfoenix' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> jfoenix <small class='group-info' >com.jfoenix</small></a><br/><a title='This artifact is from the group org.testng' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.testng/testng' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> testng <small class='group-info' >org.testng</small></a><br/><a title='This artifact is from the group com.googlecode.json-simple' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.googlecode.json-simple/json-simple' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> json-simple <small class='group-info' >com.googlecode.json-simple</small></a><br/><a title='This artifact is from the group org.seleniumhq.selenium' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.seleniumhq.selenium/selenium-server' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> selenium-server <small class='group-info' >org.seleniumhq.selenium</small></a><br/><a title='This artifact is from the group com.itextpdf' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.itextpdf/itextpdf' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> itextpdf <small class='group-info' >com.itextpdf</small></a><br/><a title='This artifact is from the group org.springframework' class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.springframework/spring-core' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> spring-core <small class='group-info' >org.springframework</small></a><br/>                </div>
            </div>
        </section>
        <section>
            <div class="panel panel-primary">
                <div class="panel-heading margin-bottom">Related Groups</div>
                <div class="">
                    <a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.springframework' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.springframework</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.poi' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.apache.poi</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.hibernate' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.hibernate</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.springframework.boot' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.springframework.boot</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.fasterxml.jackson.core' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> com.fasterxml.jackson.core</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.itextpdf' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> com.itextpdf</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.seleniumhq.selenium' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.seleniumhq.selenium</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/mysql' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> mysql</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.finos.legend.engine' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.finos.legend.engine</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.httpcomponents' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.apache.httpcomponents</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.logging.log4j' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.apache.logging.log4j</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.openjfx' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.openjfx</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.apache.commons' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.apache.commons</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/org.json' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> org.json</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.google.guava' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> com.google.guava</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.google.zxing' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> com.google.zxing</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/net.sf.jasperreports' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> net.sf.jasperreports</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/javax.xml.bind' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> javax.xml.bind</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/ojdbc' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> ojdbc</a><br/><a class='btn btn-default btn-xs small-margin-bottom ellipsis sidebar-btn' href='/artifacts/com.google.code.facebook-java-api' ><i class="fa fa-arrow-circle-right" aria-hidden="true"></i> com.google.code.facebook-java-api</a><br/>                </div>
            </div>
        </section>
    </aside>
    <div class='clear'></div>
</main>
</div>
<br/><br/>
    <div class="align-center">© 2015 - 2024 <a href="/legal-notice.php">Weber Informatics LLC</a> | <a href="/data-protection.php">Privacy Policy</a></div>
<br/><br/><br/><br/><br/><br/>
</body>
</html>

<script data-cfasync="false" src="/cdn-cgi/scripts/5c5dd728/cloudflare-static/email-decode.min.js"></script>