All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.archive.crawler.restlet.EngineApplication Maven / Gradle / Ivy

The newest version!
/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.archive.crawler.restlet;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;

import freemarker.template.Configuration;
import freemarker.template.ObjectWrapper;
import org.archive.crawler.framework.Engine;
import org.archive.util.TextUtils;
import org.restlet.Application;
import org.restlet.Restlet;
import org.restlet.data.MediaType;
import org.restlet.data.Reference;
import org.restlet.Request;
import org.restlet.Response;
import org.restlet.data.Status;
import org.restlet.representation.Representation;
import org.restlet.representation.StringRepresentation;
import org.restlet.resource.Directory;
import org.restlet.routing.Redirector;
import org.restlet.routing.Router;
import org.restlet.routing.Template;
import org.restlet.service.StatusService;

/**
 * Restlet Application for a Heritrix crawl 'Engine', which is aware of
 * local job configurations/directories and can assemble/launch/monitor/
 * manage crawls. 
 * 
 * @author gojomo
 */
public class EngineApplication extends Application {
    protected Engine engine;
    private final Configuration templateConfiguration;

    public EngineApplication(Engine engine) {
        this.engine = engine;
        getMetadataService().addExtension("log", MediaType.TEXT_PLAIN );
        getMetadataService().addExtension("cxml", MediaType.APPLICATION_XML );
        setStatusService(new EngineStatusService());
        templateConfiguration = new Configuration();
        templateConfiguration.setClassForTemplateLoading(getClass(), "");
        templateConfiguration.setObjectWrapper(ObjectWrapper.BEANS_WRAPPER);
    }

    @Override
    public Restlet createInboundRoot() {
        Router router = new Router(getContext());

        router.attach("/",new Redirector(null,"/engine",Redirector.MODE_CLIENT_TEMPORARY));
        router.attach("/engine",EngineResource.class)
            .setMatchingMode(Template.MODE_EQUALS);
        router.attach("/engine/",EngineResource.class)
            .setMatchingMode(Template.MODE_EQUALS);

        Directory alljobsdir = new Directory(
                getContext(),
                engine.getJobsDir().toURI().toString());
        alljobsdir.setListingAllowed(true);
        router.attach("/engine/jobsdir",alljobsdir);
        
        
        EnhDirectory anypath = new EnhDirectory(
                getContext(),
                engine.getJobsDir().toURI().toString() /*TODO: changeme*/) {
                    @Override
                    protected Reference determineRootRef(Request request) {
                        String ref = "file:/";
                        return new Reference(ref);
                    }};
        anypath.setListingAllowed(true);
        anypath.setModifiable(true);
        anypath.setEditFilter(JobResource.EDIT_FILTER);
        
        router.attach("/engine/anypath/",anypath);
        
        EnhDirectory jobdir = new EnhDirectory(
                getContext(),
                engine.getJobsDir().toURI().toString() /*TODO: changeme*/) {
                    @Override
                    protected Reference determineRootRef(Request request) {
                        try {
                            return new Reference(
                                EngineApplication.this.getEngine()
                                .getJob(TextUtils.urlUnescape(
                                    (String)request.getAttributes().get("job")))
                                .getJobDir().getCanonicalFile().toURI().toString());
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    }};
        jobdir.setListingAllowed(true);
        jobdir.setModifiable(true);
        jobdir.setEditFilter(JobResource.EDIT_FILTER);
        router.attach("/engine/job/{job}/jobdir",jobdir);
        router.attach("/engine/job/{job}",JobResource.class);
        router.attach("/engine/job/{job}/report/{reportClass}",ReportGenResource.class);
        router.attach("/engine/job/{job}/beans",BeanBrowseResource.class);
        router.attach("/engine/job/{job}/beans/{beanPath}",BeanBrowseResource.class);
        router.attach("/engine/job/{job}/script",ScriptResource.class);

        // static files (won't serve directory, but will serve files in it)
        String resource = "clap://class/org/archive/crawler/restlet";
        Directory staticDir = new Directory(getContext(),resource); 
        router.attach("/engine/static/",staticDir);

        return router;
    }

    public Engine getEngine() {
        return engine;
    }  
    
    /**
     * Customize Restlet error to include back button and full stack.
     */
    protected class EngineStatusService extends StatusService {

        @Override
        public Representation getRepresentation(Status status, Request request, Response response) {
            StringWriter st = new StringWriter();
            PrintWriter pw = new PrintWriter(st);
            if(status.getCode()==404){
                pw.append("

Page not found

\n"); pw.append("The page you are looking for does not exist. "+ "You may be able to recover by going " + "back.\n"); } else{ pw.append("

An error occurred

\n"); pw.append( "You may be able to recover and try something " + "else by going " + "back.\n"); if(status.getThrowable()!=null) { pw.append("

Cause: "+ status.getThrowable().toString()+"

\n"); pw.append("
");
                    status.getThrowable().printStackTrace(pw);
                    pw.append("
"); } } pw.flush(); return new StringRepresentation(st.toString(),MediaType.TEXT_HTML); } } public Configuration getTemplateConfiguration() { return templateConfiguration; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy