// org.archive.crawler.restlet.EngineApplication Maven / Gradle / Ivy
// The newest version!
/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.crawler.restlet;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import freemarker.template.Configuration;
import freemarker.template.ObjectWrapper;
import org.archive.crawler.framework.Engine;
import org.archive.util.TextUtils;
import org.restlet.Application;
import org.restlet.Restlet;
import org.restlet.data.MediaType;
import org.restlet.data.Reference;
import org.restlet.Request;
import org.restlet.Response;
import org.restlet.data.Status;
import org.restlet.representation.Representation;
import org.restlet.representation.StringRepresentation;
import org.restlet.resource.Directory;
import org.restlet.routing.Redirector;
import org.restlet.routing.Router;
import org.restlet.routing.Template;
import org.restlet.service.StatusService;
/**
* Restlet Application for a Heritrix crawl 'Engine', which is aware of
* local job configurations/directories and can assemble/launch/monitor/
* manage crawls.
*
* @author gojomo
*/
public class EngineApplication extends Application {
protected Engine engine;
private final Configuration templateConfiguration;
public EngineApplication(Engine engine) {
this.engine = engine;
getMetadataService().addExtension("log", MediaType.TEXT_PLAIN );
getMetadataService().addExtension("cxml", MediaType.APPLICATION_XML );
setStatusService(new EngineStatusService());
templateConfiguration = new Configuration();
templateConfiguration.setClassForTemplateLoading(getClass(), "");
templateConfiguration.setObjectWrapper(ObjectWrapper.BEANS_WRAPPER);
}
@Override
public Restlet createInboundRoot() {
Router router = new Router(getContext());
router.attach("/",new Redirector(null,"/engine",Redirector.MODE_CLIENT_TEMPORARY));
router.attach("/engine",EngineResource.class)
.setMatchingMode(Template.MODE_EQUALS);
router.attach("/engine/",EngineResource.class)
.setMatchingMode(Template.MODE_EQUALS);
Directory alljobsdir = new Directory(
getContext(),
engine.getJobsDir().toURI().toString());
alljobsdir.setListingAllowed(true);
router.attach("/engine/jobsdir",alljobsdir);
EnhDirectory anypath = new EnhDirectory(
getContext(),
engine.getJobsDir().toURI().toString() /*TODO: changeme*/) {
@Override
protected Reference determineRootRef(Request request) {
String ref = "file:/";
return new Reference(ref);
}};
anypath.setListingAllowed(true);
anypath.setModifiable(true);
anypath.setEditFilter(JobResource.EDIT_FILTER);
router.attach("/engine/anypath/",anypath);
EnhDirectory jobdir = new EnhDirectory(
getContext(),
engine.getJobsDir().toURI().toString() /*TODO: changeme*/) {
@Override
protected Reference determineRootRef(Request request) {
try {
return new Reference(
EngineApplication.this.getEngine()
.getJob(TextUtils.urlUnescape(
(String)request.getAttributes().get("job")))
.getJobDir().getCanonicalFile().toURI().toString());
} catch (IOException e) {
throw new RuntimeException(e);
}
}};
jobdir.setListingAllowed(true);
jobdir.setModifiable(true);
jobdir.setEditFilter(JobResource.EDIT_FILTER);
router.attach("/engine/job/{job}/jobdir",jobdir);
router.attach("/engine/job/{job}",JobResource.class);
router.attach("/engine/job/{job}/report/{reportClass}",ReportGenResource.class);
router.attach("/engine/job/{job}/beans",BeanBrowseResource.class);
router.attach("/engine/job/{job}/beans/{beanPath}",BeanBrowseResource.class);
router.attach("/engine/job/{job}/script",ScriptResource.class);
// static files (won't serve directory, but will serve files in it)
String resource = "clap://class/org/archive/crawler/restlet";
Directory staticDir = new Directory(getContext(),resource);
router.attach("/engine/static/",staticDir);
return router;
}
public Engine getEngine() {
return engine;
}
/**
* Customize Restlet error to include back button and full stack.
*/
protected class EngineStatusService extends StatusService {
@Override
public Representation getRepresentation(Status status, Request request, Response response) {
StringWriter st = new StringWriter();
PrintWriter pw = new PrintWriter(st);
if(status.getCode()==404){
pw.append("Page not found
\n");
pw.append("The page you are looking for does not exist. "+
"You may be able to recover by going " +
"back.\n");
}
else{
pw.append("An error occurred
\n");
pw.append(
"You may be able to recover and try something " +
"else by going " +
"back.\n");
if(status.getThrowable()!=null) {
pw.append("Cause: "+
status.getThrowable().toString()+"
\n");
pw.append("");
status.getThrowable().printStackTrace(pw);
pw.append("
");
}
}
pw.flush();
return new StringRepresentation(st.toString(),MediaType.TEXT_HTML);
}
}
public Configuration getTemplateConfiguration() {
return templateConfiguration;
}
}