![JAR search and dependency download from the Maven repository](/logo.png)
org.archive.modules.deciderules.ScriptedDecideRule Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of heritrix-modules Show documentation
Show all versions of heritrix-modules Show documentation
This project contains some of the configurable modules used within the
Heritrix application to crawl the web. The modules in this project can
be used in applications other than Heritrix, however.
/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.modules.deciderules;
import java.io.Reader;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
import org.apache.commons.io.IOUtils;
import org.archive.io.ReadSource;
import org.archive.modules.CrawlURI;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.BeanInitializationException;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Required;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
/**
 * Rule which runs a JSR-223 script to make its decision.
 *
 * Script source may be provided via a file local to the crawler or
 * an inline configuration string.
 *
 * The source must include a one-argument function "decisionFor" which
 * returns the appropriate DecideResult.
 *
 * Variables available to the script include 'object' (the object to be
 * evaluated, typically a CrawlURI), 'self' (this ScriptedDecideRule
 * instance), and 'context' (the crawl's ApplicationContext, from
 * which all named crawl beans are easily reachable). While a decision
 * is being evaluated the ApplicationContext is additionally bound
 * under the name 'appCtx'; 'object' and 'appCtx' are reset to null
 * once the decision has been made.
 *
 * TODO: provide way to trigger reload of script mid-crawl; perhaps
 * by watching for a certain applicationEvent?
 *
 * @author gojomo
 */
public class ScriptedDecideRule extends DecideRule
        implements ApplicationContextAware, InitializingBean {
    private static final long serialVersionUID = 3L;

    private static final Logger logger =
            Logger.getLogger(ScriptedDecideRule.class.getName());

    /** engine name; default "beanshell" */
    protected String engineName = "beanshell";

    /** @return the JSR-223 engine name used to look up the script engine */
    public String getEngineName() {
        return this.engineName;
    }

    /** @param name the JSR-223 engine name used to look up the script engine */
    public void setEngineName(String name) {
        this.engineName = name;
    }

    /** Source from which the script text is read each time an engine is created. */
    protected ReadSource scriptSource = null;

    /** @return the source the script is read from */
    public ReadSource getScriptSource() {
        return scriptSource;
    }

    /** @param scriptSource the source the script is read from (required) */
    @Required
    public void setScriptSource(ReadSource scriptSource) {
        this.scriptSource = scriptSource;
    }

    /**
     * Whether each ToeThread should get its own independent script
     * engine, or they should share synchronized access to one
     * engine. Default is true, meaning each thread gets its own
     * isolated engine.
     */
    protected boolean isolateThreads = true;

    /** @return true if every thread uses its own engine, false if one engine is shared */
    public boolean getIsolateThreads() {
        return isolateThreads;
    }

    /** @param isolateThreads true for one engine per thread, false for a single shared engine */
    public void setIsolateThreads(boolean isolateThreads) {
        this.isolateThreads = isolateThreads;
    }

    /** ApplicationContext handed to the scripts as 'context' and 'appCtx'. */
    protected ApplicationContext appCtx;

    /**
     * Stores the ApplicationContext so it can be exposed to the scripts.
     *
     * @param applicationContext the context supplied by the container
     */
    @Override
    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
        this.appCtx = applicationContext;
    }

    /** Per-thread engine, used when {@link #getIsolateThreads()} is true. */
    transient protected ThreadLocal<ScriptEngine> threadEngine =
            new ThreadLocal<ScriptEngine>();

    /** Lazily created engine, used when {@link #getIsolateThreads()} is false. */
    transient protected ScriptEngine sharedEngine;

    public ScriptedDecideRule() {
    }

    /**
     * Verifies that a script engine with the configured name can be found.
     *
     * @throws BeanInitializationException if no engine is registered
     *         under {@link #getEngineName()}
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        // fail at build-time if script engine not available
        if (null == new ScriptEngineManager().getEngineByName(engineName)) {
            throw new BeanInitializationException("named ScriptEngine not available");
        }
    }

    /**
     * Evaluates the script's "decisionFor" function against the given URI.
     *
     * A ScriptException raised by the script is logged at WARNING level and
     * results in {@link DecideResult#NONE}. The script's return value is cast
     * to DecideResult without further checks.
     *
     * @param uri the object handed to the script as 'object'
     * @return the script's decision, or DecideResult.NONE on script failure
     */
    @Override
    public DecideResult innerDecide(CrawlURI uri) {
        // depending on previous configuration, engine may
        // be local to this thread or shared
        ScriptEngine engine = getEngine();
        synchronized (engine) {
            // synchronization is harmless for local thread engine,
            // necessary for shared engine
            try {
                engine.put("object", uri);
                engine.put("appCtx", appCtx);
                return (DecideResult) engine.eval("decisionFor(object)");
            } catch (ScriptException e) {
                logger.log(Level.WARNING, e.getMessage(), e);
                return DecideResult.NONE;
            } finally {
                // drop per-call bindings so the engine does not keep
                // referencing the evaluated object between calls
                engine.put("object", null);
                engine.put("appCtx", null);
            }
        }
    }

    /**
     * Get the proper ScriptEngine instance -- either shared or local
     * to this thread. The engine is created on first use.
     *
     * @return ScriptEngine to use
     */
    protected ScriptEngine getEngine() {
        if (getIsolateThreads()) {
            ScriptEngine engine = threadEngine.get();
            if (engine == null) {
                engine = newEngine();
                threadEngine.set(engine);
            }
            return engine;
        }
        // sharing the engine: create and read it under the same lock
        synchronized (this) {
            if (sharedEngine == null) {
                sharedEngine = newEngine();
            }
            return sharedEngine;
        }
    }

    /**
     * Create a new ScriptEngine instance, preloaded with any supplied
     * source file and the variables 'self' (this ScriptedDecideRule)
     * and 'context' (the ApplicationContext).
     *
     * A ScriptException while evaluating the script source is logged at
     * SEVERE level; the engine is returned regardless.
     *
     * @return the new engine instance
     * @throws IllegalStateException if no engine is registered under
     *         the configured engine name
     */
    protected ScriptEngine newEngine() {
        ScriptEngine interpreter = new ScriptEngineManager().getEngineByName(engineName);
        if (interpreter == null) {
            // engineName can be changed after afterPropertiesSet() ran its check
            throw new IllegalStateException(
                    "ScriptEngine '" + engineName + "' not available");
        }
        interpreter.put("self", this);
        interpreter.put("context", appCtx);
        Reader reader = null;
        try {
            reader = getScriptSource().obtainReader();
            interpreter.eval(reader);
        } catch (ScriptException e) {
            logger.log(Level.SEVERE, "script problem", e);
        } finally {
            IOUtils.closeQuietly(reader);
        }
        return interpreter;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy