// org.archive.state.ModuleTestBase (from the heritrix-modules artifact)
/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.state;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.lang.SerializationUtils;
import org.archive.modules.CrawlURI;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.util.Recorder;
import org.archive.util.TmpDirTestCase;
import junit.framework.TestCase;
/**
* Base class for unit testing Module implementations.
*
* @author pjack
*/
public abstract class ModuleTestBase extends TestCase {
/**
* Magical constructor that attempts to auto-create static key field
* descriptions for your module class.
*
* If {@link #getSourceCodeDir} and {@link #getResourceDir} both return
* non-null values, then the constructor will look in the resources
* directory for an English resource file for the class. If it finds
* one, nothing magical happens.
*
*
Otherwise, the source code for the module being tested is loaded,
* and parsed to extract the JavaDoc descriptions for the static key
* fields. The results are stored in the appropriate English locale file
* in the resource directory.
*
*
Note the parsing is naive; at minimum, you should load the resulting
* locale file and remove any HTML markup.
*/
public ModuleTestBase() {
getSourceCodeDir();
getResourceDir();
}
/**
* Returns the location of the source code directory for your project.
* This defaults to "src/main/java", which is the standard for projects
* built with maven2. If you use a different source code directory,
* you should override this method.
*
*
If you want to disable automatic key description generation,
* return null from this method.
*
* @return the source code directory for the project
*/
protected File getSourceCodeDir() {
return getProjectDir("src/main/java");
}
/**
* Returns the location of the Java resources directory for your project.
* This defaults to "src/resources/java", which is the standard for projects
* built with maven2. If you use a different source code directory --
* for instance, if your resources directory is the same as your source
* code directory -- you should override this method.
*
*
If you want to disable automatic key description generation,
* return null from this method.
*
* @return the source code directory for the project
*/
protected File getResourceDir() {
return getProjectDir("src/main/resources");
}
/**
* Returns a project directory for a Heritrix subproject. This is here
* so that the src and resources directories can be found whether the
* unit test is run using maven2 or using Eclipse. The two build systems
* use different working directories.
*
* @param path the path the path to find
* @return the found path
*/
private File getProjectDir(String path) {
File r = new File(path);
if (r.exists()) {
return r;
}
String cname = getClass().getName();
if (cname.startsWith("org.archive.processors")) {
return new File("modules/" + path);
}
if (cname.startsWith("org.archive.deciderules")) {
return new File("modules/" + path);
}
if (cname.startsWith("org.archive.crawler")) {
return new File("engine/" + path);
}
return null;
}
/**
* Returns the class of the module to test. Deduces from
* test class name if possible.
*
* @return the class of the module to test
*/
protected Class> getModuleClass() {
String myClassName = this.getClass().getCanonicalName();
if(!myClassName.endsWith("Test")) {
throw new UnsupportedOperationException(
"Cannot get module class of "+myClassName);
}
String moduleClassName = myClassName.substring(0,myClassName.length()-4);
try {
return Class.forName(moduleClassName);
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
}
}
/**
* Return an example instance of the module. This is used by
* testSerialization to ensure the module can be serialized.
*
* @return an example instance of the module
* @throws Exception if the module cannot be constructed for any reason
*/
protected Object makeModule() throws Exception {
return getModuleClass().newInstance();
}
/**
* Tests that the module can be serialized. The value returned by
* {@link #makeModule} is serialized to a byte array, and then
* deserialized, and then serialized to a second byte array. The results
* are passed to {@link #verifySerialization}, which will simply compare
* the two byte arrays for equality. (That won't always work; see
* that method for details).
*
*
If nothing else, this test is useful for catching NotSerializable
* exceptions for your module or classes it depends on.
*
* @throws Exception if the module cannot be serialized
*/
public void testSerializationIfAppropriate() throws Exception {
Object first = makeModule();
if(!(first instanceof Serializable)) {
return;
}
byte[] firstBytes = SerializationUtils.serialize((Serializable)first);
Object second = SerializationUtils.deserialize(firstBytes);
byte[] secondBytes = SerializationUtils.serialize((Serializable)second);
Object third = SerializationUtils.deserialize(secondBytes);
byte[] thirdBytes = SerializationUtils.serialize((Serializable)third);
// HashMap serialization reverses order of items in linked buckets
// each roundtrip -- so don't check one roundtrip, check two.
//
// NOTE This is JVM-dependent behaviour, and since <= 1.7.0_u51 this
// ordering of serialisation cannot be relied upon. However, a TreeMap
// can be used instead of a HashMap, and this appears to have
// predictable serialisation behaviour.
//
// @see
// http://stackoverflow.com/questions/22392258/serialization-round-trip-of-hash-map-does-not-preserve-order
//
// verifySerialization(first, firstBytes, second, secondBytes);
verifySerialization(first, firstBytes, third, thirdBytes);
}
/**
* Verifies that serialization was successful.
*
*
By default, this method simply compares the first and second byte
* arrays for equality. That may not work if you use custom serialization
* -- for instance, if you're serializing a timestamp. If that's the case
* you should override this method to compare the given objects, or to
* simply do nothing. (If this method does nothing, then the
* {@link #testSerializationIfAppropriate()} test is still useful for catching
* NotSerializable problems).
*
* @param first the first object that was serialized
* @param firstBytes the byte array the first object was serialized to
* @param second the second object that was serialized
* @param secondBytes the byte array the second object was serialized to
* @throws Exception if anyt problem occurs
*/
protected void verifySerialization(Object first, byte[] firstBytes,
Object second, byte[] secondBytes) throws Exception {
assertTrue(Arrays.equals(firstBytes, secondBytes));
}
@Override
protected void runTest() throws Throwable {
try {
super.runTest();
} catch (Throwable t) {
t.printStackTrace();
throw t;
}
}
protected Recorder getRecorder() throws IOException {
if (Recorder.getHttpRecorder() == null) {
Recorder httpRecorder = new Recorder(TmpDirTestCase.tmpDir(),
getClass().getName(), 16 * 1024, 512 * 1024);
Recorder.setHttpRecorder(httpRecorder);
}
return Recorder.getHttpRecorder();
}
protected CrawlURI makeCrawlURI(String uri) throws URIException,
IOException {
UURI uuri = UURIFactory.getInstance(uri);
CrawlURI curi = new CrawlURI(uuri);
curi.setSeed(true);
curi.setRecorder(getRecorder());
return curi;
}
}