All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.archive.state.ModuleTestBase Maven / Gradle / Ivy

Go to download

This project contains some of the configurable modules used within the Heritrix application to crawl the web. The modules in this project can be used in applications other than Heritrix, however.

There is a newer version: 3.5.0
Show newest version
/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.state;


import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;

import org.apache.commons.httpclient.URIException;
import org.apache.commons.lang.SerializationUtils;
import org.archive.modules.CrawlURI;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.util.Recorder;
import org.archive.util.TmpDirTestCase;

import junit.framework.TestCase;


/**
 * Base class for unit testing Module implementations.
 * 
 * @author pjack
 */
public abstract class ModuleTestBase extends TestCase {


    /**
     * Magical constructor that attempts to auto-create static key field
     * descriptions for your module class.
     * 
     * 

If {@link #getSourceCodeDir} and {@link #getResourceDir} both return * non-null values, then the constructor will look in the resources * directory for an English resource file for the class. If it finds * one, nothing magical happens. * *

Otherwise, the source code for the module being tested is loaded, * and parsed to extract the JavaDoc descriptions for the static key * fields. The results are stored in the appropriate English locale file * in the resource directory. * *

Note the parsing is naive; at minimum, you should load the resulting * locale file and remove any HTML markup. */ public ModuleTestBase() { getSourceCodeDir(); getResourceDir(); } /** * Returns the location of the source code directory for your project. * This defaults to "src/main/java", which is the standard for projects * built with maven2. If you use a different source code directory, * you should override this method. * *

If you want to disable automatic key description generation, * return null from this method. * * @return the source code directory for the project */ protected File getSourceCodeDir() { return getProjectDir("src/main/java"); } /** * Returns the location of the Java resources directory for your project. * This defaults to "src/resources/java", which is the standard for projects * built with maven2. If you use a different source code directory -- * for instance, if your resources directory is the same as your source * code directory -- you should override this method. * *

If you want to disable automatic key description generation, * return null from this method. * * @return the source code directory for the project */ protected File getResourceDir() { return getProjectDir("src/main/resources"); } /** * Returns a project directory for a Heritrix subproject. This is here * so that the src and resources directories can be found whether the * unit test is run using maven2 or using Eclipse. The two build systems * use different working directories. * * @param path the path the path to find * @return the found path */ private File getProjectDir(String path) { File r = new File(path); if (r.exists()) { return r; } String cname = getClass().getName(); if (cname.startsWith("org.archive.processors")) { return new File("modules/" + path); } if (cname.startsWith("org.archive.deciderules")) { return new File("modules/" + path); } if (cname.startsWith("org.archive.crawler")) { return new File("engine/" + path); } return null; } /** * Returns the class of the module to test. Deduces from * test class name if possible. * * @return the class of the module to test */ protected Class getModuleClass() { String myClassName = this.getClass().getCanonicalName(); if(!myClassName.endsWith("Test")) { throw new UnsupportedOperationException( "Cannot get module class of "+myClassName); } String moduleClassName = myClassName.substring(0,myClassName.length()-4); try { return Class.forName(moduleClassName); } catch (ClassNotFoundException e) { throw new RuntimeException(e); } } /** * Return an example instance of the module. This is used by * testSerialization to ensure the module can be serialized. * * @return an example instance of the module * @throws Exception if the module cannot be constructed for any reason */ protected Object makeModule() throws Exception { return getModuleClass().newInstance(); } /** * Tests that the module can be serialized. The value returned by * {@link #makeModule} is serialized to a byte array, and then * deserialized, and then serialized to a second byte array. The results * are passed to {@link #verifySerialization}, which will simply compare * the two byte arrays for equality. (That won't always work; see * that method for details). * *

If nothing else, this test is useful for catching NotSerializable * exceptions for your module or classes it depends on. * * @throws Exception if the module cannot be serialized */ public void testSerializationIfAppropriate() throws Exception { Object first = makeModule(); if(!(first instanceof Serializable)) { return; } byte[] firstBytes = SerializationUtils.serialize((Serializable)first); Object second = SerializationUtils.deserialize(firstBytes); byte[] secondBytes = SerializationUtils.serialize((Serializable)second); Object third = SerializationUtils.deserialize(secondBytes); byte[] thirdBytes = SerializationUtils.serialize((Serializable)third); // HashMap serialization reverses order of items in linked buckets // each roundtrip -- so don't check one roundtrip, check two. // // NOTE This is JVM-dependent behaviour, and since <= 1.7.0_u51 this // ordering of serialisation cannot be relied upon. However, a TreeMap // can be used instead of a HashMap, and this appears to have // predictable serialisation behaviour. // // @see // http://stackoverflow.com/questions/22392258/serialization-round-trip-of-hash-map-does-not-preserve-order // // verifySerialization(first, firstBytes, second, secondBytes); verifySerialization(first, firstBytes, third, thirdBytes); } /** * Verifies that serialization was successful. * *

By default, this method simply compares the first and second byte * arrays for equality. That may not work if you use custom serialization * -- for instance, if you're serializing a timestamp. If that's the case * you should override this method to compare the given objects, or to * simply do nothing. (If this method does nothing, then the * {@link #testSerializationIfAppropriate()} test is still useful for catching * NotSerializable problems). * * @param first the first object that was serialized * @param firstBytes the byte array the first object was serialized to * @param second the second object that was serialized * @param secondBytes the byte array the second object was serialized to * @throws Exception if anyt problem occurs */ protected void verifySerialization(Object first, byte[] firstBytes, Object second, byte[] secondBytes) throws Exception { assertTrue(Arrays.equals(firstBytes, secondBytes)); } @Override protected void runTest() throws Throwable { try { super.runTest(); } catch (Throwable t) { t.printStackTrace(); throw t; } } protected Recorder getRecorder() throws IOException { if (Recorder.getHttpRecorder() == null) { Recorder httpRecorder = new Recorder(TmpDirTestCase.tmpDir(), getClass().getName(), 16 * 1024, 512 * 1024); Recorder.setHttpRecorder(httpRecorder); } return Recorder.getHttpRecorder(); } protected CrawlURI makeCrawlURI(String uri) throws URIException, IOException { UURI uuri = UURIFactory.getInstance(uri); CrawlURI curi = new CrawlURI(uuri); curi.setSeed(true); curi.setRecorder(getRecorder()); return curi; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy