All downloads are free. Search and download functionality uses the official Maven repository.

com.crawljax.plugins.crawloverview.OutputBuilder Maven / Gradle / Ivy

There is a newer version: 5.2.3
Show newest version
package com.crawljax.plugins.crawloverview;

import static com.google.common.base.Preconditions.checkArgument;

import javax.annotation.Nullable;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import com.crawljax.core.CrawljaxException;
import com.crawljax.core.configuration.CrawljaxConfiguration;
import com.crawljax.plugins.crawloverview.model.OutPutModel;
import com.crawljax.plugins.crawloverview.model.Serializer;
import com.google.common.base.Charsets;
import com.google.common.base.Strings;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import org.apache.commons.io.FileUtils;
import org.apache.velocity.Template;
import org.apache.velocity.VelocityContext;
import org.apache.velocity.app.VelocityEngine;
import org.apache.velocity.runtime.RuntimeConstants;
import org.apache.velocity.runtime.resource.loader.ClasspathResourceLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Prepares the output folder of the crawl overview and writes the report files into it.
 * <p>
 * On construction the output directory is validated (or created), the static
 * <code>/skeleton</code> resources are copied into it and the sub folders for states,
 * screenshots and DOMs are created. Afterwards the instance hands out target files for
 * screenshots, persists DOMs and renders the Velocity templates.
 */
class OutputBuilder {

	private static final Logger LOG = LoggerFactory.getLogger(OutputBuilder.class);

	static final String SCREENSHOT_FOLDER_NAME = "screenshots";
	static final String STATES_FOLDER_NAME = "states";
	static final String JSON_OUTPUT_NAME = "result.json";
	static final String DOMS_OUTPUT_NAME = "doms";

	/** Classpath location of the static resources that are copied into every report. */
	private static final String SKELETON_RESOURCE = "/skeleton";

	/** Prefix of the skeleton entries inside the JAR, including the trailing slash. */
	private static final String SKELETON_JAR_PREFIX = "skeleton/";

	/** Marker in a resource URL that separates the JAR file from the entry inside it. */
	private static final String JAR_SEPARATOR = "jar!";

	private final File outputDir;
	private final File states;
	private final File screenshots;
	private final File indexFile;
	private final File doms;
	private final VelocityEngine ve;

	/**
	 * @param outputDir
	 *            target for the output directory. Folder must not exist or be empty.
	 * @throws IllegalArgumentException
	 *             when the directory is not usable or one of the sub folders cannot be created.
	 * @throws CrawljaxException
	 *             when the skeleton resources cannot be copied.
	 */
	OutputBuilder(File outputDir) {
		this.outputDir = outputDir;
		checkPermissions();
		copySkeleton();

		states = new File(outputDir, STATES_FOLDER_NAME);
		boolean created = states.mkdir();
		checkArgument(created, "Could not create states dir");
		screenshots = new File(outputDir, SCREENSHOT_FOLDER_NAME);
		created = screenshots.mkdir();
		checkArgument(created, "Could not create screenshots dir");
		doms = new File(outputDir, DOMS_OUTPUT_NAME);
		created = doms.mkdir();
		checkArgument(created, "Could not create doms dir");

		indexFile = new File(outputDir, "index.html");
		ve = new VelocityEngine();
		configureVelocity();
	}

	/**
	 * Configures the Velocity engine to load templates from the classpath and to use the
	 * <code>NullLogChute</code> as its log system.
	 */
	private void configureVelocity() {
		ve.setProperty(RuntimeConstants.RUNTIME_LOG_LOGSYSTEM_CLASS,
		        "org.apache.velocity.runtime.log.NullLogChute");
		ve.setProperty(RuntimeConstants.RESOURCE_LOADER, "classpath");
		ve.setProperty("classpath.resource.loader.class", ClasspathResourceLoader.class.getName());
	}

	/**
	 * Verifies that the output directory is an empty, writable directory, or creates it when
	 * it does not exist yet.
	 * 
	 * @throws IllegalArgumentException
	 *             when any of the checks fails or the directory cannot be created.
	 */
	private void checkPermissions() {
		if (outputDir.exists()) {
			checkArgument(outputDir.isDirectory(), outputDir + " is not a directory");
			// File.list() returns null instead of throwing when the directory cannot be read.
			String[] children = outputDir.list();
			checkArgument(children != null, "Could not list contents of " + outputDir);
			checkArgument(children.length == 0, "Directory must be empty");
			checkArgument(outputDir.canWrite(), "Output dir not writable");
		} else {
			boolean created = outputDir.mkdirs();
			checkArgument(created, "Could not create directory " + outputDir);
		}
	}

	/**
	 * Copies the <code>/skeleton</code> classpath resources into the output directory, either
	 * straight from the file system or by unpacking them from the enclosing JAR.
	 * 
	 * @throws IllegalStateException
	 *             when the skeleton resource is not on the classpath.
	 * @throws CrawljaxException
	 *             when copying fails.
	 */
	private void copySkeleton() {
		URL skeleton = OutputBuilder.class.getResource(SKELETON_RESOURCE);
		if (skeleton == null) {
			throw new IllegalStateException("Could not find the required resource "
			        + SKELETON_RESOURCE + " on the classpath");
		}
		if (skeleton.toExternalForm().contains(JAR_SEPARATOR)) {
			copySkeletonFromJar(skeleton);
		} else {
			LOG.debug("Loading skeleton as file");
			try {
				FileUtils.copyDirectory(new File(skeleton.toURI()), outputDir);
			} catch (IOException | URISyntaxException e) {
				throw new CrawljaxException(
				        "Could not copy required resources: " + e.getMessage(), e);
			}
		}

	}

	/**
	 * Unpacks every file below <code>skeleton/</code> from the JAR that contains the given
	 * resource into the output directory, creating parent folders as needed.
	 * 
	 * @param skeleton
	 *            URL of the skeleton resource inside a JAR.
	 * @throws CrawljaxException
	 *             when the JAR cannot be read or a file cannot be written.
	 */
	private void copySkeletonFromJar(URL skeleton) {
		LOG.debug("Loading skeleton as JAR entry {}", skeleton);
		File jar = getJar(skeleton);
		try (ZipInputStream zis = new ZipInputStream(new FileInputStream(jar))) {
			ZipEntry entry;
			while ((entry = zis.getNextEntry()) != null) {
				// Match on the full "skeleton/" prefix so that the substring below always
				// strips exactly the prefix that was tested for.
				if (entry.isDirectory() || !entry.getName().startsWith(SKELETON_JAR_PREFIX)) {
					continue;
				}
				String filename = entry.getName().substring(SKELETON_JAR_PREFIX.length());
				File newFile = new File(outputDir, filename);
				File parent = newFile.getParentFile();
				if (!parent.exists()) {
					boolean created = parent.mkdirs();
					checkArgument(created, "Could not create folder " + parent);
				}
				// Close the target even when the copy fails; the zip stream itself stays
				// open for the next entry.
				try (FileOutputStream out = new FileOutputStream(newFile)) {
					ByteStreams.copy(zis, out);
				}
			}
		} catch (IOException e1) {
			throw new CrawljaxException("Could not copy required resources: " + e1.getMessage(),
			        e1);
		}
	}

	/**
	 * Derives the JAR file on disk from the URL of a resource inside that JAR.
	 * 
	 * @param skeleton
	 *            URL whose path has the form <code>file:&lt;jar path&gt;.jar!/...</code>.
	 * @return the JAR file the resource lives in.
	 */
	private File getJar(URL skeleton) {
		String path;
		try {
			// URLDecoder performs form decoding and would turn a literal '+' into a space,
			// so escape it first to keep paths such as ".../c++/lib.jar" intact.
			path = URLDecoder.decode(skeleton.getPath().replace("+", "%2B"), "UTF-8");
		} catch (UnsupportedEncodingException e) {
			throw new CrawljaxException("Could not process the path of the Overview skeleton "
			        + skeleton, e);
		}
		String jarpath =
		        path.substring("file:".length(), path.indexOf(JAR_SEPARATOR) + "jar".length());
		File jar = new File(jarpath);
		LOG.debug("Jar file {} from path {}", jar, path);
		return jar;
	}

	/**
	 * @param name
	 *            base name of the screenshot, without extension.
	 * @return the <code>.jpg</code> file in the screenshots folder for that name.
	 */
	File newScreenShotFile(String name) {
		return new File(screenshots, name + ".jpg");
	}

	/**
	 * @param name
	 *            base name of the screenshot, without extension.
	 * @return the <code>_small.jpg</code> file in the screenshots folder for that name.
	 */
	public File newThumbNail(String name) {
		return new File(screenshots, name + "_small.jpg");
	}

	/**
	 * Writes the index page, the JSON result and the JSON configuration to the output
	 * directory. Failures are logged and not propagated to the caller.
	 * 
	 * @param result
	 *            the model to report on.
	 * @param config
	 *            the configuration the crawl ran with.
	 */
	public void write(OutPutModel result, CrawljaxConfiguration config) {
		try {
			writeIndexFile(result, config);
			writeJsonToOutDir(Serializer.toPrettyJson(config), "config.json");
			// Only announce success when nothing above threw.
			LOG.info("Overview report generated");
		} catch (Exception e) {
			// Deliberately broad: report generation is best effort and must not fail the caller.
			LOG.error("Could not generate the overview report: {}", e.getMessage(), e);
		}
	}

	/**
	 * Writes <code>result.json</code> and renders <code>index.html</code> from the model and
	 * configuration.
	 */
	private void writeIndexFile(OutPutModel model, CrawljaxConfiguration config) {
		LOG.debug("Writing index file");
		VelocityContext context = new VelocityContext();
		writeJsonToOutDir(Serializer.toPrettyJson(model), JSON_OUTPUT_NAME);
		context.put("states", Serializer.toPrettyJson(model.getStates()));
		context.put("edges", Serializer.toPrettyJson(model.getEdges()));
		context.put("config", BeanToReadableMap.toMap(config));
		context.put("crawledUrl", config.getUrl());
		context.put("stats", model.getStatistics());
		context.put("exitStatus", model.getExitStatus());

		LOG.debug("Writing urls report");
		context.put("urls", model.getStatistics().getStateStats().getUrls());

		writeFile(context, indexFile, "index.html");
	}

	/**
	 * Writes the given JSON as UTF-8 to a file in the output directory. An I/O failure is
	 * logged as a warning and otherwise ignored.
	 * 
	 * @param outModelJson
	 *            the JSON text.
	 * @param filename
	 *            name of the file, relative to the output directory.
	 */
	private void writeJsonToOutDir(String outModelJson, String filename) {
		try {
			Files.write(outModelJson, new File(this.outputDir, filename), Charsets.UTF_8);
		} catch (IOException e) {
			LOG.warn("Could not write JSON model to output dir. " + e.getMessage(), e);
		}
	}

	/**
	 * Renders a Velocity template into a file.
	 * 
	 * @param context
	 *            the values available to the template.
	 * @param outFile
	 *            the file to write.
	 * @param template
	 *            name of the template on the classpath.
	 * @throws CrawlOverviewException
	 *             when the file cannot be written.
	 */
	private void writeFile(VelocityContext context, File outFile, String template) {
		try {
			Template templatee = ve.getTemplate(template);
			// UTF-8 to match the JSON and DOM files written by this class rather than the
			// platform default; try-with-resources closes the writer even when merge fails.
			try (Writer writer = Files.newWriter(outFile, Charsets.UTF_8)) {
				templatee.merge(context, writer);
			}
		} catch (IOException e) {
			throw new CrawlOverviewException("Could not write output state", e);
		}
	}

	/**
	 * Renders the <code>state.html</code> template into the states folder.
	 * 
	 * @param context
	 *            the values available to the template.
	 * @param stateName
	 *            name of the state; used as file name with <code>.html</code> appended.
	 */
	void writeState(VelocityContext context, String stateName) {
		File file = new File(states, stateName + ".html");
		writeFile(context, file, "state.html");
	}

	/**
	 * Save the dom to disk. A failure is logged and not propagated.
	 * 
	 * @param name
	 *            statename
	 * @param dom
	 *            the DOM as string; <code>null</code> is stored as an empty file.
	 */
	void persistDom(String name, @Nullable String dom) {
		try {
			Files.write(Strings.nullToEmpty(dom), new File(doms, name + ".html"), Charsets.UTF_8);
		} catch (IOException e) {
			LOG.warn("Could not save dom state for {}", name);
			LOG.debug("Could not save dom state", e);
		}
	}

	/**
	 * Reads back a DOM that was stored with {@link #persistDom(String, String)}.
	 * 
	 * @param name
	 *            statename
	 * @return the stored DOM, or a text describing the error when the file cannot be read.
	 */
	String getDom(String name) {
		try {
			return Files.toString(new File(doms, name + ".html"), Charsets.UTF_8);
		} catch (IOException e) {
			return "Could not load DOM: " + e.getLocalizedMessage();
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy