
com.crawljax.plugins.crawloverview.OutputBuilder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of crawloverview-plugin Show documentation
Generates an HTML report with a snapshot overview of what is crawled.
package com.crawljax.plugins.crawloverview;
import static com.google.common.base.Preconditions.checkArgument;
import javax.annotation.Nullable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import com.crawljax.core.CrawljaxException;
import com.crawljax.core.configuration.CrawljaxConfiguration;
import com.crawljax.plugins.crawloverview.model.OutPutModel;
import com.crawljax.plugins.crawloverview.model.Serializer;
import com.google.common.base.Charsets;
import com.google.common.base.Strings;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import org.apache.commons.io.FileUtils;
import org.apache.velocity.Template;
import org.apache.velocity.VelocityContext;
import org.apache.velocity.app.VelocityEngine;
import org.apache.velocity.runtime.RuntimeConstants;
import org.apache.velocity.runtime.resource.loader.ClasspathResourceLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Prepares the output directory of the crawl overview report and writes the report files into
 * it: the skeleton resources, the index page, the per-state pages, the JSON dumps and the DOM
 * dumps.
 */
class OutputBuilder {

    private static final Logger LOG = LoggerFactory.getLogger(OutputBuilder.class);

    static final String SCREENSHOT_FOLDER_NAME = "screenshots";
    static final String STATES_FOLDER_NAME = "states";
    static final String JSON_OUTPUT_NAME = "result.json";
    static final String DOMS_OUTPUT_NAME = "doms";

    /** Classpath location of the static resources that are copied into the output directory. */
    private static final String SKELETON_RESOURCE = "/skeleton";

    /** Prefix carried by the skeleton entries when they are read from a JAR file. */
    private static final String SKELETON_ENTRY_PREFIX = "skeleton/";

    private final File outputDir;
    private final File states;
    private final File screenshots;
    private final File indexFile;
    private final File doms;
    private final VelocityEngine ve;

    /**
     * @param outputDir
     *            target for the output directory. Folder must not exist or be empty.
     * @throws IllegalArgumentException
     *             if the directory is not usable or one of the sub folders cannot be created.
     */
    OutputBuilder(File outputDir) {
        this.outputDir = outputDir;
        checkPermissions();
        copySkeleton();
        states = createFolder(STATES_FOLDER_NAME);
        screenshots = createFolder(SCREENSHOT_FOLDER_NAME);
        doms = createFolder(DOMS_OUTPUT_NAME);
        indexFile = new File(outputDir, "index.html");
        ve = new VelocityEngine();
        configureVelocity();
    }

    /**
     * Creates a direct sub folder of the output directory.
     *
     * @param name
     *            the folder name.
     * @return the created folder.
     * @throws IllegalArgumentException
     *             if the folder could not be created.
     */
    private File createFolder(String name) {
        File folder = new File(outputDir, name);
        checkArgument(folder.mkdir(), "Could not create %s dir", name);
        return folder;
    }

    /** Silences Velocity's own logging and makes it load its templates from the classpath. */
    private void configureVelocity() {
        ve.setProperty(RuntimeConstants.RUNTIME_LOG_LOGSYSTEM_CLASS,
                "org.apache.velocity.runtime.log.NullLogChute");
        ve.setProperty(RuntimeConstants.RESOURCE_LOADER, "classpath");
        ve.setProperty("classpath.resource.loader.class", ClasspathResourceLoader.class.getName());
    }

    /**
     * Verifies that the output directory is an empty, writable directory, creating it when it
     * does not exist yet.
     *
     * @throws IllegalArgumentException
     *             if any of these conditions does not hold.
     */
    private void checkPermissions() {
        if (outputDir.exists()) {
            checkArgument(outputDir.isDirectory(), outputDir + " is not a directory");
            // File.list() returns null (instead of throwing) when the folder cannot be read.
            String[] children = outputDir.list();
            checkArgument(children != null, "Could not list the content of " + outputDir);
            checkArgument(children.length == 0, "Directory must be empty");
            checkArgument(outputDir.canWrite(), "Output dir not writable");
        } else {
            boolean created = outputDir.mkdirs();
            checkArgument(created, "Could not create directory " + outputDir);
        }
    }

    /**
     * Copies the skeleton resources into the output directory, either from the JAR that contains
     * them or from a plain directory on the classpath.
     *
     * @throws IllegalStateException
     *             if the skeleton resources are not on the classpath.
     * @throws CrawljaxException
     *             if the resources cannot be copied.
     */
    private void copySkeleton() {
        URL skeleton = OutputBuilder.class.getResource(SKELETON_RESOURCE);
        if (skeleton == null) {
            throw new IllegalStateException("Could not find the required resources "
                    + SKELETON_RESOURCE + " on the classpath");
        }
        if (skeleton.toExternalForm().contains("jar!")) {
            copySkeletonFromJar(skeleton);
        } else {
            LOG.debug("Loading skeleton as file");
            try {
                FileUtils.copyDirectory(new File(skeleton.toURI()), outputDir);
            } catch (IOException | URISyntaxException e) {
                throw new CrawljaxException(
                        "Could not copy required resources: " + e.getMessage(), e);
            }
        }
    }

    /**
     * Extracts every file below {@code skeleton/} from the JAR the given URL points into and
     * stores it under the same relative path in the output directory.
     *
     * @param skeleton
     *            URL of the skeleton folder inside a JAR file.
     * @throws CrawljaxException
     *             if the JAR cannot be read or a file cannot be written.
     */
    private void copySkeletonFromJar(URL skeleton) {
        LOG.debug("Loading skeleton as JAR entry {}", skeleton);
        File jar = getJar(skeleton);
        try (ZipInputStream zis = new ZipInputStream(new FileInputStream(jar))) {
            ZipEntry entry;
            while ((entry = zis.getNextEntry()) != null) {
                // Match on the full "skeleton/" prefix: it is also the part that is stripped
                // below, so entries that merely start with "skeleton" must not get through.
                if (entry.isDirectory() || !entry.getName().startsWith(SKELETON_ENTRY_PREFIX)) {
                    continue;
                }
                String filename = entry.getName().substring(SKELETON_ENTRY_PREFIX.length());
                File newFile = new File(outputDir, filename);
                File parent = newFile.getParentFile();
                if (!parent.exists()) {
                    boolean created = parent.mkdirs();
                    checkArgument(created, "Could not create folder " + parent);
                }
                // The zip stream is positioned on the current entry; copy() reads up to its end.
                try (FileOutputStream out = new FileOutputStream(newFile)) {
                    ByteStreams.copy(zis, out);
                }
            }
        } catch (IOException e1) {
            throw new CrawljaxException("Could not copy required resources: " + e1.getMessage(),
                    e1);
        }
    }

    /**
     * Derives the JAR file from a URL whose path has the form {@code file:<jar path>!<entry>}.
     *
     * @param skeleton
     *            URL of a resource inside a JAR file.
     * @return the JAR file that contains the resource.
     * @throws CrawljaxException
     *             if the path cannot be decoded.
     */
    private File getJar(URL skeleton) {
        String path;
        try {
            // URLDecoder implements form decoding and would turn a literal '+' of the file name
            // into a space, so escape it first.
            path = URLDecoder.decode(skeleton.getPath().replace("+", "%2B"), "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new CrawljaxException("Could not process the path of the Overview skeleton "
                    + skeleton, e);
        }
        // Keep everything between the "file:" prefix and the '!' that follows the jar name.
        String jarpath = path.substring("file:".length(), path.indexOf("jar!") + "jar".length());
        File jar = new File(jarpath);
        LOG.debug("Jar file {} from path {}", jar, path);
        return jar;
    }

    /**
     * @param name
     *            the file name without extension.
     * @return the {@code .jpg} file with that name in the screenshots folder. The file itself is
     *         not created.
     */
    File newScreenShotFile(String name) {
        return new File(screenshots, name + ".jpg");
    }

    /**
     * @param name
     *            the file name without suffix and extension.
     * @return the {@code _small.jpg} file with that name in the screenshots folder. The file
     *         itself is not created.
     */
    public File newThumbNail(String name) {
        return new File(screenshots, name + "_small.jpg");
    }

    /**
     * Writes the index page, the result JSON and the configuration JSON. Failures are logged and
     * not propagated.
     *
     * @param result
     *            the model to report on.
     * @param config
     *            the configuration of the crawl.
     */
    public void write(OutPutModel result, CrawljaxConfiguration config) {
        try {
            writeIndexFile(result, config);
            writeJsonToOutDir(Serializer.toPrettyJson(config), "config.json");
            // Only report success when nothing above failed.
            LOG.info("Overview report generated");
        } catch (Exception e) {
            LOG.error("Could not generate the overview report", e);
        }
    }

    /**
     * Stores the model as JSON and renders the index page from the {@code index.html} template.
     */
    private void writeIndexFile(OutPutModel model, CrawljaxConfiguration config) {
        LOG.debug("Writing index file");
        VelocityContext context = new VelocityContext();
        writeJsonToOutDir(Serializer.toPrettyJson(model), JSON_OUTPUT_NAME);
        context.put("states", Serializer.toPrettyJson(model.getStates()));
        context.put("edges", Serializer.toPrettyJson(model.getEdges()));
        context.put("config", BeanToReadableMap.toMap(config));
        context.put("crawledUrl", config.getUrl());
        context.put("stats", model.getStatistics());
        context.put("exitStatus", model.getExitStatus());
        LOG.debug("Writing urls report");
        context.put("urls", model.getStatistics().getStateStats().getUrls());
        writeFile(context, indexFile, "index.html");
    }

    /**
     * Writes the given JSON as UTF-8 into a file of the output directory. A failure is logged
     * and not propagated.
     *
     * @param outModelJson
     *            the JSON text.
     * @param filename
     *            the name of the file inside the output directory.
     */
    private void writeJsonToOutDir(String outModelJson, String filename) {
        try {
            Files.write(outModelJson, new File(this.outputDir, filename), Charsets.UTF_8);
        } catch (IOException e) {
            LOG.warn("Could not write JSON model to output dir. " + e.getMessage());
            LOG.debug("Could not write JSON model to output dir", e);
        }
    }

    /**
     * Renders a Velocity template into a file.
     *
     * @param context
     *            the values available to the template.
     * @param outFile
     *            the file to write.
     * @param template
     *            the name of the template.
     * @throws CrawlOverviewException
     *             if the file cannot be written.
     */
    private void writeFile(VelocityContext context, File outFile, String template) {
        try {
            Template templatee = ve.getTemplate(template);
            // UTF-8 like every other file this class writes; the writer is closed (and thereby
            // flushed) even when the merge fails.
            try (Writer writer = Files.newWriter(outFile, Charsets.UTF_8)) {
                templatee.merge(context, writer);
            }
        } catch (IOException e) {
            throw new CrawlOverviewException("Could not write output state", e);
        }
    }

    /**
     * Renders the {@code state.html} template into {@code <stateName>.html} in the states folder.
     *
     * @param context
     *            the values available to the template.
     * @param stateName
     *            the name of the state; used as file name.
     */
    void writeState(VelocityContext context, String stateName) {
        File file = new File(states, stateName + ".html");
        writeFile(context, file, "state.html");
    }

    /**
     * Save the dom to disk. A failure is logged and not propagated.
     *
     * @param name
     *            statename
     * @param dom
     *            the DOM as string; {@code null} is stored as an empty file.
     */
    void persistDom(String name, @Nullable String dom) {
        try {
            Files.write(Strings.nullToEmpty(dom), new File(doms, name + ".html"), Charsets.UTF_8);
        } catch (IOException e) {
            LOG.warn("Could not save dom state for {}", name);
            LOG.debug("Could not save dom state", e);
        }
    }

    /**
     * Loads a DOM that was stored with {@link #persistDom(String, String)}.
     *
     * @param name
     *            statename
     * @return the stored DOM, or a text describing the error when the file cannot be read.
     */
    String getDom(String name) {
        try {
            return Files.toString(new File(doms, name + ".html"), Charsets.UTF_8);
        } catch (IOException e) {
            return "Could not load DOM: " + e.getLocalizedMessage();
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy