
// com.republicate.stillness.directive.FollowDirective Maven / Gradle / Ivy
package com.republicate.stillness.directive;
import org.apache.velocity.runtime.directive.InputBase;
import org.apache.velocity.runtime.parser.node.Node;
import org.apache.velocity.runtime.resource.Resource;
import org.apache.velocity.context.InternalContextAdapter;
import org.apache.velocity.context.Context;
import org.apache.velocity.context.InternalContextAdapterImpl;
import org.apache.velocity.exception.ResourceNotFoundException;
import org.apache.velocity.exception.ParseErrorException;
import org.apache.velocity.exception.MethodInvocationException;
import java.io.Writer;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import java.net.URL;
import java.net.MalformedURLException;
import com.republicate.stillness.ScrapeException;
import com.republicate.stillness.ScrapeContext;
import com.republicate.stillness.node.RNode;
import com.republicate.stillness.StillnessUtil;
/**
* Handle the scraping for the follow directives.
*
* @author Claude Brisson
*/
public class FollowDirective extends InputBase {

    /**
     * Returns the name under which this directive is referenced.
     *
     * @return the literal string {@code "follow"}
     */
    public String getName() {
        return "follow";
    }

    /**
     * Returns the directive type.
     *
     * @return the inherited {@code BLOCK} constant
     */
    public int getType() {
        return BLOCK;
    }

    /**
     * Not used: this directive is only meant to be driven through
     * {@link #scrape(String, Context, ScrapeContext, RNode)}.
     *
     * @throws RuntimeException always
     */
    public boolean render(InternalContextAdapter context, Writer writer, Node node) {
        throw new RuntimeException("FollowDirective.render is not implemented.");
    }

    /**
     * Load the file or url and scrape it using the reverse template contained in its body.
     *
     * <p>The first child of the directive is evaluated against {@code context} to obtain
     * the location. A location containing {@code "://"} is opened as a URL, anything else
     * as a local file path. The whole resource is read into memory, optionally normalized,
     * and handed to the second child (the body) for scraping.</p>
     *
     * @param source the text currently being scraped; not read by this directive, which
     *        scrapes the content of the followed resource instead
     * @param context the context used to evaluate the location argument, also passed on
     *        to the body
     * @param scrapeContext the current scrape context; only its debug and normalization
     *        settings are propagated to the body (see the TODO in the method)
     * @param node the reverse-template node of this directive, expected to have at least
     *        two children: the location and the body
     * @throws ScrapeException if there are fewer than two children, if the location
     *         evaluates to {@code null}, or if the resource cannot be opened
     * @throws IOException if reading the opened resource fails
     */
    public void scrape(String source, Context context, ScrapeContext scrapeContext, RNode node)
            throws IOException, ResourceNotFoundException, ParseErrorException, MethodInvocationException {
        // check that we have two children (path and body)
        if (node.getListChildrens().size() < 2) {
            throw new ScrapeException("#follow: expecting two arguments: url/path and body");
        }

        // get the path or url
        Object value = node.getAstNode().jjtGetChild(0).value(new InternalContextAdapterImpl(context));
        if (value == null) {
            throw new ScrapeException("#follow() argument has no value");
        }
        String arg = value.toString();

        // open the resource
        // NOTE(review): the stream is decoded with the platform default charset - confirm
        // whether an explicit charset should be used for the followed resources.
        BufferedReader reader;
        try {
            if (arg.indexOf("://") != -1) {
                reader = new BufferedReader(new InputStreamReader(new URL(arg).openStream()));
            } else {
                reader = new BufferedReader(new InputStreamReader(new FileInputStream(arg)));
            }
        } catch (MalformedURLException MUe) {
            if (scrapeContext.isDebugEnabled()) {
                scrapeContext.getDebugOutput().logText("#follow(\"" + value + "\")", true);
            }
            throw new ScrapeException("#follow(" + arg + "): Bad URL: " + MUe.getMessage());
        } catch (IOException IOe) {
            if (scrapeContext.isDebugEnabled()) {
                scrapeContext.getDebugOutput().logText("#follow(\"" + value + "\")", true);
            }
            throw new ScrapeException("#follow(" + arg + "): I/O exception: " + IOe.getMessage());
        } catch (Exception e) {
            if (scrapeContext.isDebugEnabled()) {
                scrapeContext.getDebugOutput().logText("#follow(\"" + value + "\")", true);
            }
            throw new ScrapeException("#follow(" + arg + "): Exception occurred: " + e.getMessage() + " (" + arg + ")");
        }

        // read everything, then release the underlying stream before running the body
        String buffer;
        try {
            if (scrapeContext.isDebugEnabled()) {
                scrapeContext.getDebugOutput().logText("#follow(\"" + arg + "\")", true);
            }
            buffer = readContent(reader);
        } finally {
            closeQuietly(reader);
        }

        // source normalization
        if (scrapeContext.isNormalized()) {
            buffer = StillnessUtil.normalize(buffer);
        }

        // we scrape the resource found using the reverse template found in the body (second child)
        // we use a new ScrapeContext - TODO: why? it's certainly wrong, we cannot reuse previous values!
        ScrapeContext sc = new ScrapeContext(0);
        sc.setNormalization(scrapeContext.isNormalized());
        sc.setDebugOutput(scrapeContext.getDebugOutput());
        ((RNode) node.getListChildrens().get(1)).scrape(buffer, context, sc);

        // TODO: should synchronize be reset to false when leaving follow?
        if (scrapeContext.isDebugEnabled()) {
            scrapeContext.getDebugOutput().logText("#end", true);
        }
    }

    /**
     * Reads all remaining lines from the reader into a single string.
     *
     * <p>{@code readLine()} strips line terminators and no separator is re-inserted, so
     * consecutive lines are joined directly. This is kept as is because existing reverse
     * templates may rely on it.</p>
     */
    private static String readContent(BufferedReader reader) throws IOException {
        StringBuilder content = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            content.append(line);
        }
        return content.toString();
    }

    /**
     * Closes the reader, ignoring any failure so that it cannot mask an earlier exception.
     */
    private static void closeQuietly(BufferedReader reader) {
        try {
            reader.close();
        } catch (IOException ignored) {
            // the content has already been read (or reading already failed);
            // a failure to close a read-only stream is not actionable here
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy