// apoc.load.LoadHtml Maven / Gradle / Ivy
package apoc.load;
import apoc.result.MapResult;
import apoc.util.MapUtil;
import apoc.util.Util;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.logging.Log;
import org.neo4j.procedure.Context;
import org.neo4j.procedure.Description;
import org.neo4j.procedure.Name;
import org.neo4j.procedure.Procedure;
import java.util.*;
import java.util.stream.Stream;
public class LoadHtml {
// Database service handle, annotated with @Context (presumably injected by the
// Neo4j procedure framework - confirm). Not referenced in the visible part of this class.
@Context
public GraphDatabaseService db;
// Logger, annotated with @Context (presumably injected the same way - confirm).
// Not referenced in the visible part of this class.
@Context
public Log log;
/**
 * Procedure entry point for {@code apoc.load.html}.
 *
 * <p>This method contains no logic of its own: it passes its three arguments,
 * unchanged and in the same order, to {@code readHtmlPage} and returns that
 * call's result as-is.
 *
 * <p>NOTE(review): the signature uses raw {@code Stream} and {@code Map} types;
 * the type arguments may have been lost when this file was extracted - confirm
 * against the upstream source before relying on them.
 *
 * @param url    location of the HTML page to load
 * @param query  map of result names to selector strings (each value is handed to
 *               {@code Document.select} by {@code readHtmlPage}); declared default is "{}"
 * @param config options map; the visible part of {@code readHtmlPage} reads the
 *               keys "charset" and "baseUri" from it; declared default is "{}"
 * @return the stream produced by {@code readHtmlPage}
 */
@Procedure
@Description("apoc.load.html('url',{name: jquery, name2: jquery}, config) YIELD value - Load Html page and return the result as a Map")
public Stream html(
        @Name("url") String url,
        @Name(value = "query",defaultValue = "{}") Map query,
        @Name(value = "config",defaultValue = "{}") Map config) {
    // Pure delegation; all loading and parsing happens in readHtmlPage.
    final Stream pageRows = readHtmlPage(url, query, config);
    return pageRows;
}
private Stream readHtmlPage(String url, Map query, Map config){
try {
String charset = config.getOrDefault("charset", "UTF-8").toString();
// baseUri is used to resolve relative paths
String baseUri = config.getOrDefault("baseUri", "").toString();
Document document = Jsoup.parse(Util.openInputStream(url, null, null), charset, baseUri);
return query.keySet().stream().map(key -> {
Elements elements = document.select(query.get(key));
List
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy