All downloads are free. The search and download functionality uses the official Maven repository.

ai.platon.pulsar.client.Scraper Maven / Gradle / Ivy

There is a newer version: 2.0.2
Show newest version
package ai.platon.pulsar.client;

import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

/**
 * A minimal scraping client: it posts a single X-SQL request to the pulsar server and prints
 * the scrape result that comes back.
 */
public class Scraper {

    /** Address the X-SQL statement is posted to. */
    private static final String ENDPOINT = "http://localhost:8182/api/x/e";

    /**
     * The X-SQL statement sent as the request body. It selects the title, list price, price,
     * breadcrumb categories (joined with '|') and base URI from the page named in
     * {@code load_and_select}.
     */
    private static final String X_SQL = "select\n" +
            "            dom_first_text(dom, '#productTitle') as `title`,\n" +
            "            dom_first_text(dom, '#price tr td:contains(List Price) ~ td') as `listprice`,\n" +
            "            dom_first_text(dom, '#price tr td:matches(^Price) ~ td, #price_inside_buybox') as `price`,\n" +
            "            array_join_to_string(dom_all_texts(dom, '#wayfinding-breadcrumbs_container ul li a'), '|') as `categories`,\n" +
            "            dom_base_uri(dom) as `baseUri`\n" +
            "        from\n" +
            "            load_and_select('https://www.amazon.com/dp/B0C1H26C46', ':root')";

    /**
     * Sends the X-SQL statement as a plain-text POST and writes the response body to standard output.
     *
     * @param args command-line arguments; not used
     * @throws IOException          if sending the request or receiving the response fails
     * @throws InterruptedException if the blocking send is interrupted
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        // Assemble the POST request: target URI, content type, then the statement as the body.
        HttpRequest.Builder builder = HttpRequest.newBuilder()
                .uri(URI.create(ENDPOINT))
                .header("Content-Type", "text/plain");
        HttpRequest query = builder
                .POST(HttpRequest.BodyPublishers.ofString(X_SQL))
                .build();

        // Send synchronously with a default client and read the whole body as a string.
        HttpClient client = HttpClient.newHttpClient();
        HttpResponse<String> reply = client.send(query, HttpResponse.BodyHandlers.ofString());

        System.out.println(reply.body());
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy