All downloads are free. The search and download features use the official Maven repository.

com.ubervu.river.github.GitHubRiver Maven / Gradle / Ivy

There is a newer version: 1.7.1
Show newest version
package com.ubervu.river.github;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonStreamParser;
import org.apache.commons.codec.digest.DigestUtils;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.deletebyquery.DeleteByQueryResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.Base64;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.river.AbstractRiverComponent;
import org.elasticsearch.river.River;
import org.elasticsearch.river.RiverName;
import org.elasticsearch.river.RiverSettings;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.elasticsearch.index.query.QueryBuilders.termQuery;


public class GitHubRiver extends AbstractRiverComponent implements River {

    private final Client client;
    private final String index;
    private final String repository;
    private final String owner;
    private final int interval;
    private String password;
    private String username;
    private DataStream dataStream;

    @SuppressWarnings({"unchecked"})
    @Inject
    public GitHubRiver(RiverName riverName, RiverSettings settings, Client client) {
        super(riverName, settings);
        this.client = client;

        if (!settings.settings().containsKey("github")) {
            throw new IllegalArgumentException("Need river settings - owner and repository.");
        }

        // get settings
        Map githubSettings = (Map) settings.settings().get("github");
        owner = XContentMapValues.nodeStringValue(githubSettings.get("owner"), null);
        repository = XContentMapValues.nodeStringValue(githubSettings.get("repository"), null);
        index = String.format("%s&%s", owner, repository);
        interval = XContentMapValues.nodeIntegerValue(githubSettings.get("interval"), 3600);

        // auth (optional)
        username = null;
        password = null;
        if (githubSettings.containsKey("authentication")) {
            Map auth = (Map) githubSettings.get("authentication");
            username = XContentMapValues.nodeStringValue(auth.get("username"), null);
            password = XContentMapValues.nodeStringValue(auth.get("password"), null);
        }

        logger.info("Created GitHub river.");
    }

    @Override
    public void start() {
        dataStream = new DataStream();
        dataStream.start();
        logger.info("Started GitHub river.");
    }

    @Override
    public void close() {
        dataStream.setRunning(false);
        logger.info("Stopped GitHub river.");
    }

    private class DataStream extends Thread {
        private volatile boolean isRunning;

        @Inject
        public DataStream() {
            super("DataStream thread");
            isRunning = true;
        }

        private void indexResponse(URLConnection conn, String type) throws IOException {
            InputStream input = conn.getInputStream();
            JsonStreamParser jsp = new JsonStreamParser(new InputStreamReader(input));

            JsonArray array = (JsonArray) jsp.next();

            BulkProcessor bp = BulkProcessor.builder(client, new BulkProcessor.Listener() {
                @Override
                public void beforeBulk(long executionId, BulkRequest request) {
                }

                @Override
                public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
                }

                @Override
                public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
                }
            }).build();

            IndexRequest req = null;
            for (JsonElement e: array) {
                if (type.equals("event")) {
                    req = indexEvent(e);
                } else if (type.equals("issue")) {
                    req = indexOther(e, "IssueData", true);
                } else if (type.equals("pullreq")) {
                    req = indexOther(e, "PullRequestData");
                } else if (type.equals("milestone")) {
                    req = indexOther(e, "MilestoneData");
                } else if (type.equals("label")) {
                    req = indexOther(e, "LabelData");
                }
                bp.add(req);
            }
            bp.close();

            input.close();
        }

        private IndexRequest indexEvent(JsonElement e) {
            JsonObject obj = e.getAsJsonObject();
            String type = obj.get("type").getAsString();
            String id = obj.get("id").getAsString();
            IndexRequest req = new IndexRequest(index)
                    .type(type)
                    .id(id).create(false) // we want to overwrite old items
                    .source(e.toString());
            return req;
        }

        private IndexRequest indexOther(JsonElement e, String type, boolean overwrite) {
            JsonObject obj = e.getAsJsonObject();

            // handle objects that don't have IDs (i.e. labels)
            // set the ID to the MD5 hash of the string representation
            String id;
            if (obj.has("id")) {
                id = obj.get("id").getAsString();
            } else {
                id = DigestUtils.md5Hex(e.toString());
            }

            IndexRequest req = new IndexRequest(index)
                    .type(type)
                    .id(id).create(!overwrite)
                    .source(e.toString());
            return req;
        }

        private IndexRequest indexOther(JsonElement e, String type) {
            return indexOther(e, type, false);
        }

        private HashMap parseHeader(String header) {
            // inspired from https://github.com/uberVU/elasticboard/blob/4ccdfd8c8e772c1dda49a29a7487d14b8d820762/data_processor/github.py#L73
            Pattern p = Pattern.compile("\\<([a-z/0-9:\\.\\?_&=]+page=([0-9]+))\\>;\\s*rel=\\\"([a-z]+)\\\".*");
            Matcher m = p.matcher(header);

            if (!m.matches()) {
                return null;
            }

            HashMap data = new HashMap();
            data.put("url", m.group(1));
            data.put("page", m.group(2));
            data.put("rel", m.group(3));

            return data;
        }

        private boolean morePagesAvailable(URLConnection response) {
            String link = response.getHeaderField("link");
            if (link == null || link.length() == 0) {
                return false;
            }

            HashMap headerData = parseHeader(response.getHeaderField("link"));
            if (headerData == null) {
                return false;
            }

            String rel = headerData.get("rel");
            return rel.equals("next");
        }

        private String nextPageURL(URLConnection response) {
            HashMap headerData = parseHeader(response.getHeaderField("link"));
            if (headerData == null) {
                return null;
            }
            return headerData.get("url");
        }

        private void addAuthHeader(URLConnection request) {
            if (username == null || password == null) {
                return;
            }
            String auth = String.format("%s:%s", username, password);
            String encoded = Base64.encodeBytes(auth.getBytes());
            request.setRequestProperty("Authorization", "Basic " + encoded);
        }

        private void getData(String fmt, String type) {
            try {
                URL url = new URL(String.format(fmt, owner, repository));
                URLConnection response = url.openConnection();
                addAuthHeader(response);
                indexResponse(response, type);

                while (morePagesAvailable(response)) {
                    url = new URL(nextPageURL(response));
                    response = url.openConnection();
                    addAuthHeader(response);
                    indexResponse(response, type);
                }
            } catch (Exception e) {
                logger.error("Exception in getData", e);
            }
        }

        private void deleteByType(String type) {
            DeleteByQueryResponse response = client.prepareDeleteByQuery(index)
                    .setQuery(termQuery("_type", type))
                    .execute()
                    .actionGet();
        }

        @Override
        public void run() {
            while (isRunning) {
                getData("https://api.github.com/repos/%s/%s/events?per_page=1000", "event");
                getData("https://api.github.com/repos/%s/%s/issues?per_page=1000", "issue");
                getData("https://api.github.com/repos/%s/%s/issues?state=closed&per_page=1000", "issue");

                // delete pull req data - we are only storing open pull reqs
                // and when a pull request is closed we have no way of knowing;
                // this is why we have to delete them and reindex "fresh" ones
                deleteByType("PullRequestData");
                getData("https://api.github.com/repos/%s/%s/pulls", "pullreq");

                // same for milestones
                deleteByType("MilestoneData");
                getData("https://api.github.com/repos/%s/%s/milestones?per_page=1000", "milestone");

                // and for labels - they have IDs based on the MD5 of the contents, so
                // if a property changes, we get a "new" document
                deleteByType("LabelData");
                getData("https://api.github.com/repos/%s/%s/labels?per_page=1000", "label");


                try {
                    Thread.sleep(interval * 1000); // needs milliseconds
                } catch (InterruptedException e) {}
            }
        }

        public void setRunning(boolean running) {
            isRunning = running;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy