All downloads are free. Search and download functionality uses the official Maven repository.

uk.ac.shef.dcs.kbsearch.freebase.FreebaseQueryProxy Maven / Gradle / Ivy

The newest version!
package uk.ac.shef.dcs.kbsearch.freebase;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.*;

import com.google.api.client.http.GenericUrl;
import com.google.api.client.http.HttpRequest;
import com.google.api.client.http.HttpRequestFactory;
import com.google.api.client.http.HttpResponse;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.http.javanet.NetHttpTransport;
import org.apache.commons.collections.CollectionUtils;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import uk.ac.shef.dcs.kbsearch.model.Attribute;
import uk.ac.shef.dcs.kbsearch.model.Clazz;
import uk.ac.shef.dcs.util.StringUtils;


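/**
 * Client-side proxy for querying Freebase over HTTP through its Topic, Search and MQL APIs.
 * Endpoint URLs, the API key and the query limits are read from the {@link Properties}
 * object handed to the constructor.
 *
 * Author: zqz
 * Created: 18/01/14, 22:06
 */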
public class FreebaseQueryProxy {


    // Class-wide logger; the query methods use it for debug-level timing messages.
    public static Logger LOG = Logger.getLogger(FreebaseQueryProxy.class.getName());
    //private String BASE_QUERY_URL="https://www.googleapis.com/freebase/v1/mqlread";
    // Parses the JSON bodies returned by the API calls.
    private JSONParser jsonParser;
    // Executes the API requests built by the query methods. It is constructed from the
    // per-second and per-day limits below; presumably it enforces them -- confirm in
    // FreebaseQueryInterrupter.
    private FreebaseQueryInterrupter interrupter;
    // HTTP transport and the request factory derived from it; used to build GET requests.
    private HttpTransport httpTransport;
    private HttpRequestFactory requestFactory;
    // Configuration supplied to the constructor; looked up with the keys declared below.
    private Properties properties;

    // Property key: first constructor argument of the FreebaseQueryInterrupter.
    private static final String FB_MAX_QUERY_PER_SECOND="fb.query.max.sec";

    // Property key: second constructor argument of the FreebaseQueryInterrupter.
    private static final String FB_MAX_QUERY_PER_DAY="fb.query.max.day";

    // Property key: base URL of the Topic API; the topic id is appended directly to it.
    private static final String FB_QUERY_API_URL_TOPIC ="fb.query.apiurl.topic";

    // Property key: URL of the Search API.
    private static final String FB_QUERY_API_URL_SEARCH ="fb.query.apiurl.search";

    // Property key: URL of the MQL read API.
    private static final String FB_QUERY_API_URL_MQL ="fb.query.apiurl.mql";

    // Property key: API key sent as the "key" parameter of every API request.
    private static final String FB_QUERY_API_KEY="fb.query.api.key";

    // Property key: URL prefix used by find_granularityForType to fetch a type's page.
    private static final String FB_HOMEPAGE="fb.homepage";

    // Property key: page size ("limit") used by the paged mqlapi_* queries.
    private static final String FB_QUERY_PARAM_LIMIT="fb.query.param.limit";

    public FreebaseQueryProxy(Properties properties) throws IOException {
        this.properties=properties;
        interrupter = new FreebaseQueryInterrupter(Integer.valueOf(properties.get(FB_MAX_QUERY_PER_SECOND).toString()),
                Integer.valueOf(properties.get(FB_MAX_QUERY_PER_DAY).toString()));
        httpTransport = new NetHttpTransport();
        requestFactory = httpTransport.createRequestFactory();
        jsonParser = new JSONParser();
    }


    /**
     * Given a topic id, queries the Topic API (with {@code limit=100}) and returns the topic's
     * attributes as collected by {@link #parseTopicAPIResult}.
     * <p>
     * A response without a "property" object, or one that is not valid JSON, yields an empty
     * list; JSON parse failures are logged rather than rethrown.
     *
     * @param id topic id, appended verbatim to the configured Topic API URL
     * @return the attributes found; never {@code null}
     * @throws IOException if building or executing the HTTP request fails
     */
    public List topicapi_getAttributesOfTopic(String id) throws IOException {
        long startMillis = System.currentTimeMillis();
        List res = new ArrayList<>();
        GenericUrl url = new GenericUrl(properties.get(FB_QUERY_API_URL_TOPIC).toString() + id);
        url.put("key", properties.get(FB_QUERY_API_KEY));
        url.put("limit", 100);
        HttpRequest request = requestFactory.buildGetRequest(url);
        HttpResponse httpResponse = interrupter.executeQuery(request, true);
        try {
            JSONObject topic = (JSONObject) jsonParser.parse(httpResponse.parseAsString());
            // Named so that it no longer shadows the "properties" configuration field.
            JSONObject topicProperties = (JSONObject) topic.get("property");
            // A topic without any property must produce an empty result, not a NullPointerException.
            if (topicProperties != null) {
                parseTopicAPIResult(topicProperties, res, true);
            }
        } catch (ParseException pe) {
            LOG.error("Cannot parse Topic API response for topic " + id, pe);
        }
        LOG.debug("\tQueryFreebase (attributes):" + (System.currentTimeMillis() - startMillis));
        return res;
    }

    /**
     * Queries the Topic API for a topic, filtered to the "has type" relation
     * ({@code FreebaseEnum.RELATION_HASTYPE}), and returns whatever
     * {@link #parseTopicAPIResult} extracts from the response's "property" object.
     *
     * @param id topic id, appended verbatim to the configured Topic API URL
     * @return the parsed entries; empty when the response has no "property" object or is not valid JSON
     * @throws IOException if building or executing the HTTP request fails
     */
    public List topicapi_getTypesOfTopicID(String id) throws IOException {
        final long begin = System.currentTimeMillis();
        final List types = new ArrayList<>();

        final String endpoint = properties.get(FB_QUERY_API_URL_TOPIC).toString();
        final GenericUrl topicUrl = new GenericUrl(endpoint + id);
        topicUrl.put("key", properties.get(FB_QUERY_API_KEY));
        topicUrl.put("filter", FreebaseEnum.RELATION_HASTYPE.getString());

        final HttpRequest get = requestFactory.buildGetRequest(topicUrl);
        final HttpResponse reply = interrupter.executeQuery(get, true);
        try {
            final JSONObject parsed = (JSONObject) jsonParser.parse(reply.parseAsString());
            final Object typeProperties = parsed.get("property");
            if (typeProperties != null) {
                parseTopicAPIResult((JSONObject) typeProperties, types, true);
            }
        } catch (ParseException pe) {
            pe.printStackTrace();
        }

        final long elapsed = System.currentTimeMillis() - begin;
        LOG.debug("\tQueryFreebase (types):" + elapsed);
        return types;
    }

    /**
     * Queries the Topic API for a topic using the given {@code filter} (with {@code limit=200})
     * and returns the attributes collected by {@link #parseTopicAPIResult}.
     * <p>
     * A response without a "property" object, or one that is not valid JSON, yields an empty
     * list; JSON parse failures are logged rather than rethrown.
     *
     * @param id     topic id, appended verbatim to the configured Topic API URL
     * @param filter value sent as the Topic API "filter" parameter
     * @return the attributes found; never {@code null}
     * @throws IOException if building or executing the HTTP request fails
     */
    public List topicapi_getAttributesOfTopicID(String id, String filter) throws IOException {
        long startMillis = System.currentTimeMillis();
        List res = new ArrayList<>();
        GenericUrl url = new GenericUrl(properties.get(FB_QUERY_API_URL_TOPIC).toString() + id);
        url.put("key", properties.get(FB_QUERY_API_KEY));
        url.put("filter", filter);
        url.put("limit", 200);
        HttpRequest request = requestFactory.buildGetRequest(url);
        HttpResponse httpResponse = interrupter.executeQuery(request, true);
        try {
            JSONObject topic = (JSONObject) jsonParser.parse(httpResponse.parseAsString());
            // Named so that it no longer shadows the "properties" configuration field.
            JSONObject topicProperties = (JSONObject) topic.get("property");
            // Nothing matching the filter: return the empty list instead of failing on null.
            if (topicProperties != null) {
                parseTopicAPIResult(topicProperties, res, true);
            }
        } catch (ParseException pe) {
            LOG.error("Cannot parse Topic API response for topic " + id + " (filter " + filter + ")", pe);
        }
        LOG.debug("\tQueryFreebase (attributes):" + (System.currentTimeMillis() - startMillis));
        return res;
    }

    /**
     * Walks a Topic API "property" object and appends the attributes of every property to
     * {@code out}.
     * <p>
     * Each property is handed to {@link #parsePropertyValues} together with its "values" array;
     * properties whose "valuetype" is "compound" are flagged so that their nested properties are
     * expanded instead. Parsing is best-effort: a property that cannot be processed is logged
     * and skipped, and the remaining properties are still handled.
     *
     * @param json           the "property" object; {@code null} is treated as "no properties"
     * @param out            list receiving the parsed attributes
     * @param directRelation whether the produced attributes are marked as direct relations
     */
    private void parseTopicAPIResult(JSONObject json, List out, boolean directRelation) {
        if (json == null) {
            return;
        }
        for (Object key : json.keySet()) {
            String prop = String.valueOf(key);
            try {
                JSONObject propValueObj = (JSONObject) json.get(key);
                JSONArray values = (JSONArray) propValueObj.get("values");
                Object valueType = propValueObj.get("valuetype");
                boolean compound = valueType != null && "compound".equals(valueType.toString());
                parsePropertyValues(values, prop, out, directRelation, compound);
            } catch (RuntimeException e) {
                // Keep the original skip-and-continue behaviour, but leave a trace of what was dropped.
                LOG.warn("Skipping Topic API property that could not be parsed: " + prop, e);
            }
        }
    }

    /**
     * Converts one Search API result object into a {@link FreebaseTopic}.
     * <p>
     * Reads the "mid", "name", "score" and "lang" entries; each of them must be present,
     * otherwise a {@link NullPointerException} is thrown.
     *
     * @param json a single element of the Search API "result" array
     * @return a topic carrying the result's id, label, score and language
     */
    private FreebaseTopic parseSearchAPIResult(JSONObject json) {
        // The mid is needed both for construction and for the explicit setId call.
        String mid = json.get("mid").toString();
        FreebaseTopic topic = new FreebaseTopic(mid);
        topic.setId(mid);

        String label = json.get("name").toString();
        topic.setLabel(label);

        String rawScore = json.get("score").toString();
        topic.setScore(Double.valueOf(rawScore));

        String language = json.get("lang").toString();
        topic.setLanguage(language);
        return topic;
    }

    /**
     * Turns the "values" array of one Topic API property into attributes appended to {@code out}.
     * <p>
     * When {@code skipCompound} is {@code true} the entries are compound values: each entry's
     * nested "property" object (if any) is expanded through {@link #parseTopicAPIResult} as
     * indirect relations, and the entry itself produces no attribute.
     * <p>
     * Otherwise each entry yields one attribute whose value is the entry's "text", or its
     * "value" when present for the description and document-text relations. The entry's "id",
     * falling back to "mid", becomes the attribute's value URI when available. Entries without
     * any value are skipped.
     *
     * @param json           the "values" array; {@code null} is treated as empty
     * @param property       name of the property the values belong to
     * @param out            list receiving the parsed attributes
     * @param directRelation whether the produced attributes are marked as direct relations
     * @param skipCompound   {@code true} if the values are compound and must be expanded instead
     */
    private void parsePropertyValues(JSONArray json, String property, List out, boolean directRelation, boolean skipCompound) {
        if (json == null) {
            return;
        }
        boolean prefersValueField = property.equals(FreebaseEnum.RELATION_HASDESCRIPTION.getString())
                || property.equals(FreebaseEnum.RELATION_HASDOCUMENTTEXT.getString());

        for (Object element : json) {
            JSONObject entry = (JSONObject) element;
            if (skipCompound) {
                Object nested = entry.get("property");
                if (nested != null) {
                    parseTopicAPIResult((JSONObject) nested, out, false);
                }
                continue;
            }

            Object val = entry.get("text");
            if (prefersValueField) {
                Object fullValue = entry.get("value");
                if (fullValue != null) {
                    val = fullValue;
                }
            }
            // Check before dereferencing: an entry without a value used to throw here and
            // abort the remaining values of this property.
            if (val == null) {
                continue;
            }

            Object id = entry.get("id");
            if (id == null) {
                id = entry.get("mid");
            }

            Attribute attr = new FreebaseAttribute(property, val.toString());
            attr.setIsDirect(directRelation);
            if (id != null) {
                attr.setValueURI(id.toString());
            }
            out.add(attr);
        }
    }


    /**
     * Searches topics by name through the Search API, optionally restricted to given types.
     * <p>
     * The request always asks for at most 20 prefix-matched candidates; from these, up to
     * {@code maxResult} topics are returned in response order. When type names are supplied
     * they are sent as a filter of the form {@code (operator type:a type:b ...)}.
     *
     * @param name       text to search for
     * @param operator   filter operator placed in front of the type list; per the original
     *                   author's note "any" means or and "all" means and
     * @param tokenMatch if {@code true}, keep only topics whose label shares at least one
     *                   alphanumeric token with {@code name}
     * @param maxResult  maximum number of topics to return
     * @param types      type ids to filter by; may be empty
     * @return the matching topics; empty when nothing matched or the response was not valid JSON
     * @throws IOException if building or executing the HTTP request fails
     */
    public List searchapi_getTopicsByNameAndType(String name, String operator, boolean tokenMatch, int maxResult, String... types) throws IOException {
        List queryTokens = StringUtils.splitToAlphaNumericTokens(name, true);

        long startMillis = System.currentTimeMillis();
        List res = new ArrayList<>();

        GenericUrl url = new GenericUrl(properties.get(FB_QUERY_API_URL_SEARCH).toString());
        url.put("query", name);
        url.put("limit", 20);
        url.put("prefixed", true);
        url.put("key", properties.get(FB_QUERY_API_KEY));

        StringBuilder filter = new StringBuilder();
        for (String t : types) {
            filter.append("type:").append(t).append(" ");
        }
        if (filter.length() > 0) {
            url.put("filter", "(" + operator + " " + filter.toString().trim() + ")");
        }

        // Use the instance's request factory rather than creating a new transport per call.
        HttpRequest request = requestFactory.buildGetRequest(url);
        HttpResponse httpResponse = interrupter.executeQuery(request, true);
        try {
            JSONObject response = (JSONObject) jsonParser.parse(httpResponse.parseAsString());
            JSONArray results = (JSONArray) response.get("result");
            if (results != null) {
                for (Object result : results) {
                    // Stop as soon as enough topics are collected; later results are not needed.
                    if (res.size() >= maxResult) {
                        break;
                    }
                    FreebaseTopic top = parseSearchAPIResult((JSONObject) result);
                    if (tokenMatch) {
                        List candidateTokens = StringUtils.splitToAlphaNumericTokens(top.getLabel(), true);
                        // Non-mutating test for "shares at least one token with the query".
                        if (Collections.disjoint(candidateTokens, queryTokens)) {
                            continue;
                        }
                    }
                    res.add(top);
                }
            }
        } catch (ParseException e) {
            LOG.error("Cannot parse Search API response for query " + name, e);
        }

        LOG.debug("\tQueryFreebase (search for topics):" + (System.currentTimeMillis() - startMillis));
        return res;
    }


    /**
     * Finds topics whose name matches {@code name} (MQL {@code name~=}) through the MQL read
     * API and returns them ordered by decreasing word overlap with {@code name}.
     * <p>
     * Results are fetched in pages of 20 using the API cursor, for at most
     * {@code ceil(maxResults / 20)} pages, so up to a full extra page beyond
     * {@code maxResults} may be returned. Each topic gets the types reported by the API,
     * except the common-topic type and user-defined types. The score of a topic is the mean of
     * (shared words / words in topic name) and (shared words / words in query), or 0 when
     * either bag of words is empty.
     *
     * @param maxResults number of results wanted; determines how many pages are requested
     * @param name       name pattern to match
     * @param operator   "any" to match topics having any of {@code types}, "and" to require
     *                   all of them; other values add no type constraint
     * @param types      type ids to constrain by; may be empty
     * @return topics sorted by descending score; never {@code null}
     * @throws IOException if building or executing an HTTP request fails
     */
    public List mql_topics_with_name(int maxResults, String name, String operator, String... types) throws IOException {
        long startMillis = System.currentTimeMillis();

        final int limit = 20;
        int iterations = maxResults % limit == 0 ? maxResults / limit : maxResults / limit + 1;

        // Both the query text and the query's bag of words are identical for every page.
        String query = buildTopicsWithNameQuery(name, operator, limit, types);
        List bowQuery = StringUtils.toBagOfWords(name, true, true, false);

        final Map<FreebaseTopic, Double> candidates = new HashMap<>();
        String cursorPoint = "";
        for (int i = 0; i < iterations; i++) {
            GenericUrl url = new GenericUrl(properties.get(FB_QUERY_API_URL_MQL).toString());
            url.put("query", query);
            url.put("key", properties.get(FB_QUERY_API_KEY));
            url.put("cursor", cursorPoint);

            // Use the instance's request factory rather than creating a new transport per call.
            HttpRequest request = requestFactory.buildGetRequest(url);
            HttpResponse httpResponse = interrupter.executeQuery(request, true);
            LOG.debug("\tQueryFreebase: requested up to " + (limit * (i + 1)) + " topics");
            try {
                JSONObject response = (JSONObject) jsonParser.parse(httpResponse.parseAsString());
                JSONArray results = (JSONArray) response.get("result");
                if (results == null) {
                    break;
                }

                for (Object result : results) {
                    JSONObject obj = (JSONObject) result;
                    Object mid = obj.get("mid");
                    Object label = obj.get("name");
                    if (mid == null || label == null) {
                        continue;
                    }
                    String topicName = label.toString();
                    FreebaseTopic ent = new FreebaseTopic(mid.toString());
                    ent.setLabel(topicName);

                    JSONArray typeArray = (JSONArray) obj.get(FreebaseEnum.RELATION_HASTYPE.getString());
                    if (typeArray != null) {
                        for (Object typeObj : typeArray) {
                            String theType = typeObj.toString();
                            if (!theType.equals(FreebaseEnum.TYPE_COMMON_TOPIC.getString())
                                    && !theType.startsWith(FreebaseEnum.TYPE_USER.getString())) {
                                ent.addType(new Clazz(theType, theType));
                            }
                        }
                    }

                    List bowEnt = StringUtils.toBagOfWords(topicName, true, true, false);
                    double score = 0.0;
                    // Guard the divisions: 0/0 would give NaN, which sorts ahead of every real score.
                    if (!bowEnt.isEmpty() && !bowQuery.isEmpty()) {
                        int intersection = CollectionUtils.intersection(bowEnt, bowQuery).size();
                        score = ((double) intersection / bowEnt.size()
                                + (double) intersection / bowQuery.size()) / 2.0;
                    }
                    candidates.put(ent, score);
                }

                // A short page, or a missing/false cursor, means there is nothing more to fetch.
                Object cursor = response.get("cursor");
                if (results.size() < limit || cursor == null || Boolean.FALSE.equals(cursor)) {
                    break;
                }
                cursorPoint = cursor.toString();
            } catch (ParseException e) {
                // As before, an unparseable page is not fatal; the next iteration re-requests it.
                LOG.error("Cannot parse MQL response for name " + name, e);
            }
        }

        LOG.debug("\tQueryFreebase:" + (System.currentTimeMillis() - startMillis));
        List<FreebaseTopic> res = new ArrayList<>(candidates.keySet());
        Collections.sort(res, (o1, o2) -> Double.compare(candidates.get(o2), candidates.get(o1)));
        return res;
    }

    /** Builds the MQL query text for {@link #mql_topics_with_name}, JSON-escaping caller-supplied strings. */
    private static String buildTopicsWithNameQuery(String name, String operator, int limit, String... types) {
        StringBuilder query = new StringBuilder("[{\"mid\":null,")
                .append("\"name\":null,")
                .append("\"name~=\":\"").append(JSONObject.escape(name)).append("\",")
                // getString(), as everywhere else in this class, so the key does not depend on toString().
                .append("\"").append(FreebaseEnum.RELATION_HASTYPE.getString()).append("\":[],");

        if (types.length > 0) {
            if ("any".equals(operator)) {
                query.append("\"type|=\":[");
                for (int n = 0; n < types.length; n++) {
                    if (n > 0) {
                        query.append(',');
                    }
                    query.append('"').append(JSONObject.escape(types[n])).append('"');
                }
                query.append("],");
            } else if ("and".equals(operator)) {
                // NOTE(review): every type after the first reuses the key "and:type", so three or
                // more types produce duplicate JSON keys -- confirm how the MQL service treats that.
                for (int n = 0; n < types.length; n++) {
                    query.append(n == 0 ? "\"type\":\"" : "\"and:type\":\"")
                            .append(JSONObject.escape(types[n]))
                            .append("\",");
                }
            }
        }

        return query.append("\"limit\":").append(limit).append("}]").toString();
    }

    /**
     * Uses the MQL read API to look up the mids of the topics whose id is
     * {@code /wikipedia/en_id/<wikipedia_pageid>}.
     *
     * @param wikipedia_pageid page id appended to {@code /wikipedia/en_id/} in the query
     * @return the mids (as strings) found; empty when nothing matched or the response was not
     *         valid JSON
     * @throws IOException if building or executing the HTTP request fails
     */
    public List mqlapi_topic_mids_with_wikipedia_pageid(String wikipedia_pageid) throws IOException {
        long startMillis = System.currentTimeMillis();
        List<String> res = new ArrayList<>();

        // Escape the caller-supplied id so quotes or backslashes cannot break the JSON query.
        String query = "[{\"mid\":null," +
                "\"id\":\"/wikipedia/en_id/" + JSONObject.escape(wikipedia_pageid) + "\"" +
                "}]";

        GenericUrl url = new GenericUrl(properties.get(FB_QUERY_API_URL_MQL).toString());
        url.put("query", query);
        url.put("key", properties.get(FB_QUERY_API_KEY));

        // Reuse the transport and request factory created in the constructor.
        HttpRequest request = requestFactory.buildGetRequest(url);
        HttpResponse httpResponse = interrupter.executeQuery(request, true);
        try {
            JSONObject response = (JSONObject) jsonParser.parse(httpResponse.parseAsString());
            JSONArray results = (JSONArray) response.get("result");
            if (results != null) {
                for (Object result : results) {
                    Object mid = ((JSONObject) result).get("mid");
                    if (mid != null) {
                        res.add(mid.toString());
                    }
                }
            }
        } catch (ParseException e) {
            LOG.error("Cannot parse MQL response for Wikipedia page id " + wikipedia_pageid, e);
        }

        LOG.debug("\tQueryFreebase:" + (System.currentTimeMillis() - startMillis));

        return res;
    }

    /**
     * Given a name, uses the MQL read API to collect the mids of the topics with exactly that
     * name.
     * <p>
     * Results are fetched page by page using the API cursor. The page size comes from the
     * {@code fb.query.param.limit} property and at most {@code ceil(maxResults / limit)} pages
     * are requested, so up to a full extra page beyond {@code maxResults} may be returned.
     *
     * @param name       topic name to match
     * @param maxResults number of results wanted; determines how many pages are requested
     * @return the mids (as strings) found; never {@code null}
     * @throws IOException if building or executing an HTTP request fails
     */
    public List mqlapi_topic_mids_with_name(String name, int maxResults) throws IOException {
        long startMillis = System.currentTimeMillis();
        List<String> res = new ArrayList<>();

        int limit = Integer.parseInt(properties.get(FB_QUERY_PARAM_LIMIT).toString());
        int iterations = maxResults % limit == 0 ? maxResults / limit : maxResults / limit + 1;

        // The query is the same for every page (only the cursor changes). The name is escaped
        // so quotes or backslashes cannot break the JSON query.
        String query = "[{\"mid\":null," +
                "\"name\":\"" + JSONObject.escape(name) + "\"," +
                "\"limit\":" + limit +
                "}]";

        String cursorPoint = "";
        for (int i = 0; i < iterations; i++) {
            GenericUrl url = new GenericUrl(properties.get(FB_QUERY_API_URL_MQL).toString());
            url.put("query", query);
            url.put("key", properties.get(FB_QUERY_API_KEY));
            url.put("cursor", cursorPoint);

            // Reuse the transport and request factory created in the constructor.
            HttpRequest request = requestFactory.buildGetRequest(url);
            HttpResponse httpResponse = interrupter.executeQuery(request, true);
            LOG.debug("\tQueryFreebase: requested up to " + (limit * (i + 1)) + " mids");
            try {
                JSONObject response = (JSONObject) jsonParser.parse(httpResponse.parseAsString());
                JSONArray results = (JSONArray) response.get("result");
                if (results == null) {
                    break;
                }

                for (Object result : results) {
                    Object mid = ((JSONObject) result).get("mid");
                    if (mid != null) {
                        res.add(mid.toString());
                    }
                }

                // A short page, or a missing/false cursor, means there is nothing more to fetch.
                Object cursor = response.get("cursor");
                if (results.size() < limit || cursor == null || Boolean.FALSE.equals(cursor)) {
                    break;
                }
                cursorPoint = cursor.toString();
            } catch (ParseException e) {
                // As before, an unparseable page is not fatal; the next iteration re-requests it.
                LOG.error("Cannot parse MQL response for name " + name, e);
            }
        }
        LOG.debug("\tQueryFreebase:" + (System.currentTimeMillis() - startMillis));

        return res;
    }

    /**
     * Uses the MQL read API to collect the names of the instances of a type.
     * <p>
     * Note that the {@code name} parameter is the type id to query, while the returned list
     * holds instance names. Results are fetched page by page using the API cursor. The page
     * size comes from the {@code fb.query.param.limit} property and at most
     * {@code ceil(maxResults / limit)} pages are requested, so up to a full extra page beyond
     * {@code maxResults} may be returned. Instances without a name are skipped.
     *
     * @param name       type id whose instances are wanted
     * @param maxResults number of results wanted; determines how many pages are requested
     * @return the instance names found; never {@code null}
     * @throws IOException if building or executing an HTTP request fails
     */
    public List mqlapi_instances_of_type(String name, int maxResults) throws IOException {
        long startMillis = System.currentTimeMillis();
        List<String> res = new ArrayList<>();

        int limit = Integer.parseInt(properties.get(FB_QUERY_PARAM_LIMIT).toString());
        int iterations = maxResults % limit == 0 ? maxResults / limit : maxResults / limit + 1;

        // The query is the same for every page (only the cursor changes). The type id is escaped
        // so quotes or backslashes cannot break the JSON query.
        String query = "[{\"name\":null," +
                "\"type\":\"" + JSONObject.escape(name) + "\"," +
                "\"limit\":" + limit +
                "}]";

        String cursorPoint = "";
        for (int i = 0; i < iterations; i++) {
            GenericUrl url = new GenericUrl(properties.get(FB_QUERY_API_URL_MQL).toString());
            url.put("query", query);
            url.put("key", properties.get(FB_QUERY_API_KEY));
            url.put("cursor", cursorPoint);

            // Reuse the transport and request factory created in the constructor.
            HttpRequest request = requestFactory.buildGetRequest(url);
            HttpResponse httpResponse = interrupter.executeQuery(request, true);
            LOG.debug("\tQueryFreebase: requested up to " + (limit * (i + 1)) + " instances");
            try {
                JSONObject response = (JSONObject) jsonParser.parse(httpResponse.parseAsString());
                JSONArray results = (JSONArray) response.get("result");
                if (results == null) {
                    break;
                }

                for (Object result : results) {
                    // The query asks for "name": null, which comes back null for unnamed instances.
                    Object instanceName = ((JSONObject) result).get("name");
                    if (instanceName != null) {
                        res.add(instanceName.toString());
                    }
                }

                // A short page, or a missing/false cursor, means there is nothing more to fetch.
                Object cursor = response.get("cursor");
                if (results.size() < limit || cursor == null || Boolean.FALSE.equals(cursor)) {
                    break;
                }
                cursorPoint = cursor.toString();
            } catch (ParseException e) {
                // As before, an unparseable page is not fatal; the next iteration re-requests it.
                LOG.error("Cannot parse MQL response for type " + name, e);
            }
        }
        LOG.debug("\tQueryFreebase:" + (System.currentTimeMillis() - startMillis));

        return res;
    }

    /*public static void main(String[] args) throws IOException {
        FreebaseQueryProxy helper = new FreebaseQueryProxy("D:\\Work\\lodiedata\\tableminer_gs/freebase.properties");
        List artist= helper.mqlapi_instances_of_type("/music/artist",10000);
        System.out.println(artist);
    }*/


    /**
     * Determines a granularity value for a Freebase type by reading the type's page, whose URL
     * is the {@code fb.homepage} property followed by the type id and {@code "?instances="}.
     * <p>
     * Ids starting with {@code "/m/"} return {@code 1.0} immediately, without network access.
     * Otherwise the page is read line by line, looking for the marker {@code span data-value=}.
     * <p>
     * NOTE(review): the source of this method is damaged. The statement beginning
     * {@code if(start} inside the read loop is truncated, and the rest of the loop body, the
     * closing braces of the loop and the start of the final condition are missing. As it
     * stands the method does not compile; restore the lost lines from version control.
     *
     * @param type Freebase type id (or topic mid)
     * @return {@code 1.0} for {@code /m/} ids; otherwise, judging from the surviving tail, the
     *         extracted value parsed as a double or {@code 0.0} -- TODO confirm once restored
     * @throws IOException if the page cannot be opened or read
     */
    public double find_granularityForType(String type) throws IOException {
        if(type.startsWith("/m/")) //if the type id starts with "/m/" in strict sense it is a topic representing a concept
        //but is not listed as a type in freebase
            return 1.0;
        String url = properties.get(FB_HOMEPAGE).toString() +type+"?instances=";
        // NOTE(review): startTime is not used anywhere in the surviving code.
        Date startTime = new Date();
        URL connection = new URL(url);
        // NOTE(review): no close() for this reader is visible in the surviving code -- confirm
        // whether the lost lines closed it.
        BufferedReader in = new BufferedReader(
                new InputStreamReader(connection.openStream()));

        String result=null;
        String inputLine;
        while ((inputLine = in.readLine()) != null) {
            int start = inputLine.indexOf("span data-value=");
            if(start!=-1) {
                // 16 is the length of the marker "span data-value=", so start now points just past it.
                start+=16;
                // NOTE(review): this searches from absolute column 16 rather than from start,
                // which looks unintended when the marker is not at the beginning of the line -- confirm.
                int end = inputLine.indexOf(" ",16);
                // NOTE(review): the next line is truncated in this copy of the file (see method comment).
                if(start0)
            return new Double(result);
        return 0.0;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy