All downloads are free. Search and download functionality uses the official Maven repository.

uk.ac.shef.dcs.kbsearch.freebase.FreebaseSearch Maven / Gradle / Ivy

The newest version!
package uk.ac.shef.dcs.kbsearch.freebase;

import com.google.api.client.http.HttpResponseException;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import uk.ac.shef.dcs.kbsearch.KBSearch;
import uk.ac.shef.dcs.kbsearch.KBSearchException;
import uk.ac.shef.dcs.kbsearch.model.Attribute;
import uk.ac.shef.dcs.kbsearch.model.Clazz;
import uk.ac.shef.dcs.kbsearch.model.Entity;
import uk.ac.shef.dcs.util.SolrCache;
import uk.ac.shef.dcs.util.StringUtils;

import java.io.IOException;
import java.util.*;


/**
 * {@link KBSearch} implementation that queries Freebase through a {@link FreebaseQueryProxy}
 * and keeps the answers in the caches handed to the constructor.
 *
 * <p>Every lookup first consults the relevant cache; only on a miss (or when one of the
 * {@code ALWAYS_CALL_REMOTE_*} debug switches is on) is the remote API called, after which the
 * answer is written back to the cache.
 *
 * <p>NOTE(review): the return types of the overriding public methods are left exactly as they
 * are declared in this file, because the {@code KBSearch} declarations they override are not
 * visible here. Locals and private helpers are parameterised.
 */
public class FreebaseSearch extends KBSearch {

    private static final Logger LOG = Logger.getLogger(FreebaseSearch.class.getName());
    private static final boolean AUTO_COMMIT = true;

    // Two properties for debugging purposes. In practice both should be false; set them to true
    // if you want to deliberately trigger calls to the Freebase APIs.
    private static final boolean ALWAYS_CALL_REMOTE_SEARCHAPI = false;
    private static final boolean ALWAYS_CALL_REMOTE_TOPICAPI = false;
    private final FreebaseQueryProxy searcher;

    /**
     * Creates a search facade over the given caches.
     *
     * @param properties      configuration passed to the {@link FreebaseQueryProxy}; also read for
     *                        the {@code KB_SEARCH_RESULT_STOPLIST} entry used by the result filter
     * @param fuzzyKeywords   forwarded to the superclass; when true, a query with no hits is
     *                        retried after splitting it on "and", "or", "/" or ","
     * @param cacheEntity     forwarded to the superclass
     * @param cacheConcept    forwarded to the superclass
     * @param cacheProperty   forwarded to the superclass
     * @param cacheSimilarity forwarded to the superclass
     * @throws IOException declared by the constructor; presumably raised while setting up the
     *                     query proxy or the result filter
     */
    public FreebaseSearch(Properties properties, Boolean fuzzyKeywords,
                          EmbeddedSolrServer cacheEntity, EmbeddedSolrServer cacheConcept,
                          EmbeddedSolrServer cacheProperty, EmbeddedSolrServer cacheSimilarity) throws IOException {
        super(fuzzyKeywords, cacheEntity, cacheConcept, cacheProperty, cacheSimilarity);
        searcher = new FreebaseQueryProxy(properties);
        otherCache = new HashMap<>();
        resultFilter = new FreebaseSearchResultFilter(properties.getProperty(KB_SEARCH_RESULT_STOPLIST));
    }

    /**
     * Finds candidate entities for the given text, without any type restriction.
     *
     * @param content the text to look up
     * @return the candidate entities (possibly empty)
     * @throws KBSearchException if the remote lookup or the cache write fails
     */
    @Override
    public List findEntityCandidates(String content) throws KBSearchException {
        return find_matchingEntitiesForTextAndType(content);
    }

    /**
     * Finds candidate entities for the given text and keeps only those that have at least one
     * of the given types.
     *
     * @param content the text to look up
     * @param types   type identifiers; an entity is kept if it has any of them
     * @return the candidate entities (possibly empty)
     * @throws KBSearchException if the remote lookup or the cache write fails
     */
    @Override
    public List findEntityCandidatesOfTypes(String content, String... types) throws KBSearchException {
        return find_matchingEntitiesForTextAndType(content, types);
    }

    /**
     * Returns the (filtered) attributes of an entity, using the entity cache.
     *
     * @param ec the entity whose id is looked up
     * @return the attributes (possibly empty)
     * @throws KBSearchException if the remote lookup fails for a reason other than "notFound"
     */
    @Override
    public List findAttributesOfEntities(Entity ec) throws KBSearchException {
        return find_attributes(ec.getId(), cacheEntity);
    }

    /**
     * Returns the (filtered) attributes of a property, using the property cache.
     *
     * @param propertyId id of the property
     * @return the attributes (possibly empty)
     * @throws KBSearchException if the remote lookup fails for a reason other than "notFound"
     */
    @Override
    public List findAttributesOfProperty(String propertyId) throws KBSearchException {
        return find_attributes(propertyId, cacheProperty);
    }


    /**
     * Core entity lookup: cache first, remote search on a miss, then type restriction and
     * clazz filtering on whatever was found.
     *
     * @param text  the text to look up; the cache key is built from it exactly as supplied,
     *              before XML-unescaping and before any trailing "(...)" part is cut off
     * @param types optional type identifiers; when present, only entities having at least one
     *              of them are returned
     * @return the matching entities (possibly empty)
     * @throws KBSearchException if the remote lookup or the cache write fails
     */
    private List<Entity> find_matchingEntitiesForTextAndType(String text, String... types) throws KBSearchException {
        String query = createSolrCacheQuery_findResources(text);

        text = StringEscapeUtils.unescapeXml(text);
        int bracket = text.indexOf("(");
        if (bracket != -1) {
            text = text.substring(0, bracket).trim();
        }
        if (StringUtils.toAlphaNumericWhitechar(text).trim().length() == 0) {
            return new ArrayList<>();
        }

        List<Entity> result = null;
        if (!ALWAYS_CALL_REMOTE_SEARCHAPI) {
            try {
                @SuppressWarnings("unchecked") // the entry was written by searchRemoteAndCache below
                List<Entity> cached = (List<Entity>) cacheEntity.retrieve(query);
                result = cached;
                if (result != null) {
                    LOG.debug("QUERY (entities, cache load)=" + query + "|" + query);
                }
            } catch (Exception e) {
                // Best effort: a broken cache read is treated as a miss.
                LOG.warn("Entity cache lookup failed, falling back to the remote search: " + query, e);
            }
        }
        if (result == null) {
            result = searchRemoteAndCache(text, query);
        }

        if (types.length > 0) {
            Iterator<Entity> it = result.iterator();
            while (it.hasNext()) {
                if (!hasAnyType(it.next(), types)) {
                    it.remove();
                }
            }
        }

        // Filter each entity's clazz list through the configured result filter.
        for (Entity ec : result) {
            List<Clazz> filteredTypes = getResultFilter().filterClazz(ec.getTypes());
            ec.clearTypes();
            for (Clazz ft : filteredTypes) {
                ec.addType(ft);
            }
        }

        return result;
    }

    /**
     * Queries the remote search API for {@code text}, enriches every hit with its attributes and
     * direct types, and stores the outcome in the entity cache under {@code query}.
     */
    private List<Entity> searchRemoteAndCache(String text, String query) throws KBSearchException {
        List<Entity> result = new ArrayList<>();
        try {
            // Firstly fetch candidate freebase topics. Pass 'true' to only keep candidates whose
            // name overlaps with the query term. The search api does not retrieve complete
            // types, so the types are derived from the attributes below.
            List<FreebaseTopic> topics = searcher.searchapi_getTopicsByNameAndType(text, "any", true, 20);
            LOG.debug("(FB QUERY =" +topics.size()+" results)");
            for (FreebaseTopic topic : topics) {
                @SuppressWarnings("unchecked") // findAttributesOfEntities is declared with a raw List here
                List<Attribute> attributes = findAttributesOfEntities(topic);
                topic.setAttributes(attributes);
                for (Attribute attr : attributes) {
                    if (attr.getRelationURI().equals(FreebaseEnum.RELATION_HASTYPE.getString())
                            && attr.isDirect()
                            && !topic.hasType(attr.getValueURI())) {
                        topic.addType(new Clazz(attr.getValueURI(), attr.getValue()));
                    }
                }
            }

            // No hit: if the query contains a conjunction, try again with the split parts.
            if (topics.size() == 0 && fuzzyKeywords) {
                String[] queries = splitOnConjunction(text);
                if (queries.length > 1) {
                    for (String q : queries) {
                        q = q.trim();
                        if (q.length() < 1) {
                            continue;
                        }
                        // Deliberately no type restriction here: the list cached below is keyed
                        // on the text only, so it must not depend on the caller's types. The
                        // caller applies its type restriction afterwards.
                        result.addAll(find_matchingEntitiesForTextAndType(q));
                    }
                }
            }

            result.addAll(topics);
            cacheEntity.cache(query, result, AUTO_COMMIT);
            LOG.debug("QUERY (entities, cache save)=" + query + "|" + query);
        } catch (Exception e) {
            throw new KBSearchException(e);
        }
        return result;
    }

    /**
     * Splits {@code text} on the first separator (word "and", word "or", "/", ",") that yields
     * at least two parts; returns the outcome of the last attempted split otherwise.
     */
    private static String[] splitOnConjunction(String text) {
        String[] separators = {"\\band\\b", "\\bor\\b", "/", ","};
        String[] parts = {text};
        for (String separator : separators) {
            parts = text.split(separator);
            if (parts.length >= 2) {
                break;
            }
        }
        return parts;
    }

    /** Tells whether {@code entity} has at least one of {@code types}. */
    private static boolean hasAnyType(Entity entity, String[] types) {
        for (String t : types) {
            if (entity.hasType(t)) {
                return true;
            }
        }
        return false;
    }


    /**
     * Returns the (filtered) attributes of a clazz, using the concept cache.
     *
     * <p>In FB, getting the attributes of a class is different from that for entities and
     * properties, so this is implemented separately and cannot use {@code find_attributes}:
     * for a concept, each of its properties is expanded into one attribute per declared range.
     * A topic that is not a concept yields an empty list (which is cached as such).
     *
     * @param clazz id of the clazz
     * @return the attributes (possibly empty)
     * @throws KBSearchException if the remote lookup or the final cache write fails
     */
    @Override
    public List findAttributesOfClazz(String clazz) throws KBSearchException {
        String query = createSolrCacheQuery_findAttributesOfResource(clazz);
        if (query.length() == 0) {
            return new ArrayList<Attribute>();
        }

        // Starts as null so that a failed cache read is treated as a miss and still leads to the
        // remote lookup (an empty list here would silently suppress it).
        List<Attribute> attributes = null;
        try {
            @SuppressWarnings("unchecked") // the entry was written by this method
            List<Attribute> cached = (List<Attribute>) cacheConcept.retrieve(query);
            attributes = cached;
            if (attributes != null) {
                LOG.debug("QUERY (attributes of clazz, cache load)=" + query + "|" + query);
            }
        } catch (Exception e) {
            LOG.warn("Concept cache lookup failed, falling back to the remote topic api: " + query, e);
        }

        if (attributes == null || ALWAYS_CALL_REMOTE_TOPICAPI) {
            try {
                attributes = new ArrayList<>();
                List<Attribute> retrievedAttributes = searcher.topicapi_getAttributesOfTopic(clazz);

                // Check firstly: is this a concept?
                boolean isConcept = false;
                for (Attribute f : retrievedAttributes) {
                    if (f.getRelationURI().equals(FreebaseEnum.RELATION_HASTYPE.getString())
                            && f.getValueURI() != null
                            && f.getValueURI().equals(FreebaseEnum.TYPE_TYPE.getString())) {
                        isConcept = true;
                        break;
                    }
                }
                if (!isConcept) {
                    try {
                        cacheConcept.cache(query, attributes, AUTO_COMMIT);
                        LOG.debug("QUERY (attributes of clazz, cache save)=" + query + "|" + query);
                    } catch (Exception e) {
                        LOG.error("Failed to cache attributes of clazz: " + query, e);
                    }
                    return attributes;
                }

                // OK, this is a concept. Deep-fetch its properties and find out their ranges.
                for (Attribute f : retrievedAttributes) {
                    if (f.getRelationURI().equals(FreebaseEnum.TYPE_PROPERTYOFTYPE.getString())) {
                        String propertyId = f.getValueURI();
                        if (propertyId == null) {
                            continue;
                        }

                        @SuppressWarnings("unchecked") // findAttributesOfProperty is declared with a raw List here
                        List<Attribute> attrOfProperty = findAttributesOfProperty(propertyId);
                        for (Attribute t : attrOfProperty) {
                            if (t.getRelationURI().equals(FreebaseEnum.RELATION_RANGEOFPROPERTY.getString())) {
                                Attribute attr = new FreebaseAttribute(propertyId, t.getValue());
                                attr.setValueURI(t.getValueURI());
                                attr.setIsDirect(true);
                                // Previously the attribute was built and then dropped.
                                attributes.add(attr);
                            }
                        }
                    } else {
                        attributes.add(f);
                    }
                }

                cacheConcept.cache(query, attributes, AUTO_COMMIT);
                LOG.debug("QUERY (attributes of clazz, cache save)=" + query + "|" + query);
            } catch (Exception e) {
                throw new KBSearchException(e);
            }
        }

        attributes = getResultFilter().filterAttribute(attributes);
        return attributes;
    }

    /**
     * Returns the granularity score of a clazz, using the concept cache.
     *
     * @param clazz id of the clazz
     * @return the cached or freshly fetched score, or {@code -1.0} when the remote lookup fails
     *         with an {@link IOException}
     * @throws KBSearchException declared by the signature; not thrown by this implementation
     */
    @Override
    public double findGranularityOfClazz(String clazz) throws KBSearchException {
        String query = createSolrCacheQuery_findGranularityOfClazz(clazz);
        try {
            Object o = cacheConcept.retrieve(query);
            if (o != null) {
                LOG.debug("QUERY (granularity of clazz, cache load)=" + query + "|" + clazz);
                return (Double) o;
            }
        } catch (Exception e) {
            LOG.warn("Concept cache lookup failed, falling back to the remote api: " + query, e);
        }

        try {
            Double result = searcher.find_granularityForType(clazz);
            try {
                cacheConcept.cache(query, result, AUTO_COMMIT);
                LOG.debug("QUERY (granularity of clazz, cache save)=" + query + "|" + clazz);
            } catch (Exception e) {
                LOG.error("FAILED:" + clazz, e);
            }
            return result;
        } catch (IOException ioe) {
            LOG.error("ERROR(Instances of Type): Unable to fetch freebase page of instances of type: " + clazz, ioe);
            return -1.0;
        }
    }


    /**
     * Looks up a previously cached similarity score between an entity and a clazz.
     *
     * @param id1       id of the entity
     * @param clazz_url id of the clazz
     * @return the cached score, or {@code -1.0} if nothing is cached or the cache read fails
     */
    public double findEntityClazzSimilarity(String id1, String clazz_url) {
        String query = createSolrCacheQuery_findEntityClazzSimilarity(id1, clazz_url);
        Object result = null;
        try {
            result = cacheSimilarity.retrieve(query);
            if (result != null) {
                LOG.debug("QUERY (entity-clazz similarity, cache load)=" + query + "|" + query);
            }
        } catch (Exception e) {
            LOG.warn("Similarity cache lookup failed: " + query, e);
        }
        if (result == null) {
            return -1.0;
        }
        return (Double) result;
    }

    /**
     * Stores a similarity score between an entity and a clazz in the similarity cache.
     * Failures are logged and otherwise ignored.
     *
     * @param id1           id of the entity
     * @param clazz_url     id of the clazz
     * @param score         the score to store
     * @param biDirectional when true, the score is also stored under the key
     *                      {@code clazz_url + "<>" + id1}
     * @param commit        passed through to the cache write
     */
    public void cacheEntityClazzSimilarity(String id1, String clazz_url, double score, boolean biDirectional,
                                           boolean commit) {
        String query = createSolrCacheQuery_findEntityClazzSimilarity(id1, clazz_url);
        try {
            cacheSimilarity.cache(query, score, commit);
            LOG.debug("QUERY (entity-clazz similarity, cache saving)=" + query + "|" + query);
            if (biDirectional) {
                query = clazz_url + "<>" + id1;
                cacheSimilarity.cache(query, score, commit);
                LOG.debug("QUERY (entity-clazz similarity, cache saving)=" + query + "|" + query);
            }
        } catch (Exception e) {
            LOG.error("Failed to cache entity-clazz similarity: " + query, e);
        }
    }

    /**
     * Shared attribute lookup for entities and properties: cache first, remote topic API on a
     * miss, result filter on the way out.
     *
     * @param id    id of the resource; an empty id yields an empty list
     * @param cache the cache to read from and write to
     * @return the filtered attributes
     * @throws KBSearchException if the remote call fails, unless the failure is an
     *                           {@link HttpResponseException} reporting "notFound", in which
     *                           case an empty attribute list is used (and cached)
     */
    private List<Attribute> find_attributes(String id, SolrCache cache) throws KBSearchException {
        if (id.length() == 0) {
            return new ArrayList<>();
        }

        String query = createSolrCacheQuery_findAttributesOfResource(id);
        List<Attribute> result = null;
        try {
            @SuppressWarnings("unchecked") // the entry was written by this method
            List<Attribute> cached = (List<Attribute>) cache.retrieve(query);
            result = cached;
            if (result != null) {
                LOG.debug("QUERY (attributes of id, cache load)=" + query + "|" + query);
            }
        } catch (Exception e) {
            LOG.warn("Attribute cache lookup failed, falling back to the remote topic api: " + query, e);
        }
        if (result == null || ALWAYS_CALL_REMOTE_TOPICAPI) {
            List<Attribute> attributes;
            try {
                attributes = searcher.topicapi_getAttributesOfTopic(id);
            } catch (Exception e) {
                if (e instanceof HttpResponseException && donotRepeatQuery((HttpResponseException) e)) {
                    attributes = new ArrayList<>();
                } else {
                    throw new KBSearchException(e);
                }
            }
            result = new ArrayList<>();
            result.addAll(attributes);
            try {
                cache.cache(query, result, AUTO_COMMIT);
                LOG.debug("QUERY (attributes of id, cache save)=" + query + "|" + query);
            } catch (Exception e) {
                LOG.error("Failed to cache attributes of id: " + query, e);
            }
        }

        result = getResultFilter().filterAttribute(result);
        return result;
    }

    /**
     * Commits pending writes on the concept, entity, property and similarity caches and on
     * every cache registered in {@code otherCache}.
     *
     * @throws KBSearchException if any commit fails
     */
    public void commitChanges() throws KBSearchException {
        try {
            cacheConcept.commit();
            cacheEntity.commit();
            cacheProperty.commit();
            // Similarity scores may have been written with commit=false.
            if (cacheSimilarity != null) {
                cacheSimilarity.commit();
            }
            for (SolrCache cache : otherCache.values()) {
                cache.commit();
            }
        } catch (Exception e) {
            throw new KBSearchException(e);
        }
    }

    /**
     * Tells whether a failed remote call should be treated as "no data" instead of an error,
     * i.e. whether the response content reports {@code "reason": "notFound"}.
     */
    private boolean donotRepeatQuery(HttpResponseException e) {
        String message = e.getContent();
        // The content may be absent; in that case the failure is not a known "notFound".
        return message != null && message.contains("\"reason\": \"notFound\"");
    }


    /**
     * Shuts down the entity, concept, property and similarity caches that are present.
     *
     * @throws KBSearchException if any shutdown fails
     */
    @Override
    public void closeConnection() throws KBSearchException {
        try {
            if (cacheEntity != null) {
                cacheEntity.shutdown();
            }
            if (cacheConcept != null) {
                cacheConcept.shutdown();
            }
            if (cacheProperty != null) {
                cacheProperty.shutdown();
            }
            if (cacheSimilarity != null) {
                cacheSimilarity.shutdown();
            }
        } catch (Exception e) {
            throw new KBSearchException(e);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy