![JAR search and dependency download from the Maven repository](/logo.png)
it.unipi.di.acube.searchapi.callers.BingSearchApiCaller Maven / Gradle / Ivy
package it.unipi.di.acube.searchapi.callers;
import java.lang.invoke.MethodHandles;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Date;
import java.util.List;
import java.util.Vector;
import javax.xml.bind.DatatypeConverter;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.HttpClientBuilder;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import it.unipi.di.acube.searchapi.WebsearchApi;
import it.unipi.di.acube.searchapi.interfaces.WebSearchApiCaller;
import it.unipi.di.acube.searchapi.model.WebsearchResponse;
import it.unipi.di.acube.searchapi.model.WebsearchResponseEntry;
/**
* Interface to the Bing Search API.
*
* @author Marco Cornolti
*
*/
public class BingSearchApiCaller implements WebSearchApiCaller {
private final static Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final String API_PROTOCOL = "https";
private static final String API_HOST = "api.cognitive.microsoft.com";
private static final String API_PATH = "/bing/v7.0/search";
private static final String DEFAULT_MARKET = "en-US";
private static final SafeSearchOpt DEFAULT_SAFE_SEARCH = SafeSearchOpt.OFF;
private static final int MAX_RESULTS_PER_QUERY = 50;
private static final boolean TEXT_DECORATIONS = true;
private static URIBuilder builder = new URIBuilder();
private String bingKey;
private String market = DEFAULT_MARKET;
private SafeSearchOpt safeSearch = DEFAULT_SAFE_SEARCH;
public enum SafeSearchOpt {
OFF, MODERATE, STRICT
}
/**
* @param bingKey
* the key that will be used to access the Bing API.
*/
public BingSearchApiCaller(String bingKey) {
this.bingKey = bingKey;
}
/**
* @param market
* the market, e.g. "en-US" (see Bing API reference).
* @return this.
*/
public BingSearchApiCaller setMarket(String market) {
if (!market.matches("[a-z][a-z]-[A-Z][A-Z]"))
throw new IllegalArgumentException("Market must be in the form en-US");
this.market = market;
return this;
}
/**
* @param safeSearch
* the safeSearch parameter (see Bing API reference).
* @return this.
*/
public BingSearchApiCaller setSafeSearch(SafeSearchOpt safeSearch) {
this.safeSearch = safeSearch;
return this;
}
private static String safeSearchToString(SafeSearchOpt opt) {
switch (opt) {
case OFF:
return "Off";
case MODERATE:
return "Moderate";
default:
return "Strict";
}
}
@Override
public JSONObject query(String query, int resultsSoFar) throws Exception {
URI uri = getQueryURI(query, resultsSoFar);
HttpGet get = new HttpGet(uri);
get.setHeader("Accept", "*/*");
get.setHeader("Content-Type", "multipart/form-data");
get.setHeader("Ocp-Apim-Subscription-Key", bingKey);
HttpClient httpClient = HttpClientBuilder.create().build();
LOG.info(" {}", uri.toString());
HttpResponse response = httpClient.execute(get);
if (response.getStatusLine().getStatusCode() != 200) {
LOG.error("Got HTTP error {}. Message is: {}", response.getStatusLine().getStatusCode(),
IOUtils.toString(response.getEntity().getContent(), "utf-8"));
throw new RuntimeException("Got response code:" + response.getStatusLine().getStatusCode());
}
return new JSONObject(IOUtils.toString(response.getEntity().getContent(), "utf-8"));
}
@Override
public URI getQueryURI(String query, int resultsSoFar) throws URISyntaxException {
URI uri = builder.clearParameters().setScheme(API_PROTOCOL).setHost(API_HOST).setPath(API_PATH).addParameter("q", query)
.addParameter("count", Integer.toString(MAX_RESULTS_PER_QUERY)).addParameter("mkt", market)
.addParameter("safeSearch", safeSearchToString(safeSearch))
.addParameter("textDecorations", Boolean.toString(TEXT_DECORATIONS))
.addParameter("offset", Integer.toString(resultsSoFar)).build();
return uri;
}
@Override
public int countResults(JSONObject bingResponse) throws JSONException {
if (!bingResponse.has("webPages"))
return 0;
return bingResponse.getJSONObject("webPages").getJSONArray("value").length();
}
@Override
public boolean recacheNeeded(List bingResponses) throws JSONException {
for (JSONObject bingResponse : bingResponses) {
if (bingResponse == null)
return true;
String type = bingResponse.getString("_type");
if (type == null)
return true;
}
return false;
}
@Override
public WebsearchResponse buildResponseFromJson(List calledUris, List bingResponses, int neededResults)
throws JSONException {
long totalEstimatedMatches = bingResponses.get(0).has("webPages")
? bingResponses.get(0).getJSONObject("webPages").getLong("totalEstimatedMatches") : 0;
List webEntries = new Vector<>();
for (JSONObject bingResponse : bingResponses)
if (bingResponse.has("webPages")) {
List webEntriesI = getWebEntries(
bingResponse.getJSONObject("webPages").getJSONArray("value"));
for (WebsearchResponseEntry entry : webEntriesI) {
if (neededResults == 0)
break;
webEntries.add(entry);
neededResults--;
}
if (neededResults == 0)
break;
}
return new WebsearchResponse(totalEstimatedMatches, webEntries, calledUris, bingResponses);
}
private static List getWebEntries(JSONArray value) throws JSONException {
Vector webEntries = new Vector<>();
for (int i = 0; i < value.length(); i++) {
JSONObject entry = value.getJSONObject(i);
Date lastCrawled = entry.has("dateLastCrawled")
? DatatypeConverter.parseDateTime(entry.getString("dateLastCrawled")).getTime() : null;
webEntries.add(new WebsearchResponseEntry(entry.getString("name"), entry.getString("displayUrl")
.replaceAll(WebsearchApi.SNIPPET_BOLD_END_STR, "").replaceAll(WebsearchApi.SNIPPET_BOLD_START_STR, ""),
entry.getString("snippet"), lastCrawled));
}
return webEntries;
}
@Override
public boolean queryComplete(List jsonResponses, int neededResults) throws JSONException {
int count = 0;
for (JSONObject jsonResponse : jsonResponses) {
count += countResults(jsonResponse);
if (count >= neededResults)
return true;
if (countResults(jsonResponse) == 0)
return true;
if (jsonResponse.getJSONObject("rankingResponse").getJSONObject("mainline").getJSONArray("items")
.length() < MAX_RESULTS_PER_QUERY)
return true;
}
return false;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy