com.netbase.insightapi.clientlib.InsightAPIQuery Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of netbase4j Show documentation
Show all versions of netbase4j Show documentation
Java Library facilitating making queries to NetBase social media system
The newest version!
package com.netbase.insightapi.clientlib;
import java.io.IOException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicLong;
import javax.servlet.http.HttpServletResponse;
import org.apache.http.Header;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.HttpVersion;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import org.json.simple.JSONValue;
/*
Copyright 2014, NetBase Solutions, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/**
* The InsightAPIQuery class represents the operation name (e.g.
* "insightCount"), parameters and result state for a single query. It contains
* support methods for creating and manipulating the URL-encoded arguments, and
* leverages Apache's HTTPComponents library to make the requests.
*
* A typical use is to create an InsightAPIQuery with one of the operation
* names, set some parameters, and run the query from either the run() or
* start() method of UserChannel.
*/
public class InsightAPIQuery
{
protected static final String PUBLISHED_DATE = "publishedDate";
protected static final String PUBLISHED_TIMESTAMP = "timestamp";
protected static final String ALERT_DATE = "alertDatetime";
protected static final String ALERT_TIMESTAMP = "alertTimestamp";
protected static final String REALTIME = "realTime";
protected static final String URL_ARG_ENCODING_CHARSET = "UTF-8";
/*
* The following InsightAPI operations do not affect the rate limit, and may
* be run at any time. All others will be assumed to be controlled by either
* the general or realtime rate limit, depending on the "realTime"
* parameter.
*/
protected static final List rateLimitFreebies;
static {
rateLimitFreebies = new ArrayList();
rateLimitFreebies.add("helloWorld");
rateLimitFreebies.add("topics");
rateLimitFreebies.add("themes");
}
// timespan of data accessible with the realTime=true parameter
protected static final long ALERT_DATE_REALTIME_LIMIT = 24L * 60L * 60L * 1000L;
// 2012-01-01 00:00:00.000 GMT
protected static final long NETBASE_EPOCH_TIMESTAMP = 1325376000000L;
// this is not static because DateFormat objects are not thread safe;
// we create one per query just to avoid having to synchronize on a
// single copy
protected SimpleDateFormat dateFormat;
// serial numbers for the InsightAPI queries are for convenience in
// logging. The API will run several of them at a time, if the user
// wishes and the rate limit allows. Without serial numbers in the
// log messages, connecting requests and results (and errors) can
// be difficult.
protected static AtomicLong masterSerial = new AtomicLong(1000L);
protected long serial = masterSerial.incrementAndGet();
// URL and its parameters
protected String urlBase;
protected Map> parameters = new TreeMap>();
protected String op;
protected boolean mustRespectRateLimit = true;
// settings for proxy connectivity (default: none)
protected String proxyServer;
protected int proxyPort = 3128;
protected String proxyScheme = "http";
// timeout and retry settings
protected int timeout = 30000;
protected int maxRetries = 3;
// query result
protected String responseContent;
protected Object parsedContent;
protected Map headers = new TreeMap();
protected int statusCode = -1;
protected String statusString;
protected long elapsedMs;
protected boolean debug;
/**
* Create a new InsightAPI Query with the specified operation.
*
* @param op
*/
public InsightAPIQuery (String op)
{
this();
setOp(op);
}
/**
* Create a new InsightAPI Query with the default ("helloWorld") operation.
*/
public InsightAPIQuery ()
{
setOp("helloWorld");
// NB: The InsightAPI accepts several different datetime formats,
// but this is one of them. We use it here only for composing
// time arguments given java Dates and long times.
dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
dateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
}
/**
* Clone an InsightAPI query, including all its control parameters, but not
* its result.
*
* @param other
*/
public InsightAPIQuery (InsightAPIQuery other)
{
this(other.op);
for (Map.Entry> ent : other.parameters.entrySet()) {
for (String value : ent.getValue())
addParameter(ent.getKey(), value);
}
urlBase = other.urlBase;
proxyServer = other.proxyServer;
proxyPort = other.proxyPort;
proxyScheme = other.proxyScheme;
timeout = other.timeout;
maxRetries = other.maxRetries;
debug = other.debug;
}
/**
* Set the operation (eg "helloWorld" or "retrieveDocuments") that this
* InsightAPI query will perform
*
* @param op
*/
public void setOp (String op)
{
this.op = op;
mustRespectRateLimit = !rateLimitFreebies.contains(op);
}
// =========================================================================
// Date range setting methods. These facilitate setting these ranges, by
// providing the formatting and other assumptions that the API requires, and
// sets the "realTime" property in conjunction with them.
//
// If the user provides either java.util.Dates or long time values, we
// format these as string arguments, because the full millisecond range is
// supported in the NetBase index. If the user provides Integer timestamp
// values, we give these to the server as-is. These have 1/10 second
// resolution, but probably came from the server, and it makes sense to
// return them in the same scale.
// =========================================================================
/**
* Sets the query to be restricted to a specific published date range. The
* operation removes any existing published date range. If d1 is null,
* nothing else is done. If d2 is null, the endpoint of the range is not
* set.
*
* @param d1
* @param d2
*/
public void setPublishedDateRange (java.util.Date d1, java.util.Date d2)
{
removeParameters(PUBLISHED_DATE);
if (d1 == null)
return;
setParameter(PUBLISHED_DATE, d1);
if (d2 != null)
addParameter(PUBLISHED_DATE, d2);
setRealtime(d1);
}
/**
* Sets the query to be restricted to a specific published date range. The
* operation removes any existing published date range. If d1 is
* Long.MIN_VALUE, nothing else is done. If d2 is Long.MAX_VALUE, the
* endpoint of the range is not set.
*
* @param d1
* @param d2
*/
public void setPublishedDateRange (long d1, long d2)
{
setPublishedDateRange(d1 == Long.MIN_VALUE ? null
: new java.util.Date(d1), d2 == Long.MAX_VALUE ? null
: new java.util.Date(d2));
}
/**
* Sets the query to be restricted to a specific published date range. The
* operation removes any existing published date range. If d1 is
* Integer.MIN_VALUE, nothing else is done. If d2 is Integer.MAX_VALUE, the
* endpoint of the range is not set.
*
* @param d1
* @param d2
*/
public void setPublishedTimestampRange (int d1, int d2)
{
removeParameters(PUBLISHED_TIMESTAMP);
if (d1 == Integer.MIN_VALUE)
return;
setParameter(PUBLISHED_TIMESTAMP, d1);
if (d2 != Integer.MAX_VALUE)
addParameter(PUBLISHED_TIMESTAMP, d2);
setRealtime(new java.util.Date(timestampToLongTime(d1)));
}
/**
* Sets the query to be restricted to a specific alert date range. The
* operation removes any existing alert date range. If d1 is null, nothing
* else is done. If d2 is null, the endpoint of the range is not set.
*
* @param d1
* @param d2
*/
public void setAlertDateRange (java.util.Date d1, java.util.Date d2)
{
removeParameters(ALERT_DATE);
if (d1 == null)
return;
setParameter(ALERT_DATE, d1);
if (d2 != null)
addParameter(ALERT_DATE, d2);
setRealtime(d1);
}
/**
* Sets the query to be restricted to a specific alert date range. The
* operation removes any existing alert date range. If d1 is Long.MIN_VALUE,
* nothing else is done. If d2 is Long.MAX_VALUE, the endpoint of the range
* is not set.
*
* @param d1
* @param d2
*/
public void setAlertDateRange (long d1, long d2)
{
setAlertDateRange(d1 == Long.MIN_VALUE ? null : new java.util.Date(d1),
d2 == Long.MAX_VALUE ? null : new java.util.Date(d2));
}
/**
* Sets the query to be restricted to a specific alert date range. The
* operation removes any existing alert date range. If d1 is
* Integer.MIN_VALUE, nothing else is done. If d2 is Integer.MAX_VALUE, the
* endpoint of the range is not set.
*
* @param d1
* @param d2
*/
public void setAlertTimestampRange (int d1, int d2)
{
removeParameters(ALERT_TIMESTAMP);
if (d1 == Integer.MIN_VALUE)
return;
setParameter(ALERT_TIMESTAMP, d1);
if (d2 != Integer.MAX_VALUE)
addParameter(ALERT_TIMESTAMP, d2);
setRealtime(new java.util.Date(timestampToLongTime(d1)));
}
/**
* Sets the query's "realTime" parameter depending on the value of the
* argument, which is assumed to be the earliest time the query will
* reference.
*
* @param d
*/
public void setRealtime (java.util.Date d)
{
boolean realtime = System.currentTimeMillis() - d.getTime() < ALERT_DATE_REALTIME_LIMIT;
setRealtime(realtime);
}
/**
* Sets the query's "realTime" parameter depending on the value of the
* argument.
*
* @param d
*/
public void setRealtime (boolean realtime)
{
setParameter(REALTIME, realtime);
}
/**
* Returns true if the query has a "realTime" parameter, and its value is
* "true".
*
* @return
*/
public boolean isRealtime ()
{
List realtime = getParameters(REALTIME);
if (realtime == null || realtime.size() == 0)
return (false);
return (realtime.get(0).equalsIgnoreCase("true"));
}
/**
* Utility routine that converts a NetBase timestamp value into a java time
* (milliseconds since the java epoch). Preserves MIN_VALUE and MAX_VALUE
* conventions.
*
* @param timestamp
* @return
*/
public static long timestampToLongTime (int timestamp)
{
switch (timestamp) {
case Integer.MIN_VALUE:
return (Long.MIN_VALUE);
case Integer.MAX_VALUE:
return (Long.MAX_VALUE);
default:
return (((long)timestamp) * 100 + NETBASE_EPOCH_TIMESTAMP);
}
}
/**
* Utility routine that converts a java time to a NetBase timestamp.
* Preserves MIN_VALUE and MAX_VALUE conventions.
*
* @param time
* @return
*/
public static int longTimeToTimestamp (long time)
{
if (time == Long.MIN_VALUE)
return (Integer.MIN_VALUE);
if (time == Long.MAX_VALUE)
return (Integer.MAX_VALUE);
return ((int)((time - NETBASE_EPOCH_TIMESTAMP) / 100L));
}
/**
* Returns true if the query is complete. An InsightAPIQuery is not complete
* unless it's finished, and it did not return a rate limit exceeded
* complaint. This is distinct from "succeeded", which requires that
* statusCode == SC_OK (200).
*/
public boolean isComplete ()
{
return (statusCode != -1 && (statusCode != HttpServletResponse.SC_FORBIDDEN || !responseContent.toLowerCase()
.contains("rate limit")));
}
/**
* Blocks the current thread until this Query is complete.
*
* @throws InterruptedException
*/
public void waitForCompletion ()
throws InterruptedException
{
while (!isComplete()) {
synchronized (this) {
this.wait();
}
}
}
/**
* May be overridden to use a different JSON Parser than the one included in
* this library.
*/
public void parseResponseContent ()
{
parsedContent = JSONValue.parse(responseContent);
}
// ============================
// Methods to manage parameters
// ============================
/**
* Add a parameter, by name, to the query. If the parameter already exists,
* an additional sequential value will be appended to the query. The value
* of the parameter is converted to string form in this method.
*
* @param name
* @param value
*/
public void addParameter (String name, Object value)
{
// null values are ignored.
if (value != null) {
List values = parameters.get(name);
if (values == null) {
values = new ArrayList();
parameters.put(name, values);
}
values.add(parameterToString(value));
}
}
/**
* Convert a parameter from an Object to a string. This method may be
* overridden to add more specialized encoding of parameters.
*
* The default implementation is not trivial; it provides proper conversion
* for java.util.Date objects. User code that overrides this method should
* continue to call it for date classes.
*
* @param param
* @return
*/
public String parameterToString (Object param)
{
if (param instanceof java.util.Date)
return (dateFormat.format(param));
else
return (param.toString());
}
/**
* Remove all values of this parameter, and then add this one.
*
* @param name
* @param value
*/
public void setParameter (String name, Object value)
{
removeParameters(name);
addParameter(name, value);
}
public void addParameter (String name, long value)
{
addParameter(name, Long.valueOf(value));
}
public void setParameter (String name, long value)
{
removeParameters(name);
addParameter(name, value);
}
public void addParameter (String name, int value)
{
addParameter(name, Integer.valueOf(value));
}
public void setParameter (String name, String value)
{
removeParameters(name);
addParameter(name, value);
}
public void addParameter (String name, boolean value)
{
addParameter(name, Boolean.valueOf(value));
}
public void setParameter (String name, boolean value)
{
removeParameters(name);
addParameter(name, value);
}
public void addParameters (String name, Object... values)
{
for (Object value : values)
addParameter(name, value);
}
public void addParameters (String name, List