/*
 * com.nfbsoftware.diffbot.DiffBotClient Maven / Gradle / Ivy
 * Go to download
 * Show more of this group Show more artifacts with this name
 * Show all versions of simple-diffbot Show documentation
 *
 * The NFB Software Simple-Diffbot is a Java wrapper for the Diffbot.com API for website data extraction.
 */
package com.nfbsoftware.diffbot;
import java.net.URLEncoder;
import java.util.List;
import com.nfbsoftware.diffbot.model.Article;
import com.nfbsoftware.diffbot.model.ArticleResponse;
import com.nfbsoftware.diffbot.model.ErrorResponse;
import com.nfbsoftware.diffbot.model.Image;
import com.nfbsoftware.diffbot.model.ImageResponse;
import com.nfbsoftware.diffbot.model.Video;
import com.nfbsoftware.diffbot.model.VideoResponse;
import com.nfbsoftware.util.WebPost;
import flexjson.JSONDeserializer;
/**
 * Java utility class used to communicate with the DiffBot RESTful API.
 *
 * <p>Each public method builds a request URL for one of the v3 endpoints (article, image, video),
 * fetches the JSON response through {@link WebPost} and deserializes it with flexjson into the
 * matching response model.</p>
 *
 * <p>NOTE(review): the endpoints below use plain {@code http://}, so the access token travels
 * unencrypted in the query string. Consider switching to {@code https://} once it is confirmed
 * that {@link WebPost} supports it.</p>
 *
 * @author brendanclemenzi
 */
public class DiffBotClient
{
    private static final String DIFFBOT_ARTICLE_API = "http://api.diffbot.com/v3/article";
    private static final String DIFFBOT_IMAGE_API = "http://api.diffbot.com/v3/image";
    private static final String DIFFBOT_VIDEO_API = "http://api.diffbot.com/v3/video";

    /** Timeout (in milliseconds, as sent in the {@code timeout} query parameter) used when none is supplied. */
    private static final String DEFAULT_TIMEOUT_MILLISECONDS = "30000";

    /**
     * Marker that identifies an error payload. The quotes are part of the marker so that only a
     * JSON key named errorCode matches; the bare word appearing inside extracted page text
     * (where embedded quotes are escaped) no longer triggers the error path.
     */
    private static final String ERROR_CODE_KEY = "\"errorCode\"";

    private final String m_accessToken;
    private final String m_requestTimeout;

    /**
     * Creates a client that uses the default request timeout of 30000.
     *
     * @param accessToken for the diffbot API
     */
    public DiffBotClient(String accessToken)
    {
        this(accessToken, DEFAULT_TIMEOUT_MILLISECONDS);
    }

    /**
     * Creates a client with an explicit request timeout.
     *
     * @param accessToken for the diffbot API
     * @param timeoutMilliseconds value sent verbatim as the {@code timeout} query parameter
     */
    public DiffBotClient(String accessToken, String timeoutMilliseconds)
    {
        m_accessToken = accessToken;
        m_requestTimeout = timeoutMilliseconds;
    }

    /**
     * Returns an object that represents the Article being extracted from the given page url.
     *
     * @param pageUrl address of the page to extract
     * @return the first Article of the response
     * @throws Exception if the request fails, the API reports an error, or the response
     *         contains no article
     */
    public Article getArticle(String pageUrl) throws Exception
    {
        ArticleResponse response = getArticleResponse(pageUrl);
        List<Article> articles = (response == null) ? null : response.getObjects();

        // Fail with a descriptive message instead of an opaque IndexOutOfBounds/NullPointer error.
        if (articles == null || articles.isEmpty())
        {
            throw new Exception("Diffbot returned no article for url: " + pageUrl);
        }
        return articles.get(0);
    }

    /**
     * Returns the full response object for the article extracted from the given url.
     *
     * @param pageUrl address of the page to extract
     * @return ArticleResponse deserialized from the API's JSON reply
     * @throws Exception if the request fails or the API reports an error
     */
    public ArticleResponse getArticleResponse(String pageUrl) throws Exception
    {
        String apiResponse = getApiResponse(DIFFBOT_ARTICLE_API, pageUrl);

        // Process the JSON response into an object we can use
        return new JSONDeserializer<ArticleResponse>().deserialize(apiResponse, ArticleResponse.class);
    }

    /**
     * Returns an object list that represents the images being extracted from the given page url.
     *
     * @param pageUrl address of the page to extract
     * @return the images carried by the response, exactly as returned by
     *         {@link ImageResponse#getObjects()}
     * @throws Exception if the request fails or the API reports an error
     */
    public List<Image> getImages(String pageUrl) throws Exception
    {
        ImageResponse response = getImageResponse(pageUrl);
        return response.getObjects();
    }

    /**
     * Returns the full response object for the images extracted from the given url.
     *
     * @param pageUrl address of the page to extract
     * @return ImageResponse deserialized from the API's JSON reply
     * @throws Exception if the request fails or the API reports an error
     */
    public ImageResponse getImageResponse(String pageUrl) throws Exception
    {
        String apiResponse = getApiResponse(DIFFBOT_IMAGE_API, pageUrl);

        // Process the JSON response into an object we can use
        return new JSONDeserializer<ImageResponse>().deserialize(apiResponse, ImageResponse.class);
    }

    /**
     * Returns an object that represents the Video being extracted from the given page url.
     *
     * @param pageUrl address of the page to extract
     * @return the first Video of the response
     * @throws Exception if the request fails, the API reports an error, or the response
     *         contains no video
     */
    public Video getVideo(String pageUrl) throws Exception
    {
        VideoResponse response = getVideoResponse(pageUrl);
        List<Video> videos = (response == null) ? null : response.getObjects();

        // Fail with a descriptive message instead of an opaque IndexOutOfBounds/NullPointer error.
        if (videos == null || videos.isEmpty())
        {
            throw new Exception("Diffbot returned no video for url: " + pageUrl);
        }
        return videos.get(0);
    }

    /**
     * Returns the full response object for the video extracted from the given url.
     *
     * @param pageUrl address of the page to extract
     * @return VideoResponse deserialized from the API's JSON reply
     * @throws Exception if the request fails or the API reports an error
     */
    public VideoResponse getVideoResponse(String pageUrl) throws Exception
    {
        String apiResponse = getApiResponse(DIFFBOT_VIDEO_API, pageUrl);

        // Process the JSON response into an object we can use
        return new JSONDeserializer<VideoResponse>().deserialize(apiResponse, VideoResponse.class);
    }

    /**
     * Calls the given DiffBot endpoint for a page and returns the raw response body.
     *
     * <p>The request URL carries the access token, the timeout and the UTF-8 URL-encoded page
     * address as query parameters.</p>
     *
     * @param apiUrl base URL of the DiffBot endpoint to call
     * @param pageUrl address of the page to extract
     * @return the raw (JSON) response body
     * @throws Exception if the call fails, no response body is received, or the response
     *         carries an error code (the message is then the API's error text)
     */
    private String getApiResponse(String apiUrl, String pageUrl) throws Exception
    {
        String fullApiUrl = apiUrl
                + "?token=" + m_accessToken
                + "&timeout=" + m_requestTimeout
                + "&url=" + URLEncoder.encode(pageUrl, "UTF-8");

        WebPost webPostUtil = new WebPost();
        webPostUtil.connect(fullApiUrl, "text/html; charset=utf-8", "GET");
        try
        {
            // Get the response for the API
            String apiResponse = webPostUtil.receive();
            if (apiResponse == null)
            {
                throw new Exception("No response received from " + apiUrl);
            }

            // Check for an error code
            if (apiResponse.contains(ERROR_CODE_KEY))
            {
                ErrorResponse errorResponse =
                        new JSONDeserializer<ErrorResponse>().deserialize(apiResponse, ErrorResponse.class);
                throw new Exception(errorResponse.getError());
            }
            return apiResponse;
        }
        finally
        {
            // Always release the connection, including when receive() or the error check throws.
            webPostUtil.disconnect();
        }
    }
}