// Distribution notice: many resources are needed to download a project. Please understand
// that we have to compensate our server costs. Thank you in advance. Project price: only 1 $.
// You can buy this project and download/modify it as often as you want.
/*
* Carrot2 project.
*
* Copyright (C) 2002-2015, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.source.google;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.Arrays;
import java.util.concurrent.Callable;
import org.apache.http.Header;
import org.apache.http.NameValuePair;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.carrot2.core.*;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.source.*;
import org.carrot2.util.ExceptionUtils;
import org.carrot2.util.attribute.*;
import org.carrot2.util.httpclient.HttpClientFactory;
import org.carrot2.util.httpclient.HttpRedirectStrategy;
import org.carrot2.util.httpclient.HttpUtils;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
 * A {@link IDocumentSource} fetching search results from Google JSON API. Please note
 * that this document source cannot deliver more than 32 search results.
 *
 * @see "Google AJAX Search API"
 */
@Bindable(prefix = "GoogleDocumentSource")
public class GoogleDocumentSource extends MultipageSearchEngine
{
    /**
     * Service URL. Google web search service URL.
     */
    @Input
    @Processing
    @Internal
    @Attribute
    @Label("Service URL")
    @Level(AttributeLevel.ADVANCED)
    @Group(SERVICE)
    public String serviceUrl = "http://ajax.googleapis.com/ajax/services/search/web";

    /**
     * Request referrer. Please do not use the default value when deploying this
     * component in production environments. Instead, put the URL to your application
     * here.
     */
    @Input
    @Processing
    @Internal
    @Attribute
    @Label("Referrer")
    @Level(AttributeLevel.ADVANCED)
    @Group(SERVICE)
    public String referer = "http://www.carrot2.org";

    /**
     * Keep query word highlighting. Google by default highlights query words in snippets
     * using the bold HTML tag. Set this attribute to <code>true</code> to keep these
     * highlights.
     */
    @Input
    @Processing
    @Attribute
    @Label("Keep highlights")
    @Level(AttributeLevel.ADVANCED)
    @Group(POSTPROCESSING)
    public boolean keepHighlights = false;

    /**
     * HTTP redirect response strategy (follow or throw an error).
     */
    @Input
    @Processing
    @Attribute
    @Label("HTTP redirect strategy")
    @Level(AttributeLevel.MEDIUM)
    @Group(SimpleSearchEngine.SERVICE)
    @Internal
    public HttpRedirectStrategy redirectStrategy = HttpRedirectStrategy.NO_REDIRECTS;

    /**
     * Google search metadata.
     */
    // NOTE(review): presumably (results per page, maximum reachable results), which
    // would match the 32-result limit in the class description -- confirm against
    // MultipageSearchEngineMetadata.
    static final MultipageSearchEngineMetadata metadata = new MultipageSearchEngineMetadata(8, 32);

    /**
     * Maximum concurrent threads from all instances of this component.
     */
    private static final int MAX_CONCURRENT_THREADS = 10;

    /**
     * JSON parser shared by all fetchers. The mapper is never reconfigured after
     * construction, which is the condition under which Jackson documents it as safe
     * to share between threads.
     */
    private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

    /**
     * Runs the multi-page search using {@link #metadata} and the executor obtained
     * from {@code getSharedExecutor(MAX_CONCURRENT_THREADS, getClass())}.
     *
     * @throws ProcessingException propagated from the superclass implementation
     */
    @Override
    public void process() throws ProcessingException
    {
        super.process(metadata, getSharedExecutor(MAX_CONCURRENT_THREADS, getClass()));
    }

    /**
     * Creates a task that issues one GET request to {@link #serviceUrl} for the given
     * range and converts the JSON payload into a {@link SearchEngineResponse}.
     *
     * @param bucket the range to fetch; only its {@code start} offset is sent to the
     *        service
     * @return a callable producing the response for this range
     */
    @Override
    protected Callable<SearchEngineResponse> createFetcher(final SearchRange bucket)
    {
        return new SearchEngineResponseCallable()
        {
            @Override
            public SearchEngineResponse search() throws Exception
            {
                final SearchEngineResponse response = new SearchEngineResponse();

                final NameValuePair [] queryParams = new NameValuePair []
                {
                    new BasicNameValuePair("v", "1.0"),
                    new BasicNameValuePair("rsz", "large"),
                    new BasicNameValuePair("start", Integer.toString(bucket.start)),
                    new BasicNameValuePair("q", query),
                };

                final Header [] headers = new Header []
                {
                    new BasicHeader("Referer", referer),
                };

                final HttpUtils.Response httpResp = HttpUtils.doGET(
                    serviceUrl,
                    Arrays.asList(queryParams),
                    Arrays.asList(headers),
                    null, null,
                    HttpClientFactory.DEFAULT_TIMEOUT,
                    redirectStrategy.value());

                // The compression used is a property of the HTTP exchange, so record
                // it regardless of what the payload turns out to contain.
                response.metadata.put(SearchEngineResponse.COMPRESSION_KEY,
                    httpResp.compression);

                final JsonNode root = JSON_MAPPER.readTree(httpResp.getPayloadAsStream());
                readPayload(root, response);
                return response;
            }
        };
    }

    /**
     * Copies documents and the estimated total from a parsed service payload into
     * <code>response</code>. Missing <code>responseData</code>, <code>results</code>
     * or <code>cursor</code> nodes end processing early and leave whatever has been
     * collected so far.
     *
     * @param root parsed JSON payload, may be <code>null</code>
     * @param response the response to fill in
     * @throws NumberFormatException if the estimated result count is present but is
     *         not a parseable integer
     */
    private static void readPayload(JsonNode root, SearchEngineResponse response)
    {
        if (root == null)
        {
            return;
        }

        final JsonNode responseData = root.get("responseData");
        if (responseData == null)
        {
            return;
        }

        final JsonNode resultsArray = responseData.get("results");
        if (resultsArray == null)
        {
            return;
        }

        for (JsonNode node : resultsArray)
        {
            // A result lacking one of the fields yields null for that field
            // rather than aborting the whole page with a NullPointerException.
            response.results.add(new Document(
                textOrNull(node, "titleNoFormatting"),
                textOrNull(node, "content"),
                textOrNull(node, "url")));
        }

        final JsonNode cursor = responseData.get("cursor");
        if (cursor == null)
        {
            return;
        }

        final JsonNode resultCount = cursor.get("estimatedResultCount");
        if (resultCount != null && !resultCount.isNull())
        {
            // asText() covers both a textual and a numeric JSON value; textValue()
            // would return null for the latter.
            response.metadata.put(SearchEngineResponse.RESULTS_TOTAL_KEY,
                Long.parseLong(resultCount.asText()));
        }
        else
        {
            response.metadata.put(SearchEngineResponse.RESULTS_TOTAL_KEY, 0L);
        }
    }

    /**
     * Returns the textual value of a named child of <code>parent</code>, or
     * <code>null</code> if the child is absent or is not a text node.
     */
    private static String textOrNull(JsonNode parent, String fieldName)
    {
        final JsonNode field = parent.get(fieldName);
        return field == null ? null : field.textValue();
    }

    /**
     * Post-processes a fetched response: runs <code>clean</code> on titles and
     * summaries (passing {@link #keepHighlights}) and URL-decodes each document's
     * content URL using UTF-8.
     *
     * @param response the response whose documents are modified in place
     */
    @Override
    protected void afterFetch(SearchEngineResponse response)
    {
        clean(response, keepHighlights, Document.TITLE, Document.SUMMARY);

        // Decode URLs
        for (Document document : response.results)
        {
            final String url = document.getField(Document.CONTENT_URL);
            if (url == null)
            {
                continue;
            }

            try
            {
                document.setField(Document.CONTENT_URL, URLDecoder.decode(url, "UTF-8"));
            }
            catch (IllegalArgumentException ignored)
            {
                // Malformed percent-escape in the URL: keep the raw value for this
                // document instead of failing the entire response.
            }
            catch (UnsupportedEncodingException e)
            {
                // Should not happen
                throw ExceptionUtils.wrapAsRuntimeException(e);
            }
        }
    }
}