// co.cask.cdap.examples.wordcount.RetrieveCountsHandler Maven / Gradle / Ivy
/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.examples.wordcount;
import co.cask.cdap.api.annotation.Property;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.dataset.table.Get;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Table;
import co.cask.cdap.api.service.http.AbstractHttpServiceHandler;
import co.cask.cdap.api.service.http.HttpServiceContext;
import co.cask.cdap.api.service.http.HttpServiceRequest;
import co.cask.cdap.api.service.http.HttpServiceResponder;
import com.google.common.base.Charsets;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Maps;
import java.util.HashMap;
import java.util.Map;
import javax.ws.rs.DefaultValue;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.QueryParam;
/**
 * Retrieve Counts service handler.
 *
 * <p>Exposes HTTP endpoints that read word statistics, per-word counts and word associations
 * from the datasets whose names are taken from the {@link WordCount.WordCountConfig} passed to
 * the constructor.
 */
public class RetrieveCountsHandler extends AbstractHttpServiceHandler {

  /** Association limit used by {@code getCount} when the caller supplies none. */
  private static final int WORD_COUNT_LIMIT = 10;

  @Property
  private final String wordStatsTableName;
  @Property
  private final String wordCountTableName;
  @Property
  private final String uniqueCountTableName;
  @Property
  private final String wordAssocTableName;

  // Dataset handles; assigned in initialize(), not in the constructor.
  private Table wordStatsTable;
  private KeyValueTable wordCountsTable;
  private UniqueCountTable uniqueCountTable;
  private AssociationTable associationTable;

  /**
   * Creates a handler that reads from the datasets named in the given configuration.
   *
   * @param config supplies the names of the word stats, word count, unique count and
   *               word association datasets
   */
  public RetrieveCountsHandler(WordCount.WordCountConfig config) {
    this.wordStatsTableName = config.getWordStatsTable();
    this.wordCountTableName = config.getWordCountTable();
    this.uniqueCountTableName = config.getUniqueCountTable();
    this.wordAssocTableName = config.getWordAssocTable();
  }

  /**
   * Resolves the four datasets by name from the service context.
   *
   * @param context the service context used to look up the datasets
   * @throws Exception if the superclass initialization or a dataset lookup fails
   */
  @Override
  public void initialize(HttpServiceContext context) throws Exception {
    super.initialize(context);
    wordStatsTable = context.getDataset(wordStatsTableName);
    wordCountsTable = context.getDataset(wordCountTableName);
    uniqueCountTable = context.getDataset(uniqueCountTableName);
    associationTable = context.getDataset(wordAssocTableName);
  }

  /**
   * Returns total number of words, the number of unique words, and the average word length.
   *
   * <p>The JSON response has the keys {@code totalWords}, {@code uniqueWords} and
   * {@code averageLength}. All three are zero when the "totals" row is empty.
   */
  @Path("stats")
  @GET
  public void getStats(HttpServiceRequest request, HttpServiceResponder responder) {
    long totalWords = 0L;
    long uniqueWords = 0L;
    double averageLength = 0.0;

    // Read the total_length and total_words to calculate average length
    Row result = wordStatsTable.get(new Get("totals", "total_length", "total_words"));
    if (!result.isEmpty()) {
      // Extract the total sum of lengths
      long totalLength = result.getLong("total_length", 0);
      // Extract the total count of words
      totalWords = result.getLong("total_words", 0);
      // Compute the average length; the guard also prevents a division by zero
      if (totalLength != 0 && totalWords != 0) {
        averageLength = ((double) totalLength) / totalWords;
        // The unique word count is only read once at least one word has been counted
        uniqueWords = uniqueCountTable.readUniqueCount();
      }
    }

    // Return a map as JSON
    Map<String, Object> results = new HashMap<>();
    results.put("totalWords", totalWords);
    results.put("uniqueWords", uniqueWords);
    results.put("averageLength", averageLength);
    responder.sendJson(results);
  }

  /**
   * Returns the count for a specific word and its word associations, up to the specified limit or
   * a pre-set limit of ten if not specified.
   *
   * <p>The JSON response has the keys {@code word}, {@code count} and {@code assocs}.
   *
   * @param word the word to look up
   * @param limit maximum number of associations to return
   */
  @Path("count/{word}")
  @GET
  public void getCount(HttpServiceRequest request, HttpServiceResponder responder,
                       @PathParam("word") String word,
                       @DefaultValue("10") @QueryParam("limit") Integer limit) {
    // Read the word count; a missing entry counts as zero
    byte[] countBytes = wordCountsTable.read(Bytes.toBytes(word));
    long wordCount = countBytes == null ? 0L : Bytes.toLong(countBytes);

    // Guard against a null limit so that unboxing below can never throw
    int effectiveLimit = limit == null ? WORD_COUNT_LIMIT : limit;

    // Read the top associated words
    Map<String, Long> wordsAssocs = associationTable.readWordAssocs(word, effectiveLimit);

    // Build a map with results
    Map<String, Object> results = new HashMap<>();
    results.put("word", word);
    results.put("count", wordCount);
    results.put("assocs", wordsAssocs);
    responder.sendJson(results);
  }

  /**
   * Returns the counts for all words in the input. The request body is expected to contain
   * a comma-separated list of words.
   *
   * <p>Each word is read individually. The JSON response has the keys {@code counts} (word to
   * count, zero for words without a stored count) and {@code elapsed} (time spent reading).
   */
  @Path("counts")
  @POST
  public void getCounts(HttpServiceRequest request, HttpServiceResponder responder) {
    String[] words = parseWords(request);
    Map<String, Long> wordCounts = Maps.newHashMap();

    Stopwatch timer = new Stopwatch().start();
    for (String word : words) {
      byte[] countBytes = wordCountsTable.read(Bytes.toBytes(word));
      wordCounts.put(word, countBytes != null ? Bytes.toLong(countBytes) : 0L);
    }
    timer.stop();

    Map<String, Object> responseBody = Maps.newHashMap();
    responseBody.put("counts", wordCounts);
    responseBody.put("elapsed", timer.toString());
    responder.sendJson(responseBody);
  }

  /**
   * Returns the counts for all words in the input. The request body is expected to contain
   * a comma-separated list of words.
   *
   * <p>This endpoint method differs from
   * {@link RetrieveCountsHandler#getCounts(HttpServiceRequest,HttpServiceResponder)}
   * in using {@link KeyValueTable#readAll(byte[][])} to perform a batched read.
   *
   * <p>The JSON response has the same shape as that of {@code getCounts}: every requested word
   * appears under {@code counts}, with zero for words the batched read returns no value for.
   */
  @Path("multicounts")
  @POST
  public void getMultiCounts(HttpServiceRequest request, HttpServiceResponder responder) {
    String[] words = parseWords(request);
    byte[][] wordBytes = new byte[words.length][];
    Map<String, Long> wordCounts = Maps.newHashMap();
    for (int i = 0; i < words.length; i++) {
      wordBytes[i] = Bytes.toBytes(words[i]);
      // Seed every requested word with zero so the response matches getCounts even if the
      // batched read leaves out keys without a value (NOTE(review): confirm readAll semantics).
      wordCounts.put(words[i], 0L);
    }

    Stopwatch timer = new Stopwatch().start();
    // Skip the table call entirely when there is nothing to look up
    if (wordBytes.length > 0) {
      Map<byte[], byte[]> results = wordCountsTable.readAll(wordBytes);
      for (Map.Entry<byte[], byte[]> entry : results.entrySet()) {
        byte[] val = entry.getValue();
        wordCounts.put(Bytes.toString(entry.getKey()), val != null ? Bytes.toLong(val) : 0L);
      }
    }
    timer.stop();

    Map<String, Object> response = Maps.newHashMap();
    response.put("counts", wordCounts);
    response.put("elapsed", timer.toString());
    responder.sendJson(response);
  }

  /**
   * Returns the count of associations for a specific word pair.
   *
   * <p>The JSON response has the keys {@code word1}, {@code word2} and {@code count}.
   *
   * @param word1 the first word of the pair
   * @param word2 the second word of the pair
   */
  @Path("assoc/{word1}/{word2}")
  @GET
  public void getAssoc(HttpServiceRequest request, HttpServiceResponder responder,
                       @PathParam("word1") String word1, @PathParam("word2") String word2) {
    // Read the association count for the pair
    long count = associationTable.getAssoc(word1, word2);

    // Return a map as JSON
    Map<String, Object> results = new HashMap<>();
    results.put("word1", word1);
    results.put("word2", word2);
    results.put("count", count);
    responder.sendJson(results);
  }

  /**
   * Decodes the request body as UTF-8 and splits it into words on commas.
   *
   * <p>Surrounding whitespace is trimmed from each token and blank tokens are dropped, so an
   * empty body yields an empty array instead of a single empty word.
   *
   * @param request the request whose body holds the comma-separated words
   * @return the non-blank, trimmed words in the order they appear in the body
   */
  private static String[] parseWords(HttpServiceRequest request) {
    String body = Charsets.UTF_8.decode(request.getContent()).toString();
    String[] tokens = body.split(",");
    String[] words = new String[tokens.length];
    int count = 0;
    for (String token : tokens) {
      String word = token.trim();
      if (!word.isEmpty()) {
        words[count++] = word;
      }
    }
    if (count == tokens.length) {
      return words;
    }
    // Some tokens were blank: shrink the array to the words actually kept
    String[] compact = new String[count];
    System.arraycopy(words, 0, compact, 0, count);
    return compact;
  }
}