All downloads are free. Search and download functionality uses the official Maven repository.

co.cask.cdap.examples.wordcount.RetrieveCountsHandler Maven / Gradle / Ivy

/*
 * Copyright © 2014 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.examples.wordcount;

import co.cask.cdap.api.annotation.Property;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.dataset.table.Get;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Table;
import co.cask.cdap.api.service.http.AbstractHttpServiceHandler;
import co.cask.cdap.api.service.http.HttpServiceContext;
import co.cask.cdap.api.service.http.HttpServiceRequest;
import co.cask.cdap.api.service.http.HttpServiceResponder;
import com.google.common.base.Charsets;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Maps;

import java.util.HashMap;
import java.util.Map;
import javax.ws.rs.DefaultValue;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.QueryParam;

/**
 * Retrieve Counts service handler.
 */
public class RetrieveCountsHandler extends AbstractHttpServiceHandler {
  private static final int WORD_COUNT_LIMIT = 10;

  @Property
  private final String wordStatsTableName;

  @Property
  private final String wordCountTableName;

  @Property
  private final String uniqueCountTableName;

  @Property
  private final String wordAssocTableName;

  private Table wordStatsTable;
  private KeyValueTable wordCountsTable;
  private UniqueCountTable uniqueCountTable;
  private AssociationTable associationTable;

  public RetrieveCountsHandler(WordCount.WordCountConfig config) {
    this.wordStatsTableName = config.getWordStatsTable();
    this.wordCountTableName = config.getWordCountTable();
    this.uniqueCountTableName = config.getUniqueCountTable();
    this.wordAssocTableName = config.getWordAssocTable();
  }

  @Override
  public void initialize(HttpServiceContext context) throws Exception {
    super.initialize(context);
    wordStatsTable = context.getDataset(wordStatsTableName);
    wordCountsTable = context.getDataset(wordCountTableName);
    uniqueCountTable = context.getDataset(uniqueCountTableName);
    associationTable = context.getDataset(wordAssocTableName);
  }

  /**
   * Returns total number of words, the number of unique words, and the average word length.
   */
  @Path("stats")
  @GET
  public void getStats(HttpServiceRequest request, HttpServiceResponder responder) {
    long totalWords = 0L;
    long uniqueWords = 0L;
    double averageLength = 0.0;

    // Read the total_length and total_words to calculate average length
    Row result = wordStatsTable.get(new Get("totals", "total_length", "total_words"));
    if (!result.isEmpty()) {
      // Extract the total sum of lengths
      long totalLength = result.getLong("total_length", 0);

      // Extract the total count of words
      totalWords = result.getLong("total_words", 0);

      // Compute the average length
      if (totalLength != 0 && totalWords != 0) {
        averageLength = ((double) totalLength) / totalWords;

        // Read the unique word count
        uniqueWords = uniqueCountTable.readUniqueCount();
      }
    }

    // Return a map as JSON
    Map results = new HashMap<>();
    results.put("totalWords", totalWords);
    results.put("uniqueWords", uniqueWords);
    results.put("averageLength", averageLength);

    responder.sendJson(results);
  }

  /**
   * Returns the count for a specific word and its word associations, up to the specified limit or
   * a pre-set limit of ten if not specified.
   */
  @Path("count/{word}")
  @GET
  public void getCount(HttpServiceRequest request, HttpServiceResponder responder,
                       @PathParam("word") String word, @DefaultValue("10") @QueryParam("limit") Integer limit) {
    // Read the word count
    byte[] countBytes = wordCountsTable.read(Bytes.toBytes(word));
    long wordCount = countBytes == null ? 0L : Bytes.toLong(countBytes);

    // Read the top associated words
    Map wordsAssocs = associationTable.readWordAssocs(word, limit);

    // Build a map with results
    Map results = new HashMap<>();
    results.put("word", word);
    results.put("count", wordCount);
    results.put("assocs", wordsAssocs);

    responder.sendJson(results);
  }

  /**
   * Returns the counts for all words in the input.  The request body is expected to contain
   * a comma-separated list of words.
   */
  @Path("counts")
  @POST
  public void getCounts(HttpServiceRequest request, HttpServiceResponder responder) {
    String wordString = Charsets.UTF_8.decode(request.getContent()).toString();
    String[] words = wordString.split(",");
    Map wordCounts = Maps.newHashMap();
    Stopwatch timer = new Stopwatch().start();
    for (int i = 0; i < words.length; i++) {
      byte[] countBytes = wordCountsTable.read(Bytes.toBytes(words[i]));
      long count = countBytes != null ? Bytes.toLong(countBytes) : 0;
      wordCounts.put(words[i], count);
    }
    timer.stop();
    Map responseBody = Maps.newHashMap();
    responseBody.put("counts", wordCounts);
    responseBody.put("elapsed", timer.toString());
    responder.sendJson(responseBody);
  }

  /**
   * Returns the counts for all words in the input.  The request body is expected to contain
   * a comma-separated list of words.
   *
   * 

* This endpoint method differs from {@link RetrieveCountsHandler#getCounts(HttpServiceRequest,HttpServiceResponder)} * in using {@link KeyValueTable#readAll(byte[][])} to perform a batched read. *

*/ @Path("multicounts") @POST public void getMultiCounts(HttpServiceRequest request, HttpServiceResponder responder) { String wordString = Charsets.UTF_8.decode(request.getContent()).toString(); String[] words = wordString.split(","); byte[][] wordBytes = new byte[words.length][]; for (int i = 0; i < words.length; i++) { wordBytes[i] = Bytes.toBytes(words[i]); } Stopwatch timer = new Stopwatch().start(); Map results = wordCountsTable.readAll(wordBytes); Map wordCounts = Maps.newHashMap(); for (Map.Entry entry : results.entrySet()) { byte[] val = entry.getValue(); wordCounts.put(Bytes.toString(entry.getKey()), val != null ? Bytes.toLong(entry.getValue()) : 0); } timer.stop(); Map response = Maps.newHashMap(); response.put("counts", wordCounts); response.put("elapsed", timer.toString()); responder.sendJson(response); } /** * Returns the count of associations for a specific word pair. */ @Path("assoc/{word1}/{word2}") @GET public void getAssoc(HttpServiceRequest request, HttpServiceResponder responder, @PathParam("word1") String word1, @PathParam("word2") String word2) { // Read the top associated words long count = associationTable.getAssoc(word1, word2); // Return a map as JSON Map results = new HashMap<>(); results.put("word1", word1); results.put("word2", word2); results.put("count", count); responder.sendJson(results); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy