All downloads are free. The search and download functionality uses the official Maven repository.

com.day.crx.statistics.keyword.ExtractKeywordsReport Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*************************************************************************
*
* ADOBE CONFIDENTIAL
* ___________________
*
*  Copyright 1997 Adobe Systems Incorporated
*  All Rights Reserved.
*
* NOTICE:  All information contained herein is, and remains
* the property of Adobe Systems Incorporated and its suppliers,
* if any.  The intellectual and technical concepts contained
* herein are proprietary to Adobe Systems Incorporated and its
* suppliers and are protected by trade secret or copyright law.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe Systems Incorporated.
**************************************************************************/
package com.day.crx.statistics.keyword;

import com.day.crx.statistics.Report;
import com.day.crx.statistics.query.MostPopularQueriesReport;
import com.day.crx.statistics.result.ResultsByQueryReport;

import javax.jcr.RepositoryException;
import javax.jcr.Session;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * ExtractKeywordsReport implements a report, which extracts
 * keywords from the statistical query and result data. The algorithm works
 * as follows:
 * 
    *
  • Get the n most popular queries for a given period.
  • *
  • For each query, retrieve the results that were selected the most. * Whether a result is considered relevant depends on the number of times it * was selected. The threshold is set to half the value of the most selected * result, but at least 2.
  • *
* * @author mreutegg */ public class ExtractKeywordsReport extends Report { /** * The maximum number of queries to analyze. */ private int size = 100; /** * Per default cover last 90 days */ private int period = 90; /** * The path where result data is stored. */ private final String resultDataPath; /** * Add OAK-specific "option(traversal ok)" to the statistics query so that no traversal warning is triggered */ private final boolean traversalOk; /** * Creates a new report. * * @param queryDataPath the path where query data is stored. * @param resultDataPath the path where result data is stored. */ public ExtractKeywordsReport(String queryDataPath, String resultDataPath) { this(queryDataPath, resultDataPath, false); } /** * Creates a new report. * * @param queryDataPath the path where query data is stored. * @param resultDataPath the path where result data is stored. * @param traversalOk set to true to put "traveral ok" option in the statistics query */ public ExtractKeywordsReport(String queryDataPath, String resultDataPath, boolean traversalOk) { super(queryDataPath); this.resultDataPath = resultDataPath; this.traversalOk = traversalOk; } /** * {@inheritDoc} *

* Returns result rows with the following objects: *

    *
  • Path String of a page
  • *
  • Long count (how may times the page was selected as a result)
  • *
  • List of Strings (the keywords)
  • *
*/ public Iterator getResult(Session session) throws RepositoryException { MostPopularQueriesReport mpqr = new MostPopularQueriesReport(getDataPath()); mpqr.setSize(getSize()); mpqr.setPeriod(getPeriod()); Iterator popularQueries = mpqr.getResult(session); Map pathToKeywords = new LinkedHashMap(); while (popularQueries.hasNext()) { Object[] data = (Object[]) popularQueries.next(); String query = (String) data[0]; ResultsByQueryReport rr = new ResultsByQueryReport(resultDataPath, query, traversalOk); long threshold = -1; for (Iterator it = rr.getResult(session); it.hasNext(); ) { Object[] rrData = (Object[]) it.next(); String path = (String) rrData[0]; long count = ((Long) rrData[1]).longValue(); if (threshold == -1) { // calculate threshold based on first result // result must have been selected at least 2 times threshold = Math.max(count / 2, 2); } if (count < threshold) { break; } List keywords = (List) pathToKeywords.get(path); if (keywords == null) { keywords = new ArrayList(3); keywords.add(new Long(0)); pathToKeywords.put(path, keywords); } query = query.toLowerCase(); if (!keywords.contains(query)) { keywords.add(query); } // update count keywords.set(0, new Long(((Long) keywords.get(0)).longValue() + count)); } } List result = new ArrayList(); for (Iterator it = pathToKeywords.entrySet().iterator(); it.hasNext(); ) { Map.Entry entry = (Map.Entry) it.next(); String path = (String) entry.getKey(); List keywords = (List) entry.getValue(); Long count = (Long) keywords.get(0); keywords = keywords.subList(1, keywords.size()); result.add(new Object[]{path, count, keywords}); } return result.iterator(); } /** * @return the maximum number of queries analyze. */ public int getSize() { return size; } /** * @param size the maximum number of queries to analyze. */ public void setSize(int size) { this.size = size; } /** * @return the report period in number of days. */ public int getPeriod() { return period; } /** * @param period the report period in number of days. 
*/ public void setPeriod(int period) { this.period = period; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy