![JAR search and dependency download from the Maven repository](/logo.png)
com.day.crx.statistics.keyword.ExtractKeywordsReport Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
/*************************************************************************
*
* ADOBE CONFIDENTIAL
* ___________________
*
* Copyright 1997 Adobe Systems Incorporated
* All Rights Reserved.
*
* NOTICE: All information contained herein is, and remains
* the property of Adobe Systems Incorporated and its suppliers,
* if any. The intellectual and technical concepts contained
* herein are proprietary to Adobe Systems Incorporated and its
* suppliers and are protected by trade secret or copyright law.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe Systems Incorporated.
**************************************************************************/
package com.day.crx.statistics.keyword;
import com.day.crx.statistics.Report;
import com.day.crx.statistics.result.ResultsByQueryReport;
import com.day.crx.statistics.query.MostPopularQueriesReport;
import javax.jcr.Session;
import javax.jcr.RepositoryException;
import java.util.Iterator;
import java.util.Map;
import java.util.List;
import java.util.ArrayList;
import java.util.LinkedHashMap;
/**
 * <code>ExtractKeywordsReport</code> implements a report, which extracts
 * keywords from the statistical query and result data. The algorithm works
 * as follows:
 * <ul>
 * <li>Get the <i>n</i> most popular queries for a given period.</li>
 * <li>For each query, retrieve the results that were selected the most.
 * Whether a result is considered relevant depends on the number of times it
 * was selected. The threshold is set to half the value of the most selected
 * result, but at least 2.</li>
 * </ul>
 *
 * @author mreutegg
 */
public class ExtractKeywordsReport extends Report {

    /**
     * The maximum number of queries to analyze.
     */
    private int size = 100;

    /**
     * The report period in number of days. Per default the last 90 days are
     * covered.
     */
    private int period = 90;

    /**
     * The path where result data is stored.
     */
    private final String resultDataPath;

    /**
     * Add OAK-specific "option(traversal ok)" to the statistics query so that
     * no traversal warning is triggered. Within this class the flag is only
     * forwarded to the {@link ResultsByQueryReport} instances created in
     * {@link #getResult(Session)}.
     */
    private final boolean traversalOk;

    /**
     * Creates a new report that does not request the "traversal ok" option.
     *
     * @param queryDataPath  the path where query data is stored.
     * @param resultDataPath the path where result data is stored.
     */
    public ExtractKeywordsReport(String queryDataPath, String resultDataPath) {
        this(queryDataPath, resultDataPath, false);
    }

    /**
     * Creates a new report.
     *
     * @param queryDataPath  the path where query data is stored.
     * @param resultDataPath the path where result data is stored.
     * @param traversalOk    set to true to put the "traversal ok" option in
     *                       the statistics query
     */
    public ExtractKeywordsReport(String queryDataPath, String resultDataPath, boolean traversalOk) {
        super(queryDataPath);
        this.resultDataPath = resultDataPath;
        this.traversalOk = traversalOk;
    }

    /**
     * {@inheritDoc}
     *
     * Returns result rows with the following objects:
     * <ul>
     * <li>Path <code>String</code> of a page</li>
     * <li><code>Long</code> count (how many times the page was selected as a
     * result)</li>
     * <li><code>List</code> of <code>String</code>s (the keywords)</li>
     * </ul>
     * Rows are returned in the order in which the pages were first
     * encountered while walking through the popular queries.
     */
    public Iterator getResult(Session session) throws RepositoryException {
        MostPopularQueriesReport popularQueries = new MostPopularQueriesReport(getDataPath());
        popularQueries.setSize(getSize());
        popularQueries.setPeriod(getPeriod());

        // LinkedHashMap keeps the pages in first-seen order
        Map<String, PageKeywords> pages = new LinkedHashMap<String, PageKeywords>();
        for (Iterator queries = popularQueries.getResult(session); queries.hasNext(); ) {
            Object[] queryRow = (Object[]) queries.next();
            collectKeywords(session, (String) queryRow[0], pages);
        }

        List<Object[]> rows = new ArrayList<Object[]>(pages.size());
        for (Map.Entry<String, PageKeywords> entry : pages.entrySet()) {
            PageKeywords page = entry.getValue();
            rows.add(new Object[]{entry.getKey(), Long.valueOf(page.count), page.keywords});
        }
        return rows.iterator();
    }

    /**
     * Adds <code>query</code> as a keyword to every page that was selected
     * often enough as a result of that query.
     *
     * @param session the session handed to the result report.
     * @param query   the query string as reported by the popular queries
     *                report.
     * @param pages   accumulator, keyed by page path; updated in place.
     * @throws RepositoryException if the result report fails.
     */
    private void collectKeywords(Session session, String query, Map<String, PageKeywords> pages)
            throws RepositoryException {
        ResultsByQueryReport results = new ResultsByQueryReport(resultDataPath, query, traversalOk);
        // lower-cased lazily, i.e. only when the query has a relevant result
        // NOTE(review): toLowerCase() uses the JVM default locale - confirm
        // whether keywords should be normalized with an explicit locale
        String keyword = null;
        long threshold = -1;
        for (Iterator it = results.getResult(session); it.hasNext(); ) {
            Object[] row = (Object[]) it.next();
            String path = (String) row[0];
            long count = ((Long) row[1]).longValue();
            if (threshold == -1) {
                // calculate threshold based on first result (assumes the most
                // selected result is reported first);
                // result must have been selected at least 2 times
                threshold = Math.max(count / 2, 2);
            }
            if (count < threshold) {
                // this and all remaining results are ignored
                break;
            }
            if (keyword == null) {
                keyword = query.toLowerCase();
            }
            PageKeywords page = pages.get(path);
            if (page == null) {
                page = new PageKeywords();
                pages.put(path, page);
            }
            page.add(keyword, count);
        }
    }

    /**
     * @return the maximum number of queries to analyze.
     */
    public int getSize() {
        return size;
    }

    /**
     * @param size the maximum number of queries to analyze.
     */
    public void setSize(int size) {
        this.size = size;
    }

    /**
     * @return the report period in number of days.
     */
    public int getPeriod() {
        return period;
    }

    /**
     * @param period the report period in number of days.
     */
    public void setPeriod(int period) {
        this.period = period;
    }

    /**
     * Per-page accumulator: the summed selection count and the distinct
     * keywords in the order they were first added.
     */
    private static final class PageKeywords {

        /** Sum of the selection counts of all relevant results for the page. */
        private long count;

        /** Distinct lower-cased queries that led to the page. */
        private final List<String> keywords = new ArrayList<String>(2);

        /**
         * Records one relevant result for the page.
         *
         * @param keyword    the lower-cased query; added only if not present.
         * @param selections the selection count to add, even if the keyword
         *                   is already known.
         */
        void add(String keyword, long selections) {
            if (!keywords.contains(keyword)) {
                keywords.add(keyword);
            }
            count += selections;
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy