Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
You can buy this project and download/modify it as often as you want.
/*
* Carrot2 project.
*
* Copyright (C) 2002-2015, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.source.ambient;
import java.io.*;
import java.util.*;
import org.carrot2.core.Document;
import org.carrot2.core.ProcessingException;
import org.carrot2.util.*;
import org.carrot2.util.resource.ClassResource;
import org.carrot2.util.resource.IResource;
import com.google.common.base.Predicate;
import com.google.common.collect.*;
/**
* Handles data of test collections developed by Fondazione Ugo Bordoni.
*/
class FubTestCollection
{
/**
* The total number of Ambient topics. Returned as-is by {@link #getTopicCount()}.
* NOTE(review): this field is not final and is not assigned directly in the
* constructor; presumably one of the load helpers sets it -- confirm.
*/
int topicCount;
/**
* Documents by topic id. Assigned in the constructor from the result of
* <code>loadDocuments</code> and looked up by <code>int</code> topic id in
* <code>getDocumentsForTopic</code>.
* NOTE(review): the generic type arguments of this declaration appear to have
* been lost (stray closing angle bracket) -- restore them from the original file.
*/
final Map> documentsByTopicId;
/**
* Numbers of documents for each subtopic. Assigned in the constructor from the
* result of <code>prepareSubtopicSizes</code>; <code>getDocumentsForTopic</code>
* looks entries up by the topic string of a document and compares the value
* with an <code>int</code> threshold.
* NOTE(review): declared as a raw <code>Map</code> here; the type arguments
* appear to be missing -- confirm.
*/
final Map subtopicSizes;
/**
* Human-readable descriptions of topics. Assigned in the constructor from the
* result of <code>loadSubtopicLabels</code>, which is given the
* <code>/subTopics.txt</code> resource.
* NOTE(review): the field and resource names suggest these are subtopic labels
* rather than topic labels; key and value types are not visible here -- confirm.
*/
final Map subtopicLabels;
/**
 * Builds the collection from three resources whose names are formed by appending
 * <code>/STRel.txt</code>, <code>/results.txt</code> and <code>/subTopics.txt</code>
 * to <code>basePath</code>. Each resource is wrapped in a {@link ClassResource}
 * created with <code>AmbientDocumentSource.class</code>.
 *
 * @param basePath prefix prepended to the three resource file names
 */
public FubTestCollection(String basePath)
{
    // Subtopic assignments, indexed as [topicId][resultIndex] = subtopicId.
    final ClassResource subtopicMappingResource = new ClassResource(
        AmbientDocumentSource.class, basePath + "/STRel.txt");
    final int [][] subtopicIdsByResult = loadSubtopicMapping(subtopicMappingResource);

    // Documents are loaded together with the subtopic assignments read above.
    final ClassResource resultsResource = new ClassResource(
        AmbientDocumentSource.class, basePath + "/results.txt");
    documentsByTopicId = loadDocuments(resultsResource, subtopicIdsByResult);

    // Subtopic sizes are derived from the assignments only, no extra resource needed.
    subtopicSizes = prepareSubtopicSizes(subtopicIdsByResult);

    final ClassResource subtopicLabelsResource = new ClassResource(
        AmbientDocumentSource.class, basePath + "/subTopics.txt");
    subtopicLabels = loadSubtopicLabels(subtopicLabelsResource);
}
/**
 * Returns the current value of the {@link #topicCount} field.
 *
 * @return the total number of Ambient topics
 */
protected int getTopicCount()
{
    return this.topicCount;
}
/**
 * Returns the documents of one topic, filtered by subtopic size and, optionally,
 * by whether a document has a subtopic assigned, truncated to at most
 * <code>requestedResults</code> entries.
 *
 * @param topicId identifier of the topic; must be a key of
 *        {@link #documentsByTopicId}
 * @param requestedResults maximum number of documents to return; negative values
 *        are treated as zero
 * @param minTopicSize minimum size, as recorded in {@link #subtopicSizes}, that a
 *        document's subtopic must have for the document to be kept; a subtopic
 *        with no recorded size counts as size zero
 * @param includeDocumentsWithoutTopic if <code>false</code>, documents whose topic
 *        string ends with <code>".0"</code> (treated here as "no topic") are dropped
 * @return the filtered documents, at most <code>max(0, requestedResults)</code> of
 *         them, in the iteration order of the topic's document collection
 * @throws IllegalArgumentException if no documents are registered for
 *         <code>topicId</code>
 * @throws ProcessingException declared for callers; not thrown directly by the
 *         code in this method
 */
protected List<Document> getDocumentsForTopic(int topicId, int requestedResults,
    final int minTopicSize, final boolean includeDocumentsWithoutTopic)
    throws ProcessingException
{
    // Fail with a descriptive message instead of a bare NullPointerException
    // raised from inside Collections2.filter.
    final Collection<Document> topicDocuments = documentsByTopicId.get(topicId);
    if (topicDocuments == null)
    {
        throw new IllegalArgumentException("Unknown topic id: " + topicId);
    }

    // Filter the results
    final List<Document> documents = Lists.newArrayList(Collections2.filter(
        topicDocuments, new Predicate<Document>()
        {
            @Override
            public boolean apply(Document document)
            {
                // For now there is only one topic per document in Ambient
                final String documentTopic = getTopic(document);

                // Avoid auto-unboxing a null when the subtopic has no recorded size.
                final Integer recordedSize = subtopicSizes.get(documentTopic);
                final int subtopicSize = (recordedSize != null) ? recordedSize : 0;
                if (subtopicSize < minTopicSize)
                {
                    return false;
                }
                return includeDocumentsWithoutTopic || !documentTopic.endsWith(".0");
            }
        }));

    // Clamp so that a negative request yields an empty list rather than an
    // IndexOutOfBoundsException from subList.
    final int limit = Math.max(0, requestedResults);
    if (documents.size() > limit)
    {
        return documents.subList(0, limit);
    }
    return documents;
}
@SuppressWarnings("unchecked")
protected Set