org.carrot2.clustering.SharedInfrastructure Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of carrot2-core Show documentation
Show all versions of carrot2-core Show documentation
Carrot2 Text Clustering Library
/*
* Carrot2 project.
*
* Copyright (C) 2002-2021, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* https://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.clustering;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.carrot2.attrs.AttrString;
public class SharedInfrastructure {
/**
 * Builds the attribute used for the query hint.
 *
 * @return the result of configuring an {@link AttrString} builder with the label
 *     {@code "Query hint"} and a {@code null} default value
 */
public static AttrString queryHintAttribute() {
  final AttrString queryHint = AttrString.builder().label("Query hint").defaultValue(null);
  return queryHint;
}
private static class ClusterData {
final Cluster cluster;
final double score;
final String label;
final int recursiveDocumentCount;
public ClusterData(Cluster cluster, double score, int recursiveDocumentCount) {
this.cluster = cluster;
this.label = String.join(", ", cluster.getLabels());
this.score = score;
this.recursiveDocumentCount = recursiveDocumentCount;
}
}
public static List> reorderByWeightedScoreAndSize(
List> clusters, double scoreWeight) {
Comparator> comparator =
Comparator.>comparingDouble(data -> data.score)
.reversed()
.thenComparing(Comparator.nullsFirst(Comparator.comparing(data -> data.label)));
return clusters.stream()
.map(
cluster -> {
int docCount = recursiveDocumentCount(cluster);
double score =
Math.pow(docCount, 1d - scoreWeight) * Math.pow(cluster.getScore(), scoreWeight);
return new ClusterData(cluster, score, docCount);
})
.sorted(comparator)
.map(data -> data.cluster)
.collect(Collectors.toList());
}
public static List> reorderByDescendingSizeAndLabel(
ArrayList> clusters) {
Comparator> comparator =
Comparator.>comparingInt(data -> data.recursiveDocumentCount)
.reversed()
.thenComparing(Comparator.nullsFirst(Comparator.comparing(data -> data.label)));
return clusters.stream()
.map(
cluster -> {
int docCount = recursiveDocumentCount(cluster);
return new ClusterData(cluster, 0, docCount);
})
.sorted(comparator)
.map(data -> data.cluster)
.collect(Collectors.toList());
}
public static int recursiveDocumentCount(Cluster> cluster) {
Set
© 2015 - 2024 Weber Informatics LLC | Privacy Policy