// org.carrot2.text.preprocessing.SubstringComparator Maven / Gradle / Ivy
/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.text.preprocessing;
import java.io.Serializable;
import java.util.Comparator;
/**
 * Compares {@link Substring}s in such a way as to put them into different buckets in an
 * inflection-insensitive way. The order of different variants of substrings within
 * buckets is arbitrary.
 * <p>
 * Substrings are ordered first by their length in tokens (<code>to - from</code>) and
 * then, position by position, by the stem index of the word at each token. Two
 * substrings compare as equal when they have the same length and the same sequence of
 * stem indices, regardless of where in the token array they start.
 */
class SubstringComparator implements Comparator<Substring>, Serializable
{
    private static final long serialVersionUID = 1L;

    /** Looked up by token position; each value is used as an index into {@link #wordsStemIndex}. */
    private final int [] tokensWordIndex;

    /** Looked up by word index; each value is the stem index used for comparison. */
    private final int [] wordsStemIndex;

    /**
     * Creates a Comparator for substrings relating to the given word and stem indices.
     * Both arrays are stored by reference, they are not copied.
     *
     * @param tokensWordIndex for each token position, the index of its word
     * @param wordsStemIndex for each word index, the index of its stem
     */
    public SubstringComparator(int [] tokensWordIndex, int [] wordsStemIndex)
    {
        this.tokensWordIndex = tokensWordIndex;
        this.wordsStemIndex = wordsStemIndex;
    }

    /**
     * Orders two substrings by length and then by their sequences of stem indices.
     *
     * @param s1 the first substring; its tokens are read from positions
     *        <code>from</code> (inclusive) to <code>to</code> (exclusive)
     * @param s2 the second substring, read the same way
     * @return a negative value, zero or a positive value when <code>s1</code> is,
     *         respectively, ordered before, equivalent to or ordered after
     *         <code>s2</code>
     */
    @Override
    public int compare(Substring s1, Substring s2)
    {
        final int s1From = s1.from;
        final int s2From = s2.from;
        final int s1Length = s1.to - s1From;
        final int s2Length = s2.to - s2From;

        // Substrings of different lengths can never fall into the same bucket.
        if (s1Length != s2Length)
        {
            // Integer.compare instead of subtraction: same sign, no overflow risk.
            return Integer.compare(s1Length, s2Length);
        }

        // Equal lengths: the first position with differing stems decides the order.
        for (int i = 0; i < s1Length; i++)
        {
            final int stemIndex1 = wordsStemIndex[tokensWordIndex[s1From + i]];
            final int stemIndex2 = wordsStemIndex[tokensWordIndex[s2From + i]];
            if (stemIndex1 != stemIndex2)
            {
                return Integer.compare(stemIndex1, stemIndex2);
            }
        }

        // If the substrings are equal in the inflection-insensitive way, return 0 (equivalence class).
        return 0;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy