All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.carrot2.text.preprocessing.filter.LeftCompleteLabelFilter Maven / Gradle / Ivy


/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2015, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.text.preprocessing.filter;

import java.util.*;

import com.google.common.collect.Lists;

/**
 * Filters out phrases that are not left complete.
 */
class LeftCompleteLabelFilter extends CompleteLabelFilterBase
{
    int [] createLcp(List phraseCodes)
    {
        int [] lcpArray = new int [phraseCodes.size()];
        for (int i = 0; i < phraseCodes.size() - 1; i++)
        {
            int [] codes = phraseCodes.get(i).getCodes();
            int [] nextCodes = phraseCodes.get(i + 1).getCodes();

            int minLength = Math.min(codes.length, nextCodes.length);
            for (int j = 1; j <= minLength; j++)
            {
                if (codes[codes.length - j] != nextCodes[nextCodes.length - j])
                {
                    break;
                }

                lcpArray[i]++;
            }
        }

        lcpArray[lcpArray.length - 1] = -1;

        return lcpArray;
    }

    List sortPhraseCodes(
        List phrasesWithCodes)
    {
        final ArrayList sortedPhraseCodes = Lists
            .newArrayList(phrasesWithCodes);
        Collections.sort(sortedPhraseCodes, new Comparator()
        {
            public int compare(LabelIndexWithCodes o1, LabelIndexWithCodes o2)
            {
                int [] codesA = o1.getCodes();
                int [] codesB = o2.getCodes();

                int minLength = Math.min(codesA.length, codesB.length);
                for (int i = 1; i <= minLength; i++)
                {
                    if (codesA[codesA.length - i] < codesB[codesB.length - i])
                    {
                        return -1;
                    }
                    else if (codesA[codesA.length - i] > codesB[codesB.length - i])
                    {
                        return 1;
                    }
                }

                if (codesA.length < codesB.length)
                {
                    return -1;
                }
                else if (codesA.length > codesB.length)
                {
                    return 1;
                }

                return 0;
            }
        });

        return sortedPhraseCodes;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy