All downloads are free. Search and download functionality uses the official Maven repository.

io.airlift.airship.shared.Strings Maven / Gradle / Ivy

package io.airlift.airship.shared;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;

import static com.google.common.collect.Collections2.transform;
import static com.google.common.collect.Lists.newArrayList;
import static java.lang.Math.max;
import static java.lang.Math.min;

/**
 * Static helpers for separator-delimited strings (for example {@code /a/b/c}) and for
 * computing how many leading characters are needed to tell a set of strings apart.
 */
public class Strings
{
    /**
     * Same as {@link #commonPrefixSegments(char, Collection, int)} with a {@code minSize} of 0.
     *
     * @param separator the character that delimits segments
     * @param strings the strings to compare; each must start with {@code separator}
     * @return the number of leading segments shared by every string
     */
    public static int commonPrefixSegments(char separator, Collection<String> strings)
    {
        return commonPrefixSegments(separator, strings, 0);
    }

    /**
     * Counts the leading segments that are identical in every string, while never counting
     * the last {@code minSize} segments of the string with the fewest segments.
     *
     * @param separator the character that delimits segments
     * @param strings the strings to compare; each must start with {@code separator}
     *        (an empty string is accepted and contributes zero segments)
     * @param minSize number of trailing segments of the shortest string that may not be
     *        counted as shared; must not be negative
     * @return the number of shared leading segments, or 0 when {@code strings} is empty
     * @throws NullPointerException if {@code strings} is null or contains a null element
     * @throws IllegalArgumentException if {@code minSize} is negative or a string does not
     *         start with {@code separator}
     */
    public static int commonPrefixSegments(char separator, Collection<String> strings, int minSize)
    {
        if (strings == null) {
            throw new NullPointerException("strings is null");
        }
        if (minSize < 0) {
            throw new IllegalArgumentException("minSize is negative");
        }

        if (strings.isEmpty()) {
            return 0;
        }

        int shortestNumberOfParts = Integer.MAX_VALUE;
        List<List<String>> stringsParts = new ArrayList<List<String>>(strings.size());
        for (String string : strings) {
            List<String> parts = splitOn(string, separator);
            if (parts.isEmpty() || !parts.get(0).isEmpty()) {
                throw new IllegalArgumentException("All strings must start with the separator character");
            }
            // drop the empty element that precedes the leading separator
            parts = parts.subList(1, parts.size());
            stringsParts.add(parts);
            shortestNumberOfParts = min(parts.size(), shortestNumberOfParts);
        }

        int maxNumberOfSharedParts = max(shortestNumberOfParts - minSize, 0);

        int commonParts = 0;
        while (commonParts < maxNumberOfSharedParts && isPartEqual(commonParts, stringsParts)) {
            commonParts++;
        }

        return commonParts;
    }

    /**
     * Tells whether every list has an element at {@code partNumber} and all of those elements are equal.
     */
    private static boolean isPartEqual(int partNumber, List<List<String>> stringsParts)
    {
        List<String> reference = stringsParts.get(0);
        if (reference.size() <= partNumber) {
            return false;
        }
        String part = reference.get(partNumber);
        for (List<String> parts : stringsParts) {
            if (parts.size() <= partNumber || !part.equals(parts.get(partNumber))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Removes the first {@code segmentCount} segments from a separator-delimited string.
     * The result always starts with {@code separator}; removing every segment yields the
     * separator alone. Empty segments are preserved, so {@code "/a//b"} trimmed by one
     * segment is {@code "//b"}.
     *
     * @param string the string to trim; must start with {@code separator} (or be empty)
     * @param separator the character that delimits segments
     * @param segmentCount the number of leading segments to remove
     * @return {@code null} when {@code string} is null; {@code string} unchanged when it has
     *         fewer than {@code segmentCount} segments; otherwise the trimmed string
     * @throws IllegalArgumentException if {@code string} does not start with {@code separator}
     * @throws IndexOutOfBoundsException if {@code segmentCount} is negative
     */
    public static String trimLeadingSegments(String string, char separator, int segmentCount)
    {
        if (string == null) {
            return null;
        }

        List<String> segments = splitOn(string, separator);
        if (segments.isEmpty() || !segments.get(0).isEmpty()) {
            throw new IllegalArgumentException("String must start with the separator character");
        }
        segments = segments.subList(1, segments.size());

        if (segments.size() < segmentCount) {
            return string;
        }

        List<String> remaining = segments.subList(segmentCount, segments.size());
        if (remaining.isEmpty()) {
            return String.valueOf(separator);
        }

        // Every remaining segment is emitted with its own leading separator. Deciding whether to
        // prepend based on the joined text would silently drop an empty first segment.
        StringBuilder trimmed = new StringBuilder(string.length());
        for (String segment : remaining) {
            trimmed.append(separator).append(segment);
        }
        return trimmed.toString();
    }

    /**
     * Same as {@link #shortestUniquePrefix(Collection, int)} with a {@code minSize} of 1.
     *
     * @param strings the strings to distinguish; null elements are ignored
     * @return the prefix length needed to tell the strings apart, at least 1
     */
    public static int shortestUniquePrefix(Collection<String> strings)
    {
        return shortestUniquePrefix(strings, 1);
    }

    /**
     * Computes the smallest prefix length at which all strings, each truncated to that
     * length, are distinct from one another.
     *
     * @param strings the strings to distinguish; null elements are ignored
     * @param minSize lower bound for the returned length; also the result when fewer than
     *        two non-null strings are supplied
     * @return the larger of the computed prefix length and {@code minSize}
     * @throws NullPointerException if {@code strings} is null
     * @throws IllegalArgumentException if {@code strings} contains duplicate entries
     */
    public static int shortestUniquePrefix(Collection<String> strings, int minSize)
    {
        if (strings == null) {
            throw new NullPointerException("strings is null");
        }

        // remove nulls
        List<String> nonNull = new ArrayList<String>(strings.size());
        for (String string : strings) {
            if (string != null) {
                nonNull.add(string);
            }
        }

        // must have at least two strings to calculate min size
        if (nonNull.size() < 2) {
            return minSize;
        }

        SortedSet<String> sorted = new TreeSet<String>(nonNull);
        if (sorted.size() != nonNull.size()) {
            throw new IllegalArgumentException("Cannot compute unique prefix size for collection with duplicate entries");
        }

        int prefix = shortestUniquePrefixStartingAt(new ArrayList<String>(sorted), 0);
        return max(prefix, minSize);
    }

    /**
     * Recursive step of {@link #shortestUniquePrefix(Collection, int)}.
     *
     * Expects a sorted, duplicate-free list whose members agree on every character before
     * {@code charPosition}. Returns how many characters, counted from {@code charPosition},
     * are needed to tell the members apart.
     */
    private static int shortestUniquePrefixStartingAt(List<String> strings, int charPosition)
    {
        if (strings.isEmpty()) {
            throw new IllegalArgumentException("strings is empty");
        }
        if (charPosition >= maxLength(strings)) {
            throw new IllegalArgumentException("charPosition is beyond the size of all the provided strings");
        }

        int result = 1;

        // assumes sorted list
        // the algorithm goes like this:
        //   1. identify sequences of strings that start with the same character.
        //      Strings are sorted, so it's just a matter of scanning until the char changes
        //   2. recursively, compute the unique prefix of these strings, starting at the next character position
        //   3. the shortest unique prefix is the max between all the sequences + 1
        boolean first = true;
        char commonChar = 0;
        int sequenceStart = 0;

        for (int index = 0; index < strings.size(); index++) {
            String value = strings.get(index);
            if (charPosition >= value.length()) {
                // too short to have a character here; in a sorted list such strings come first
                continue;
            }

            char currentChar = value.charAt(charPosition);
            if (first) {
                commonChar = currentChar;
                // the first sequence starts at the first string that actually has a character at
                // this position, not at index 0: skipped shorter strings are not part of it
                sequenceStart = index;
                first = false;
                continue;
            }

            if (currentChar != commonChar) {
                if (index - sequenceStart > 1) {
                    // only recurse if we have more than one item to process in the sequence
                    result = max(result, shortestUniquePrefixStartingAt(strings.subList(sequenceStart, index), charPosition + 1) + 1);
                }

                sequenceStart = index;
                commonChar = currentChar;
            }
        }

        // deal with the last sequence
        if (strings.size() - sequenceStart > 1) {
            result = max(result, shortestUniquePrefixStartingAt(strings.subList(sequenceStart, strings.size()), charPosition + 1) + 1);
        }

        return result;
    }

    /**
     * Returns at most the first {@code length} characters of {@code string}.
     *
     * @param string the string to shorten; may be null
     * @param length the maximum number of characters to keep
     * @return {@code null} when {@code string} is null, {@code string} itself when it is not
     *         longer than {@code length}, otherwise its first {@code length} characters
     * @throws StringIndexOutOfBoundsException if {@code length} is negative
     */
    public static String safeTruncate(String string, int length)
    {
        if (string == null) {
            return null;
        }
        if (length > string.length()) {
            return string;
        }
        return string.substring(0, length);
    }

    /**
     * Splits {@code string} at every occurrence of {@code separator}, keeping empty pieces
     * (including leading and trailing ones). The result always has at least one element.
     */
    private static List<String> splitOn(String string, char separator)
    {
        List<String> pieces = new ArrayList<String>();
        int start = 0;
        int end = string.indexOf(separator, start);
        while (end >= 0) {
            pieces.add(string.substring(start, end));
            start = end + 1;
            end = string.indexOf(separator, start);
        }
        pieces.add(string.substring(start));
        return pieces;
    }

    /**
     * Returns the length of the longest string in a non-empty list.
     */
    private static int maxLength(List<String> strings)
    {
        int longest = 0;
        for (String string : strings) {
            longest = max(longest, string.length());
        }
        return longest;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy