All Downloads are FREE. Search and download functionalities are using the official Maven repository.

java.util.regex.Splitter Maven / Gradle / Ivy

/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package java.util.regex;

import java.util.ArrayList;
import java.util.List;
import libcore.util.EmptyArray;

/**
 * Used to make {@code String.split} fast (and to help {@code Pattern.split} too).
 * @hide
 */
public class Splitter {
    // The RI allows regular expressions beginning with ] or }, but that's probably a bug.
    private static final String METACHARACTERS = "\\?*+[](){}^$.|";

    private Splitter() {
    }

    /**
     * Returns a result equivalent to {@code s.split(separator, limit)} if it's able
     * to compute it more cheaply than ICU, or null if the caller should fall back to
     * using ICU.
     */
    public static String[] fastSplit(String re, String input, int limit) {
        // Can we do it cheaply?
        int len = re.length();
        if (len == 0) {
            return null;
        }
        char ch = re.charAt(0);
        if (len == 1 && METACHARACTERS.indexOf(ch) == -1) {
            // We're looking for a single non-metacharacter. Easy.
        } else if (len == 2 && ch == '\\') {
            // We're looking for a quoted character.
            // Quoted metacharacters are effectively single non-metacharacters.
            ch = re.charAt(1);
            if (METACHARACTERS.indexOf(ch) == -1) {
                return null;
            }
        } else {
            return null;
        }

        // We can do this cheaply...

        // Unlike Perl, which considers the result of splitting the empty string to be the empty
        // array, Java returns an array containing the empty string.
        if (input.isEmpty()) {
            return new String[] { "" };
        }

        // Count separators
        int separatorCount = 0;
        int begin = 0;
        int end;
        while (separatorCount + 1 != limit && (end = input.indexOf(ch, begin)) != -1) {
            ++separatorCount;
            begin = end + 1;
        }
        int lastPartEnd = input.length();
        if (limit == 0 && begin == lastPartEnd) {
            // Last part is empty for limit == 0, remove all trailing empty matches.
            if (separatorCount == lastPartEnd) {
                // Input contains only separators.
                return EmptyArray.STRING;
            }
            // Find the beginning of trailing separators.
            do {
                --begin;
            } while (input.charAt(begin - 1) == ch);
            // Reduce separatorCount and fix lastPartEnd.
            separatorCount -= input.length() - begin;
            lastPartEnd = begin;
        }

        // Collect the result parts.
        String[] result = new String[separatorCount + 1];
        begin = 0;
        for (int i = 0; i != separatorCount; ++i) {
            end = input.indexOf(ch, begin);
            result[i] = input.substring(begin, end);
            begin = end + 1;
        }
        // Add last part.
        result[separatorCount] = input.substring(begin, lastPartEnd);
        return result;
    }

    public static String[] split(Pattern pattern, String re, String input, int limit) {
        String[] fastResult = fastSplit(re, input, limit);
        if (fastResult != null) {
            return fastResult;
        }

        // Unlike Perl, which considers the result of splitting the empty string to be the empty
        // array, Java returns an array containing the empty string.
        if (input.isEmpty()) {
            return new String[] { "" };
        }

        // Collect text preceding each occurrence of the separator, while there's enough space.
        ArrayList list = new ArrayList();
        Matcher matcher = new Matcher(pattern, input);
        int begin = 0;
        while (list.size() + 1 != limit && matcher.find()) {
            list.add(input.substring(begin, matcher.start()));
            begin = matcher.end();
        }
        return finishSplit(list, input, begin, limit);
    }

    private static String[] finishSplit(List list, String input, int begin, int limit) {
        // Add trailing text.
        if (begin < input.length()) {
            list.add(input.substring(begin));
        } else if (limit != 0) {
            list.add("");
        } else {
            // Remove all trailing empty matches in the limit == 0 case.
            int i = list.size() - 1;
            while (i >= 0 && list.get(i).isEmpty()) {
                list.remove(i);
                i--;
            }
        }
        // Convert to an array.
        return list.toArray(new String[list.size()]);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy