All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.unkrig.commons.text.pattern.PatternUtil Maven / Gradle / Ivy


/*
 * de.unkrig.commons - A general-purpose Java class library
 *
 * Copyright (c) 2014, Arno Unkrig
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
 * following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
 *       following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
 *       following disclaimer in the documentation and/or other materials provided with the distribution.
 *    3. The name of the author may not be used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

package de.unkrig.commons.text.pattern;

import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.CharBuffer;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import de.unkrig.commons.lang.protocol.Function;
import de.unkrig.commons.nullanalysis.Nullable;

/**
 * {@link Pattern}-related utility methods.
 */
public final
class PatternUtil {

    private static final Logger LOGGER = Logger.getLogger(PatternUtil.class.getName());

    private PatternUtil() {}

    /**
     * Reads text from in, replaces all matches of pattern according to the
     * replacementString, and writes the result to out.
     * 

* The pattern search is stream-oriented, not line-oriented, i.e. matches are found even across line boundaries. * Thus the pattern should have been compiled with the {@link Pattern#MULTILINE} flag. *

* * @see Matcher#appendReplacement(StringBuffer, String) For the format of the replacementString */ public static void replaceAll(Reader in, Pattern pattern, String replacementString, Writer out) throws IOException { PatternUtil.replaceAll( in, pattern, PatternUtil.replacementStringReplacer(replacementString), out, 8192 ); } /** * Return a replacer which always returns the given string. *

* Opposed to {@link #replacementStringReplacer(String)}, "{@code $}" and "{@code \}" have no special meaning. *

* * @see #replacementStringReplacer(String) * @see #systemPropertyReplacer() */ public static Function constantReplacer(@Nullable final String string) { return new Function() { @Override @Nullable public String call(@Nullable Matcher match) { return string; } }; } /** * @return A replacer which forms the replacement from the match and * the given replacementString * @see Matcher#appendReplacement(StringBuffer, String) For the format of the replacementString * @see #constantReplacer(String) * @see #systemPropertyReplacer() */ public static Function replacementStringReplacer(final String replacementString) { return new Function() { @Override @Nullable public String call(@Nullable Matcher matcher) { assert matcher != null; // Process replacement string to replace group references with groups. StringBuffer result = new StringBuffer(); for (int idx = 0; idx < replacementString.length();) { char c = replacementString.charAt(idx); if (c == '\\') { result.append(replacementString.charAt(++idx)); idx++; } else if (c == '$') { idx++; // Scan first digit of reference number. int referenceNumber = replacementString.charAt(idx) - '0'; if (referenceNumber < 0 || referenceNumber > 9) { throw new IllegalArgumentException("Illegal group reference"); } idx++; // Scan following digits of reference number. while (idx < replacementString.length()) { int nextDigit = replacementString.charAt(idx) - '0'; if (nextDigit < 0 || nextDigit > 9) break; int newRefNum = (referenceNumber * 10) + nextDigit; if (matcher.groupCount() < newRefNum) break; referenceNumber = newRefNum; idx++; } // Append group. String group = matcher.group(referenceNumber); if (group != null) result.append(group); } else { result.append(c); idx++; } } return result.toString(); } }; } /** * Returns a replacer which returns the value of the system property named by group #1 of the match. *

* Example: *

*
     *   PatternUtil.replaceAll(
     *       Pattern.compile("\\$\\{([^}]+)}").matcher("file.separator is ${file.separator}"),
     *       PatternUtil.systemPropertyReplacer()
     *   )
     * 
* * @see #constantReplacer(String) * @see #replacementStringReplacer(String) */ public static Function systemPropertyReplacer() { return new Function() { @Override @Nullable public String call(@Nullable Matcher matcher) { assert matcher != null; return System.getProperty(matcher.group(1)); } }; } /** * Reads text from in, replaces all matches of * "${system-property-name}" with the value of the system property, and writes * the result to out. * * @return The number of replacements that were executed */ public static int replaceSystemProperties(Reader in, Writer out) throws IOException { return PatternUtil.replaceAll( in, // in Pattern.compile("\\$\\{([^}]+)}", Pattern.MULTILINE), // pattern PatternUtil.systemPropertyReplacer(), // replacer out, // out 4096 // initialBufferCapacity ); } /** * The generalized form of {@link Matcher#replaceAll(String)}: The replacement for a match is not formed from a * "replacement string" (with variables "$0", "$1", ...), but is computed by the replacer. If the * replacer returns {@code null} for a match, then the match is not replaced. */ public static String replaceAll(Matcher matcher, Function replacer) { matcher.reset(); StringBuffer sb = new StringBuffer(); for (boolean result = matcher.find(); result; result = matcher.find()) { String replacement = replacer.call(matcher); if (replacement == null) continue; // It may seem odd to use "quoteReplacement()" here, but since we have no access to the matcher's text, // it is the only way to achieve what we want. Fortunately "quoteReplacement()" is very fast when the // replacement string contains no dollar signs nor backslashes. matcher.appendReplacement(sb, Matcher.quoteReplacement(replacement)); } matcher.appendTail(sb); return sb.toString(); } /** * Reads characters from in, finds all matches of pattern, replaces each match with the * result of the replacer, and writes the result to out. If the replacer returns {@code null} * for a match, then the match is not replaced. *

* The pattern search is stream-oriented, not line-oriented, i.e. matches are found even across line boundaries. * Therefore the pattern should have been compiled with the {@link Pattern#MULTILINE} flag. *

*

* This method attempts to load as few characters into memory as possible. Notice, however, that in particular * the usage of "greedy quantifiers", for example "{@code a.*b}", can lead to all the remaining content being * read into memory. *

* * @param initialBufferCapacity The initial capacity of the temporary {@link CharBuffer} that is used for pattern * matching; the buffer will automatically be resized as necessary; 4096 may be a good * value * @return The number of replacements that were executed */ public static int replaceAll(Reader in, Pattern pattern, Function replacer, Writer out, int initialBufferCapacity) throws IOException { PatternUtil.LOGGER.log(Level.FINE, "Replace all matches of ''{0}''", pattern); int replacementCount = 0; CharBuffer cb = CharBuffer.allocate(initialBufferCapacity); for (;;) { // Fill the buffer as far as possible (limited by the buffer capacity or by EOI). while (cb.hasRemaining() && in.read(cb) != -1); if (cb.position() == 0) break; // End-of-input. cb.flip(); // Find the next match. Matcher m = pattern.matcher(cb); boolean found = m.find(); while (m.hitEnd()) { // We hit the end; read more data until we don't hit the end any more. if (cb.limit() < cb.capacity()) { // There's room left in the CharBuffer; fill it. cb.compact(); if (in.read(cb) == -1) { // End-of-input. cb.flip(); break; } while (cb.hasRemaining() && in.read(cb) != -1); cb.flip(); } else { PatternUtil.LOGGER.finest("Increasing buffer size"); cb = CharBuffer.allocate(cb.capacity() * 2).append(cb); if (in.read(cb) == -1) { // End-of-input. cb.flip(); m = pattern.matcher(cb); found = m.find(); break; } while (cb.hasRemaining() && in.read(cb) != -1); cb.flip(); } m = pattern.matcher(cb); found = m.find(); } if (!found) { out.append(cb); cb.clear(); continue; } if (m.end() == 0) { // Start-of-input match. if (cb.limit() == 0) break; out.append(cb.get()); cb.compact(); continue; } String replacement = replacer.call(m); if (replacement == null) { PatternUtil.LOGGER.log(Level.CONFIG, "Leaving match ''{0}'' unreplaced", m.group()); out.append(cb, 0, m.end()); } else { PatternUtil.LOGGER.log( Level.CONFIG, "Replacing match ''{0}'' with ''{1}''", new Object[] { m.group(), replacement } ); out.append(cb, 0, m.start()); out.append(replacement); replacementCount++; } cb.position(m.end()); cb.compact(); // If the CharBuffer was enlarged (due to 'hitEnd()'), shrink it to its initial size (if possible). if (cb.capacity() > initialBufferCapacity && cb.position() <= initialBufferCapacity) { PatternUtil.LOGGER.finest("Restoring initial buffer size"); cb.flip(); cb = CharBuffer.allocate(initialBufferCapacity).append(cb); } } PatternUtil.LOGGER.log( Level.FINE, "Replaced {0,choice,0#no matches|1#one match|1<{0} matches} of ''{1}''", new Object[] { replacementCount, pattern } ); return replacementCount; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy