All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.owasp.encoder.Encoder Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
// Copyright (c) 2012 Jeff Ichnowski
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
//     * Redistributions of source code must retain the above
//       copyright notice, this list of conditions and the following
//       disclaimer.
//
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials
//       provided with the distribution.
//
//     * Neither the name of the OWASP nor the names of its
//       contributors may be used to endorse or promote products
//       derived from this software without specific prior written
//       permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.

package org.owasp.encoder;

import java.nio.CharBuffer;
import java.nio.charset.CoderResult;

/**
 * 

This is the low-level encoding API. For each flavor of encoding * there is an instance of this class that performs the actual * encoding. Overriding and implementing Encoders outside of the * OWASP Encoder's project is not currently supported.

* *

Unless otherwise documented, instances of these classes are * thread-safe. Encoders implementations do not generally carry * state, and if they do the state will be flush with a call to {@link * #encode(java.nio.CharBuffer, java.nio.CharBuffer, boolean)} with * {@code endOfInput} set to {@code true}.

* *

To use an Encoder instance directly, repeatedly call {@link * #encode(java.nio.CharBuffer, java.nio.CharBuffer, boolean)} with * the {@code endOfInput} parameter set to {@code false} while there * is (the possibility of) more input to encode. Once there is no * more input to encode, call {@link #encode(java.nio.CharBuffer, * java.nio.CharBuffer, boolean)} with {@code endOfInput} set to * {@code true} until the method returns {@link * java.nio.charset.CoderResult#UNDERFLOW}.

* *

In general, this class is not expected to be needed directly. * Use the {@link Encode} fluent interface for encoding Strings or * {@link EncodedWriter} for large blocks of contextual encoding.

* * @author Jeff Ichnowski * @see Encode * @see EncodedWriter */ public abstract class Encoder { /** * Hexadecimal conversion array. Package private to prevent corruption. */ static final char[] HEX = "0123456789abcdef".toCharArray(); /** * Bit-shift used for encoding values in hexadecimal. */ static final int HEX_SHIFT = 4; /** * Bit-mask used for encoding values in hexadecimal. */ static final int HEX_MASK = 0xf; /** * Package-private constructor to prevent having to support * external implementations of this class. This may be opened up * in future releases. */ Encoder() {} /** *

This is the kernel of encoding. Currently only CharBuffers * backed by arrays (i.e. {@link java.nio.CharBuffer#hasArray()} * returns {@code true}) are supported. Using a * direct-mapped CharBuffer will result in an * UnsupportedOperationException, though this behavior * may change in future releases.

* *

This method should be called repeatedly while {@code * endOfInput} set to {@code false} while there is more input. * Once there is no more input, this method should be called * {@code endOfInput} set to {@code false} until {@link * java.nio.charset.CoderResult#UNDERFLOW} is returned.

* *

After any call to this method, except when {@code * endOfInput} is {@code true} and the method returns {@code * UNDERFLOW}, there may be characters left to encode in the * {@code input} buffer (i.e. {@code input.hasRemaining() == * true}). This will happen when the encoder needs to see more * input before determining what to do--for example when encoding * for CDATA, if the input ends with {@code "foo]]"}, the encoder * will need to see the next character to determine if it is a ">" * or not.

* *

Example usage:

*
     *   CharBuffer input = CharBuffer.allocate(1024);
     *   CharBuffer output = CharBuffer.allocate(1024);
     *   CoderResult cr;
     *   // assuming doRead fills in the input buffer or
     *   // returns -1 at end of input
     *   while(doRead(input) != -1) {
     *     input.flip();
     *     for (;;) {
     *       cr = encoder.encode(input, output, false);
     *       if (cr.isUnderflow()) {
     *         break;
     *       }
     *       if (cr.isOverflow()) {
     *         // assuming doWrite flushes the encoded
     *         // characters somewhere.
     *         output.flip();
     *         doWrite(output);
     *         output.compact();
     *       }
     *     }
     *     input.compact();
     *   }
     *
     *   // at end of input
     *   input.flip();
     *   do {
     *     cr = encoder.encode(input, output, true);
     *     output.flip();
     *     doWrite(output);
     *     output.compact();
     *   } while (cr.isOverflow());
     * 
* * @param input the input buffer to encode * @param output the output buffer to receive the encoded results * @param endOfInput set to {@code true} if there is no more input, and any * remaining characters at the end of input will either be encoded or * replaced as invalid. * @return Either {@link java.nio.charset.CoderResult#UNDERFLOW} * or {@link java.nio.charset.CoderResult#OVERFLOW}. No other * CoderResult value will be returned. Characters or sequences * that might conceivably return and invalid or unmappable * character result (as part of the nio Charset API) are * automatically replaced to avoid security implications. */ public CoderResult encode(CharBuffer input, CharBuffer output, boolean endOfInput) { if (input.hasRemaining()) { if (input.hasArray() && output.hasArray()) { return encodeArrays(input, output, endOfInput); } else { return encodeBuffers(input, output, endOfInput); } } else { return CoderResult.UNDERFLOW; } } /** * The core encoding loop used when both the input and output buffers * are array backed. The loop is expected to fetch the arrays and * interact with the arrays directly for performance. * * @param input the input buffer. * @param output the output buffer. * @param endOfInput when true, this is the last input to encode * @return UNDERFLOW or OVERFLOW */ CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput) { throw new UnsupportedOperationException(); } /** * The core encoding loop used when either or both input and output * buffers are NOT array-backed. E.g. they are direct buffers or * perhaps the input buffer is a read-only wrapper. In any case, * this method is not currently implemented by any of the encoder * implementations since it is not expected to be common use-case. * The stub is included here for completeness and to demarcate * where the non-array-backed use-case would be included. * * @param input the input buffer. * @param output the output buffer. * @param endOfInput when true, this is the last input to encode * @return never returns. * @throws UnsupportedOperationException -- always */ CoderResult encodeBuffers(CharBuffer input, CharBuffer output, boolean endOfInput) throws UnsupportedOperationException { throw new UnsupportedOperationException(); } /** * Returns the maximum encoded length (in chars) of an input sequence of * {@code n} characters. * * @param n the number of characters of input * @return the worst-case number of characters required to encode */ abstract int maxEncodedLength(int n); /** * Scans the input string for the first character index that requires * encoding. If the entire input does not require encoding then the * length is returned. This method is used by the Encode.forXYZ methods * to return input strings unchanged when possible. * * @param input the input to check for encoding * @param off the offset of the first character to check * @param len the number of characters to check * @return the index of the first character to encode. The return value * will be {@code off+len} if no characters in the input require encoding. */ abstract int firstEncodedOffset(String input, int off, int len); /** * Internal helper method to properly position buffers after encoding up * until an overflow. * * @param input the input buffer * @param i the array offset in the input buffer (translated to position) * @param output the output buffer * @param j the array offset in the output buffer (translated to position) * @return CoderResult.OVERFLOW */ static CoderResult overflow(CharBuffer input, int i, CharBuffer output, int j) { input.position(i - input.arrayOffset()); output.position(j - output.arrayOffset()); return CoderResult.OVERFLOW; } /** * Internal helper method to properly position buffers after encoding up * until an underflow. * * @param input the input buffer * @param i the array offset in the input buffer (translated to position) * @param output the output buffer * @param j the array offset in the output buffer (translated to position) * @return CoderResult.UNDERFLOW */ static CoderResult underflow(CharBuffer input, int i, CharBuffer output, int j) { input.position(i - input.arrayOffset()); output.position(j - output.arrayOffset()); return CoderResult.UNDERFLOW; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy