All downloads are free. Search and download functionality uses the official Maven repository.

org.eclipse.jetty.util.CharsetStringBuilder Maven / Gradle / Ivy

There is a newer version: 2.0.32
Show newest version
//
// ========================================================================
// Copyright (c) 1995 Mort Bay Consulting Pty Ltd and others.
//
// This program and the accompanying materials are made available under the
// terms of the Eclipse Public License v. 2.0 which is available at
// https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
// which is available at https://www.apache.org/licenses/LICENSE-2.0.
//
// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
// ========================================================================
//

package org.eclipse.jetty.util;

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Objects;

/**
 * 

Build a string from a sequence of bytes and/or characters.

*

Implementations of this interface are optimized for processing a mix of calls to already decoded * character based appends (e.g. {@link #append(char)} and calls to undecoded byte methods (e.g. {@link #append(byte)}. * This is particularly useful for decoding % encoded strings that are mostly already decoded but may contain * escaped byte sequences that are not decoded. The standard {@link CharsetDecoder} API is not well suited for this * use-case.

*

Any coding errors in the string will be reported by a {@link CharacterCodingException} thrown * from the {@link #build()} method.

* @see Utf8StringBuilder for UTF-8 decoding with replacement of coding errors and/or fast fail behaviour. * @see CharsetDecoder for decoding arbitrary {@link Charset}s with control over {@link CodingErrorAction}. */ public interface CharsetStringBuilder { /** * @param b An encoded byte to append */ void append(byte b); /** * @param c A decoded character to append */ void append(char c); /** * @param bytes Array of encoded bytes to append */ default void append(byte[] bytes) { append(bytes, 0, bytes.length); } /** * @param b Array of encoded bytes * @param offset offset into the array * @param length the number of bytes to append from the array. */ default void append(byte[] b, int offset, int length) { int end = offset + length; for (int i = offset; i < end; i++) append(b[i]); } /** * @param chars sequence of decoded characters * @param offset offset into the array * @param length the number of character to append from the sequence. */ default void append(CharSequence chars, int offset, int length) { int end = offset + length; for (int i = offset; i < end; i++) append(chars.charAt(i)); } /** * @param buf Buffer of encoded bytes to append. The bytes are consumed from the buffer. */ default void append(ByteBuffer buf) { int end = buf.position() + buf.remaining(); while (buf.position() < end) append(buf.get()); } /** *

Build the completed string and reset the buffer.

* @return The decoded built string which must be complete in regard to any multibyte sequences. * @throws CharacterCodingException If the bytes cannot be correctly decoded or a multibyte sequence is incomplete. */ String build() throws CharacterCodingException; /** * @return the length in characters */ int length(); /** *

Resets this sequence to be empty.

*/ void reset(); /** * @param charset The charset * @return A {@link CharsetStringBuilder} suitable for the charset. */ static CharsetStringBuilder forCharset(Charset charset) { Objects.requireNonNull(charset); if (charset == StandardCharsets.UTF_8) return new Utf8StringBuilder(); if (charset == StandardCharsets.ISO_8859_1) return new Iso88591StringBuilder(); if (charset == StandardCharsets.US_ASCII) return new UsAsciiStringBuilder(); // Use a CharsetDecoder that defaults to CodingErrorAction#REPORT return new DecoderStringBuilder(charset.newDecoder()); } /** * Extended Utf8StringBuilder that mimics {@link CodingErrorAction#REPORT} behaviour * for {@link CharsetStringBuilder} methods. */ class ReportingUtf8StringBuilder extends Utf8StringBuilder { @Override public String toCompleteString() { if (hasCodingErrors()) throw new RuntimeException(new CharacterCodingException()); return super.toCompleteString(); } @Override public String build() throws CharacterCodingException { if (hasCodingErrors()) throw new CharacterCodingException(); return super.build(); } } class Iso88591StringBuilder implements CharsetStringBuilder { private final StringBuilder _builder = new StringBuilder(); @Override public void append(byte b) { _builder.append((char)(0xff & b)); } @Override public void append(char c) { _builder.append(c); } @Override public void append(CharSequence chars, int offset, int length) { _builder.append(chars, offset, offset + length); } @Override public String build() { String s = _builder.toString(); _builder.setLength(0); return s; } @Override public int length() { return _builder.length(); } @Override public void reset() { _builder.setLength(0); } } class UsAsciiStringBuilder implements CharsetStringBuilder { private final StringBuilder _builder = new StringBuilder(); @Override public void append(byte b) { if (b < 0) throw new IllegalArgumentException(); _builder.append((char)b); } @Override public void append(char c) { _builder.append(c); } @Override public void 
append(CharSequence chars, int offset, int length) { _builder.append(chars, offset, offset + length); } @Override public String build() { String s = _builder.toString(); _builder.setLength(0); return s; } @Override public int length() { return _builder.length(); } @Override public void reset() { _builder.setLength(0); } } class DecoderStringBuilder implements CharsetStringBuilder { private final CharsetDecoder _decoder; private final StringBuilder _stringBuilder = new StringBuilder(32); private ByteBuffer _buffer = ByteBuffer.allocate(32); public DecoderStringBuilder(CharsetDecoder charsetDecoder) { _decoder = charsetDecoder; } private void ensureSpace(int needed) { int space = _buffer.remaining(); if (space < needed) { int position = _buffer.position(); _buffer = ByteBuffer.wrap(Arrays.copyOf(_buffer.array(), _buffer.capacity() + needed - space + 32)).position(position); } } @Override public void append(byte b) { ensureSpace(1); _buffer.put(b); } @Override public void append(char c) { if (_buffer.position() > 0) { try { // Append any data already in the decoder _stringBuilder.append(_decoder.decode(_buffer.flip())); _buffer.clear(); } catch (CharacterCodingException e) { // This will be thrown only if the decoder is configured to REPORT, // otherwise errors will be ignored or replaced and we will not catch here. throw new RuntimeException(e); } } _stringBuilder.append(c); } @Override public void append(CharSequence chars, int offset, int length) { if (_buffer.position() > 0) { try { // Append any data already in the decoder _stringBuilder.append(_decoder.decode(_buffer.flip())); _buffer.clear(); } catch (CharacterCodingException e) { // This will be thrown only if the decoder is configured to REPORT, // otherwise errors will be ignored or replaced and we will not catch here. 
throw new RuntimeException(e); } } _stringBuilder.append(chars, offset, offset + length); } @Override public void append(byte[] b, int offset, int length) { ensureSpace(length); _buffer.put(b, offset, length); } @Override public void append(ByteBuffer buf) { ensureSpace(buf.remaining()); _buffer.put(buf); } @Override public String build() throws CharacterCodingException { try { if (_buffer.position() > 0) { CharSequence decoded = _decoder.decode(_buffer.flip()); _buffer.clear(); if (_stringBuilder.isEmpty()) return decoded.toString(); _stringBuilder.append(decoded); } return _stringBuilder.toString(); } finally { _stringBuilder.setLength(0); } } @Override public int length() { return _stringBuilder.length(); } @Override public void reset() { _stringBuilder.setLength(0); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy