All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.http.message.TokenParser Maven / Gradle / Ivy

The newest version!
/*
 * ====================================================================
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */

package org.apache.http.message;

import java.util.BitSet;

import org.apache.http.annotation.Immutable;
import org.apache.http.util.CharArrayBuffer;

/**
 * Low level parser for header field elements. The parsing routines of this class are designed
 * to produce near zero intermediate garbage and make no intermediate copies of input data.
 * <p>
 * This class is immutable and thread safe.
 *
 * @since 4.4
 */
@Immutable
public class TokenParser {

    /**
     * Builds a {@link BitSet} in which every given index is set.
     *
     * @param b the bit indexes (typically character codes) to set
     * @return a new bit set with exactly the given bits set
     */
    public static BitSet INIT_BITSET(final int ... b) {
        final BitSet result = new BitSet();
        for (int i = 0; i < b.length; i++) {
            result.set(b[i]);
        }
        return result;
    }

    /** US-ASCII CR, carriage return (13) */
    public static final char CR = '\r';

    /** US-ASCII LF, line feed (10) */
    public static final char LF = '\n';

    /** US-ASCII SP, space (32) */
    public static final char SP = ' ';

    /** US-ASCII HT, horizontal-tab (9) */
    public static final char HT = '\t';

    /** Double quote */
    public static final char DQUOTE = '\"';

    /** Backward slash / escape character */
    public static final char ESCAPE = '\\';

    /**
     * Tells whether the given character is one of SP, HT, CR or LF.
     *
     * @param ch the character to test
     * @return {@code true} if the character is a space, tab, carriage return or line feed
     */
    public static boolean isWhitespace(final char ch) {
        switch (ch) {
            case SP:
            case HT:
            case CR:
            case LF:
                return true;
            default:
                return false;
        }
    }

    public static final TokenParser INSTANCE = new TokenParser();

    /**
     * Extracts from the sequence of chars a token terminated with any of the given delimiters
     * discarding semantically insignificant whitespace characters.
     *
     * @param buf buffer with the sequence of chars to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param delimiters set of delimiting characters. Can be {@code null} if the token
     *  is not delimited by any character.
     * @return the extracted token; runs of whitespace between content fragments are
     *  collapsed into a single space, leading and trailing whitespace is dropped
     */
    public String parseToken(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
        final StringBuilder token = new StringBuilder();
        // Set when whitespace has been skipped since the last content fragment.
        boolean pendingSpace = false;
        while (!cursor.atEnd()) {
            final char ch = buf.charAt(cursor.getPos());
            if (delimiters != null && delimiters.get(ch)) {
                break;
            }
            if (isWhitespace(ch)) {
                skipWhiteSpace(buf, cursor);
                pendingSpace = true;
                continue;
            }
            // Emit a separator only between fragments, never before the first one.
            if (pendingSpace && token.length() > 0) {
                token.append(' ');
            }
            copyContent(buf, cursor, delimiters, token);
            pendingSpace = false;
        }
        return token.toString();
    }

    /**
     * Extracts from the sequence of chars a value which can be enclosed in quote marks and
     * terminated with any of the given delimiters discarding semantically insignificant
     * whitespace characters.
     *
     * @param buf buffer with the sequence of chars to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param delimiters set of delimiting characters. Can be {@code null} if the value
     *  is not delimited by any character.
     * @return the extracted value; quoted fragments are unquoted, and runs of whitespace
     *  between fragments are collapsed into a single space
     */
    public String parseValue(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
        final StringBuilder value = new StringBuilder();
        // Set when whitespace has been skipped since the last content fragment.
        boolean pendingSpace = false;
        while (!cursor.atEnd()) {
            final char ch = buf.charAt(cursor.getPos());
            if (delimiters != null && delimiters.get(ch)) {
                break;
            }
            if (isWhitespace(ch)) {
                skipWhiteSpace(buf, cursor);
                pendingSpace = true;
                continue;
            }
            // Emit a separator only between fragments, never before the first one.
            if (pendingSpace && value.length() > 0) {
                value.append(' ');
            }
            if (ch == DQUOTE) {
                copyQuotedContent(buf, cursor, value);
            } else {
                copyUnquotedContent(buf, cursor, delimiters, value);
            }
            pendingSpace = false;
        }
        return value.toString();
    }

    /**
     * Skips semantically insignificant whitespace characters and moves the cursor to the closest
     * non-whitespace character.
     *
     * @param buf buffer with the sequence of chars to be parsed
     * @param cursor defines the bounds and current position of the buffer
     */
    public void skipWhiteSpace(final CharArrayBuffer buf, final ParserCursor cursor) {
        int pos = cursor.getPos();
        final int limit = cursor.getUpperBound();
        while (pos < limit && isWhitespace(buf.charAt(pos))) {
            pos++;
        }
        cursor.updatePos(pos);
    }

    /**
     * Transfers content into the destination buffer until a whitespace character or any of
     * the given delimiters is encountered.
     *
     * @param buf buffer with the sequence of chars to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param delimiters set of delimiting characters. Can be {@code null} if the value
     *  is delimited by a whitespace only.
     * @param dst destination buffer
     */
    public void copyContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters,
            final StringBuilder dst) {
        int pos = cursor.getPos();
        final int limit = cursor.getUpperBound();
        while (pos < limit) {
            final char ch = buf.charAt(pos);
            final boolean delimiter = delimiters != null && delimiters.get(ch);
            if (delimiter || isWhitespace(ch)) {
                break;
            }
            dst.append(ch);
            pos++;
        }
        cursor.updatePos(pos);
    }

    /**
     * Transfers content into the destination buffer until a whitespace character, a quote,
     * or any of the given delimiters is encountered.
     *
     * @param buf buffer with the sequence of chars to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param delimiters set of delimiting characters. Can be {@code null} if the value
     *  is delimited by a whitespace or a quote only.
     * @param dst destination buffer
     */
    public void copyUnquotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
            final BitSet delimiters, final StringBuilder dst) {
        int pos = cursor.getPos();
        final int limit = cursor.getUpperBound();
        while (pos < limit) {
            final char ch = buf.charAt(pos);
            final boolean delimiter = delimiters != null && delimiters.get(ch);
            if (delimiter || isWhitespace(ch) || ch == DQUOTE) {
                break;
            }
            dst.append(ch);
            pos++;
        }
        cursor.updatePos(pos);
    }

    /**
     * Transfers content enclosed with quote marks into the destination buffer.
     * <p>
     * Does nothing if the cursor is at its end or the character at the cursor is not a
     * double quote. Otherwise the opening quote is consumed and characters are copied up to
     * the closing quote (also consumed) or the upper bound of the cursor. An escaped quote or
     * escaped backslash is copied without the escape character; any other escaped character
     * is copied together with the backslash. Unescaped CR and LF are dropped.
     *
     * @param buf buffer with the sequence of chars to be parsed
     * @param cursor defines the bounds and current position of the buffer
     * @param dst destination buffer
     */
    public void copyQuotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
            final StringBuilder dst) {
        if (cursor.atEnd()) {
            return;
        }
        final int start = cursor.getPos();
        final int limit = cursor.getUpperBound();
        if (buf.charAt(start) != DQUOTE) {
            return;
        }
        // Step over the opening quote.
        int pos = start + 1;
        boolean escaped = false;
        while (pos < limit) {
            // Advance first, so that on a closing quote pos already points past it.
            final char ch = buf.charAt(pos);
            pos++;
            if (escaped) {
                escaped = false;
                // Only quote and backslash are true escapes; keep the backslash for anything else.
                if (ch != DQUOTE && ch != ESCAPE) {
                    dst.append(ESCAPE);
                }
                dst.append(ch);
            } else if (ch == DQUOTE) {
                break;
            } else if (ch == ESCAPE) {
                escaped = true;
            } else if (ch != CR && ch != LF) {
                dst.append(ch);
            }
        }
        cursor.updatePos(pos);
    }

}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy