org.apache.http.message.TokenParser Maven / Gradle / Ivy
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* .
*
*/
package org.apache.http.message;
import java.util.BitSet;
import org.apache.http.annotation.ThreadingBehavior;
import org.apache.http.annotation.Contract;
import org.apache.http.util.CharArrayBuffer;
/**
* Low level parser for header field elements. The parsing routines of this class are designed
* to produce near zero intermediate garbage and make no intermediate copies of input data.
*
* This class is immutable and thread safe.
*
* @since 4.4
*/
@Contract(threading = ThreadingBehavior.IMMUTABLE)
public class TokenParser {
public static BitSet INIT_BITSET(final int ... b) {
final BitSet bitset = new BitSet();
for (final int aB : b) {
bitset.set(aB);
}
return bitset;
}
/** US-ASCII CR, carriage return (13) */
public static final char CR = '\r';
/** US-ASCII LF, line feed (10) */
public static final char LF = '\n';
/** US-ASCII SP, space (32) */
public static final char SP = ' ';
/** US-ASCII HT, horizontal-tab (9) */
public static final char HT = '\t';
/** Double quote */
public static final char DQUOTE = '\"';
/** Backward slash / escape character */
public static final char ESCAPE = '\\';
public static boolean isWhitespace(final char ch) {
return ch == SP || ch == HT || ch == CR || ch == LF;
}
public static final TokenParser INSTANCE = new TokenParser();
/**
* Extracts from the sequence of chars a token terminated with any of the given delimiters
* discarding semantically insignificant whitespace characters.
*
* @param buf buffer with the sequence of chars to be parsed
* @param cursor defines the bounds and current position of the buffer
* @param delimiters set of delimiting characters. Can be {@code null} if the token
* is not delimited by any character.
*/
public String parseToken(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
final StringBuilder dst = new StringBuilder();
boolean whitespace = false;
while (!cursor.atEnd()) {
final char current = buf.charAt(cursor.getPos());
if (delimiters != null && delimiters.get(current)) {
break;
} else if (isWhitespace(current)) {
skipWhiteSpace(buf, cursor);
whitespace = true;
} else {
if (whitespace && dst.length() > 0) {
dst.append(' ');
}
copyContent(buf, cursor, delimiters, dst);
whitespace = false;
}
}
return dst.toString();
}
/**
* Extracts from the sequence of chars a value which can be enclosed in quote marks and
* terminated with any of the given delimiters discarding semantically insignificant
* whitespace characters.
*
* @param buf buffer with the sequence of chars to be parsed
* @param cursor defines the bounds and current position of the buffer
* @param delimiters set of delimiting characters. Can be {@code null} if the value
* is not delimited by any character.
*/
public String parseValue(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
final StringBuilder dst = new StringBuilder();
boolean whitespace = false;
while (!cursor.atEnd()) {
final char current = buf.charAt(cursor.getPos());
if (delimiters != null && delimiters.get(current)) {
break;
} else if (isWhitespace(current)) {
skipWhiteSpace(buf, cursor);
whitespace = true;
} else if (current == DQUOTE) {
if (whitespace && dst.length() > 0) {
dst.append(' ');
}
copyQuotedContent(buf, cursor, dst);
whitespace = false;
} else {
if (whitespace && dst.length() > 0) {
dst.append(' ');
}
copyUnquotedContent(buf, cursor, delimiters, dst);
whitespace = false;
}
}
return dst.toString();
}
/**
* Skips semantically insignificant whitespace characters and moves the cursor to the closest
* non-whitespace character.
*
* @param buf buffer with the sequence of chars to be parsed
* @param cursor defines the bounds and current position of the buffer
*/
public void skipWhiteSpace(final CharArrayBuffer buf, final ParserCursor cursor) {
int pos = cursor.getPos();
final int indexFrom = cursor.getPos();
final int indexTo = cursor.getUpperBound();
for (int i = indexFrom; i < indexTo; i++) {
final char current = buf.charAt(i);
if (!isWhitespace(current)) {
break;
}
pos++;
}
cursor.updatePos(pos);
}
/**
* Transfers content into the destination buffer until a whitespace character or any of
* the given delimiters is encountered.
*
* @param buf buffer with the sequence of chars to be parsed
* @param cursor defines the bounds and current position of the buffer
* @param delimiters set of delimiting characters. Can be {@code null} if the value
* is delimited by a whitespace only.
* @param dst destination buffer
*/
public void copyContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters,
final StringBuilder dst) {
int pos = cursor.getPos();
final int indexFrom = cursor.getPos();
final int indexTo = cursor.getUpperBound();
for (int i = indexFrom; i < indexTo; i++) {
final char current = buf.charAt(i);
if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) {
break;
}
pos++;
dst.append(current);
}
cursor.updatePos(pos);
}
/**
* Transfers content into the destination buffer until a whitespace character, a quote,
* or any of the given delimiters is encountered.
*
* @param buf buffer with the sequence of chars to be parsed
* @param cursor defines the bounds and current position of the buffer
* @param delimiters set of delimiting characters. Can be {@code null} if the value
* is delimited by a whitespace or a quote only.
* @param dst destination buffer
*/
public void copyUnquotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
final BitSet delimiters, final StringBuilder dst) {
int pos = cursor.getPos();
final int indexFrom = cursor.getPos();
final int indexTo = cursor.getUpperBound();
for (int i = indexFrom; i < indexTo; i++) {
final char current = buf.charAt(i);
if ((delimiters != null && delimiters.get(current))
|| isWhitespace(current) || current == DQUOTE) {
break;
}
pos++;
dst.append(current);
}
cursor.updatePos(pos);
}
/**
* Transfers content enclosed with quote marks into the destination buffer.
*
* @param buf buffer with the sequence of chars to be parsed
* @param cursor defines the bounds and current position of the buffer
* @param dst destination buffer
*/
public void copyQuotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
final StringBuilder dst) {
if (cursor.atEnd()) {
return;
}
int pos = cursor.getPos();
int indexFrom = cursor.getPos();
final int indexTo = cursor.getUpperBound();
char current = buf.charAt(pos);
if (current != DQUOTE) {
return;
}
pos++;
indexFrom++;
boolean escaped = false;
for (int i = indexFrom; i < indexTo; i++, pos++) {
current = buf.charAt(i);
if (escaped) {
if (current != DQUOTE && current != ESCAPE) {
dst.append(ESCAPE);
}
dst.append(current);
escaped = false;
} else {
if (current == DQUOTE) {
pos++;
break;
}
if (current == ESCAPE) {
escaped = true;
} else if (current != CR && current != LF) {
dst.append(current);
}
}
}
cursor.updatePos(pos);
}
}