
// org.simpleframework.http.parse.ContentParser Maven / Gradle / Ivy
/*
* ContentParser.java February 2001
*
* Copyright (C) 2001, Niall Gallagher
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General
* Public License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307 USA
*/
package org.simpleframework.http.parse;
import org.simpleframework.http.ContentType;
import org.simpleframework.util.KeyMap;
import org.simpleframework.util.parse.ParseBuffer;
import org.simpleframework.util.parse.Parser;
/**
 * This provides access to the MIME type parts, that is the primary
 * type, the secondary type and an optional character set parameter.
 * The <code>charset</code> parameter is one of many parameters that
 * can be associated with a MIME type. This class, however, exposes
 * that parameter through a typed method.
 * <p>
 * The <code>getCharset</code> method will return the character
 * encoding the content type is encoded within. This allows the user
 * of the content to decode it correctly. Other parameters can be
 * acquired from this by simply providing the name of the parameter.
 * <p>
 * Parsing stops as soon as the <code>charset</code> parameter has
 * been read, so only parameters that appear before it are collected
 * in the parameter map.
 *
 * @author Niall Gallagher
 */
public class ContentParser extends Parser implements ContentType {

   /**
    * Used to store the characters consumed for the subtype.
    */
   private final ParseBuffer subtype;

   /**
    * Used to store the characters for the <code>charset</code>.
    */
   private final ParseBuffer charset;

   /**
    * Used to store the characters consumed for the type.
    */
   private final ParseBuffer type;

   /**
    * Used to collect the name of a content type parameter.
    */
   private final ParseBuffer name;

   /**
    * Used to collect the value of the content type parameter.
    */
   private final ParseBuffer value;

   /**
    * Used to store the name value pairs of the parameters.
    */
   private final KeyMap<String> map;

   /**
    * The default constructor will create a <code>ContentParser</code>
    * that contains no charset, type or subtype. This can be used to
    * extract the type, subtype and the optional <code>charset</code>
    * parameter by using the parser's <code>parse(String)</code>
    * method.
    */
   public ContentParser(){
      this.subtype = new ParseBuffer();
      this.charset = new ParseBuffer();
      this.value = new ParseBuffer();
      this.name = new ParseBuffer();
      this.type = new ParseBuffer();
      this.map = new KeyMap<String>();
   }

   /**
    * This is primarily a convenience constructor. This will parse
    * the <code>String</code> given to extract the MIME type. This
    * could be achieved by calling the default no-arg constructor
    * and then using the instance to invoke the <code>parse</code>
    * method on that <code>String</code>.
    *
    * @param header <code>String</code> containing a MIME type value
    */
   public ContentParser(String header){
      this();
      parse(header);
   }

   /**
    * This sets the primary type to whatever value is in the string
    * provided. If the string is null then this will contain a
    * null string for the primary type of the parameter, which is
    * likely invalid in most cases.
    *
    * @param primary the type to set for the primary type of this
    */
   public void setPrimary(String primary) {
      type.reset(primary);
   }

   /**
    * This is used to retrieve the primary type of this MIME type. The
    * primary type part within the MIME type defines the generic type.
    * For example <code>text/plain; charset=UTF-8</code>. This will
    * return the text value. If there is no primary type then this
    * will return <code>null</code> otherwise the string value.
    *
    * @return the primary type part of this MIME type
    */
   public String getPrimary() {
      return type.toString();
   }

   /**
    * This sets the secondary type to whatever value is in the string
    * provided. If the string is null then this will contain a
    * null string for the secondary type of the parameter, which is
    * likely invalid in most cases.
    *
    * @param type the type to set for the secondary type of this
    */
   public void setSecondary(String type) {
      subtype.reset(type);
   }

   /**
    * This is used to retrieve the secondary type of this MIME type.
    * The secondary type part within the MIME type defines the specific
    * type. For example <code>text/html; charset=UTF-8</code>. This
    * will return the html value. If there is no secondary type then
    * this will return <code>null</code> otherwise the string value.
    *
    * @return the secondary type part of this MIME type
    */
   public String getSecondary(){
      return subtype.toString();
   }

   /**
    * This will set the <code>charset</code> to whatever value the
    * string contains. If the string is null then this will not set
    * the parameter to any value and the <code>toString</code> method
    * will not contain any details of the parameter.
    *
    * @param enc parameter value to add to the MIME type
    */
   public void setCharset(String enc) {
      charset.reset(enc);
   }

   /**
    * This is used to retrieve the <code>charset</code> of this MIME
    * type. This is a special parameter associated with the type, if
    * the parameter is not contained within the type then this will
    * return null, which typically means the default of ISO-8859-1.
    *
    * @return the value that this parameter contains
    */
   public String getCharset() {
      return charset.toString();
   }

   /**
    * This is used to retrieve an arbitrary parameter from the MIME
    * type header. This ensures that values for <code>boundary</code>
    * or other such parameters are not lost when the header is parsed.
    * This will return the value, unquoted if required, as a string.
    * The <code>charset</code> is kept separately from the parameter
    * map, so use <code>getCharset</code> to acquire it.
    *
    * @param name this is the name of the parameter to be retrieved
    *
    * @return this is the value for the parameter, or null if empty
    */
   public String getParameter(String name) {
      return map.get(name);
   }

   /**
    * This will add a named parameter to the content type header. If
    * a parameter of the specified name has already been added to the
    * header then that value will be replaced by the new value given.
    * Parameters such as the <code>boundary</code> as well as other
    * common parameters can be set with this method.
    *
    * @param name this is the name of the parameter to be added
    * @param value this is the value to associate with the name
    */
   public void setParameter(String name, String value) {
      map.put(name, value);
   }

   /**
    * This is used to remove all whitespace characters from the
    * <code>String</code> excluding the whitespace within literals.
    * The definition of a literal can be found in RFC 2616.
    * <p>
    * The definition of a literal for RFC 2616 is anything between 2
    * double quotes, excluding quotes that are prefixed with the
    * backward slash character. The buffer is compacted in place and
    * the character count is reduced to the packed length.
    */
   private void pack() {
      int len = count;

      if(len <= 0) {
         return; /* nothing to pack, and there may be no buf[0] to read */
      }
      int seek = 0;
      int pos = 0;
      char old = buf[0];

      while(seek < len){
         char ch = buf[seek++];

         if(ch == '"' && old != '\\'){ /* qd-text */
            buf[pos++] = ch;

            /* copy the literal verbatim up to its closing quote */
            while(seek < len){
               old = buf[seek - 1];
               ch = buf[seek++];
               buf[pos++] = ch;

               if(ch == '"' && old != '\\'){ /* qd-text */
                  break;
               }
            }
         }else if(!space(ch)){
            old = buf[seek - 1];
            buf[pos++] = old;
         }
      }
      count = pos;
   }

   /**
    * This will initialize the parser when it is ready to parse
    * a new <code>String</code>. This will reset the parser to a
    * ready state. The init method is invoked by the parser when
    * the <code>Parser.parse</code> method is invoked.
    */
   protected void init(){
      pack();
      type.clear();
      subtype.clear();
      charset.clear();
      name.clear();
      value.clear();
      map.clear();
      off = 0;
   }

   /**
    * Reads and parses the MIME type from the given <code>String</code>
    * object. This uses the syntax defined by RFC 2616 for the media-type
    * syntax. The <code>charset</code> parameter is the only one given
    * a dedicated buffer, parameters found before it are collected in
    * the parameter map. The syntax for the media type is
    * <pre>
    *
    *    media-type = token "/" token *( ";" parameter )
    *    parameter  = token "=" ( token | literal )
    *
    * </pre>
    */
   protected void parse(){
      type();
      off++; /* / */
      subtype();
      parameters();
   }

   /**
    * This reads the type from the MIME type. This will fill the
    * type <code>ParseBuffer</code>. This will read all chars
    * up to but not including the first instance of a '/'. The type
    * of a media-type as defined by RFC 2616 is
    * <code>type/subtype;param=val;param2=val</code>.
    */
   private void type(){
      while(off < count){
         if(buf[off] == '/'){
            break;
         }
         type.append(buf[off]);
         off++;
      }
   }

   /**
    * This reads the subtype from the MIME type. This will fill the
    * subtype <code>ParseBuffer</code>. This will read all chars
    * up to but not including the first instance of a ';'. The subtype
    * of a media-type as defined by RFC 2616 is
    * <code>type/subtype;param=val;param2=val</code>.
    */
   private void subtype(){
      while(off < count){
         if(buf[off] == ';'){
            break;
         }
         subtype.append(buf[off]);
         off++;
      }
   }

   /**
    * This will read the parameters from the MIME type. Each parameter
    * that appears before the <code>charset</code> parameter is added
    * to the parameter map. The <code>charset</code> parameter itself
    * is read in to its own buffer.
    * <p>
    * Once the <code>charset</code> is retrieved the MIME type is
    * considered to be parsed, so any parameters that follow it are
    * not collected.
    */
   private void parameters(){
      while(skip(";")){
         if(skip("charset=")){
            charset();
            break;
         }else{
            parameter();
            insert();
         }
      }
   }

   /**
    * This will add the name and value tokens to the parameters map.
    * If any previous value of the given name has been inserted
    * into the map then this will overwrite that value. A parameter
    * with an empty name, such as that produced by a trailing ';',
    * is not inserted. The name and value buffers are cleared once
    * this is done so that the next parameter starts out empty.
    */
   private void insert() {
      if(name.length() > 0) {
         insert(name, value);
      }
      name.clear();
      value.clear();
   }

   /**
    * This will add the given name and value to the parameters map.
    * If any previous value of the given name has been inserted
    * into the map then this will overwrite that value. This is
    * used to ensure that the string value is inserted to the map.
    *
    * @param name this is the name of the value to be inserted
    * @param value this is the value that is to be inserted
    */
   private void insert(ParseBuffer name, ParseBuffer value) {
      map.put(name.toString(), value.toString());
   }

   /**
    * This is a parameter as defined by RFC 2616. The parameter is added
    * to a MIME type e.g. <code>type/subtype;param=val</code> etc. The
    * parameter name and value are collected in the name and value
    * buffers and the read offset is moved past the parameter. If the
    * name is not followed by an '=' then no value is read and the
    * offset is left at the character that terminated the name.
    */
   private void parameter(){
      name();

      if(off < count && buf[off] == '='){
         off++; /* = */
         value();
      }
   }

   /**
    * This will read all characters from the buffer before the first
    * '=' character, or the first ';' character if the parameter has
    * no value. This represents a parameter name (see RFC 2616 for
    * token). The characters are appended to the name buffer. This
    * will not cause an <code>IndexOutOfBoundsException</code> as
    * each offset is checked before it is accessed.
    */
   private void name(){
      while(off < count){
         char ch = buf[off];

         if(ch == '=' || ch == ';'){
            break;
         }
         name.append(ch);
         off++;
      }
   }

   /**
    * This is used to read a parameter's value from the buf. This will
    * read all <code>char</code>'s up to but excluding the first ';'
    * encountered from the off within the buf, or if the value is a
    * literal it will read a literal from the buffer (literal is any
    * data between quotes except if the quote is prefixed with a
    * backward slash character). The enclosing quotes of a literal are
    * not stored, escaped quotes within the literal are kept along
    * with the backward slash that prefixes them.
    */
   private void value(){
      if(off >= count){
         return; /* the parameter has no value, e.g. "a=" at the end */
      }
      if(quote(buf[off])){
         for(off++; off < count; off++){
            char ch = buf[off];

            if(quote(ch) && buf[off - 1] != '\\'){
               off++; /* consume the closing quote */
               break;
            }
            value.append(ch);
         }
      }else{
         while(off < count){
            if(buf[off] == ';') {
               break;
            }
            value.append(buf[off]);
            off++;
         }
      }
   }

   /**
    * This method is used to determine if the specified character is a quote
    * character. The quote character is typically used as a boundary for the
    * values within the header. This accepts a single or double quote.
    *
    * @param ch the character to determine if it is a quotation
    *
    * @return true if the character provided is a quotation character
    */
   private boolean quote(char ch) {
      return ch == '\'' || ch == '"';
   }

   /**
    * This is used to read the value from the <code>charset</code> param.
    * This will fill the <code>charset</code> <code>ParseBuffer</code>
    * with the <code>charset</code> value. This will read a literal or
    * a token as the <code>charset</code> value. If the
    * <code>charset</code> is a literal then the quotes will be read as
    * part of the charset. Nothing is read if the parameter has no value.
    */
   private void charset(){
      if(off >= count){
         return; /* "charset=" was the end of the header */
      }
      if(buf[off] == '"'){
         charset.append(buf[off++]);

         while(off < count){
            char ch = buf[off++];

            charset.append(ch);

            /* buf[off - 2] is the character just before the quote */
            if(ch == '"' && buf[off - 2] != '\\'){
               break;
            }
         }
      }else{
         while(off < count){
            if(buf[off] == ';') {
               break;
            }
            charset.append(buf[off]);
            off++;
         }
      }
   }

   /**
    * This will return the value of the MIME type as a string. This
    * will concatenate the primary and secondary type values and
    * add the <code>charset</code> parameter to the type which will
    * recreate the content type. The type and subtype are omitted
    * when no primary type has been parsed or set.
    *
    * @return this returns the string representation of the type
    */
   private String encode() {
      StringBuilder text = new StringBuilder();

      if(type.length() > 0) {
         text.append(type);
         text.append("/");
         text.append(subtype);
      }
      if(charset.length() > 0) {
         text.append("; charset=");
         text.append(charset);
      }
      return encode(text);
   }

   /**
    * This will append the parameters held in the parameter map to
    * the provided buffer. Each parameter is written in the form
    * <code>; name=value</code>, a parameter that has a null value
    * is written as its name only.
    *
    * @param text this is the buffer to encode the parameters to
    *
    * @return this returns the string representation of the type
    */
   private String encode(StringBuilder text) {
      for(String key : map) {
         String token = map.get(key);

         text.append("; ");
         text.append(key);

         if(token != null) {
            text.append("=");
            text.append(token);
         }
      }
      return text.toString();
   }

   /**
    * This will return the value of the MIME type as a string. This
    * will concatenate the primary and secondary type values and
    * add the <code>charset</code> parameter to the type, followed
    * by the remaining parameters, which will recreate the content
    * type.
    *
    * @return this returns the string representation of the type
    */
   @Override
   public String toString() {
      return encode();
   }
}