All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.simpleframework.http.message.SegmentConsumer Maven / Gradle / Ivy

The newest version!
/*
 * SegmentConsumer.java February 2007
 *
 * Copyright (C) 2001, Niall Gallagher 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
 * implied. See the License for the specific language governing 
 * permissions and limitations under the License.
 */

package org.simpleframework.http.message;

import static org.simpleframework.http.Protocol.ACCEPT_LANGUAGE;
import static org.simpleframework.http.Protocol.CONTENT_DISPOSITION;
import static org.simpleframework.http.Protocol.CONTENT_LENGTH;
import static org.simpleframework.http.Protocol.CONTENT_TYPE;
import static org.simpleframework.http.Protocol.COOKIE;
import static org.simpleframework.http.Protocol.EXPECT;
import static org.simpleframework.http.Protocol.TRANSFER_ENCODING;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

import org.simpleframework.http.ContentDisposition;
import org.simpleframework.http.ContentType;
import org.simpleframework.http.Cookie;
import org.simpleframework.http.parse.ContentDispositionParser;
import org.simpleframework.http.parse.ContentTypeParser;
import org.simpleframework.http.parse.CookieParser;
import org.simpleframework.http.parse.LanguageParser;

/**
 * The SegmentConsumer object provides a consumer that is
 * used to consume a HTTP header. This will read all headers within a
 * HTTP header message until the carriage return line feed empty line
 * is encountered. Once all headers are consumed they are available 
 * using the case insensitive header name. This will remove leading
 * and trailing whitespace from the names and values parsed.
 * 
 * @author Niall Gallagher
 */
public class SegmentConsumer extends ArrayConsumer implements Segment {
     
   /**
    * This is the terminal carriage return and line feed end line.
    */
   private static final byte[]  TERMINAL = { 13, 10, 13, 10 };
   
   /**
    * This is used to represent the content disposition header.
    */
   protected ContentDisposition disposition;
   
   /**
    * This is used to parse the languages accepted in the request.
    */
   protected LanguageParser language;
   
   /**
    * This is used to parse the cookie headers that are consumed.
    */
   protected CookieParser cookies;
   
   /**
    * This is used to store all consumed headers by the header name.
    */
   protected MessageHeader header;
   
   /**
    * This is used to parse the content type header consumed.
    */
   protected ContentType type;
   
   /**
    * This represents the transfer encoding value of the body.
    */
   protected String encoding;
   
   /**
    * During parsing this is used to store the parsed header name,
    */
   protected String name;   
   
   /**
    * During parsing this is used to store the parsed header value.
    */
   protected String value;
   
   /**
    * This is used to determine if there is a continue expected.
    */
   protected boolean expect;
   
   /**
    * Represents the length of the body from the content length.
    */
   protected long length;
   
   /**
    * This represents the length limit of the HTTP header cosumed.
    */
   protected long limit;
   
   /**
    * This is used to track the read offset within the header.
    */
   protected int pos;
   
   /**
    * This is used to track how much of the terminal is read.
    */
   protected int scan;
   
   /**
    * Constructor for the SegmentConsumer object. This
    * is used to create a segment consumer used to consume and parse
    * a HTTP message header. This delegates parsing of headers if
    * they represent special headers, like content type or cookies.
    */
   public SegmentConsumer() {
      this(1048576);
   }   
   
   /**
    * Constructor for the SegmentConsumer object. This
    * is used to create a segment consumer used to consume and parse
    * a HTTP message header. This delegates parsing of headers if
    * they represent special headers, like content type or cookies.
    * 
    * @param limit this is the length limit for a HTTP header
    */
   public SegmentConsumer(int limit) {
      this.language = new LanguageParser();
      this.cookies = new CookieParser();
      this.header = new MessageHeader();
      this.limit = limit;
      this.length = -1;
   }   
   
   /**
    * This method is used to determine the type of a part. Typically
    * a part is either a text parameter or a file. If this is true
    * then the content represented by the associated part is a file.
    *
    * @return this returns true if the associated part is a file
    */   
   public boolean isFile() {
      if(disposition == null) {
         return false;
      }
      return disposition.isFile();
   }
   
   /**
    * This method is used to acquire the name of the part. Typically
    * this is used when the part represents a text parameter rather
    * than a file. However, this can also be used with a file part.
    * 
    * @return this returns the name of the associated part
    */   
   public String getName() {
      if(disposition == null) {
         return null;
      }
      return disposition.getName();      
   }
   
   /**
    * This method is used to acquire the file name of the part. This
    * is used when the part represents a text parameter rather than 
    * a file. However, this can also be used with a file part.
    *
    * @return this returns the file name of the associated part
    */   
   public String getFileName() {
      if(disposition == null) {
         return null;
      }
      return disposition.getFileName();
   }
   
   /**
    * This is a convenience method that can be used to determine the 
    * content type of the message body. This will determine whether
    * there is a Content-Type header, if there is then
    * this will parse that header and represent it as a typed object
    * which will expose the various parts of the HTTP header.
    *
    * @return this returns the content type value if it exists
    */      
   public ContentType getContentType() {
      return type;
   }
   
   /**
    * This is a convenience method that can be used to determine
    * the length of the message body. This will determine if there
    * is a Content-Length header, if it does then the
    * length can be determined, if not then this returns -1.
    *
    * @return the content length, or -1 if it cannot be determined
    */   
   public long getContentLength() {
      return length;
   }
   
   /**
    * This is a convenience method that can be used to determine the 
    * content type of the message body. This will determine whether
    * there is a Transfer-Encoding header, if there is 
    * then this will parse that header and return the first token in
    * the comma separated list of values, which is the primary value.
    *
    * @return this returns the transfer encoding value if it exists
    */    
   public String getTransferEncoding() {
      return encoding;      
   }
   
   /**
    * This is a convenience method that can be used to determine the 
    * content type of the message body. This will determine whether
    * there is a Content-Disposition header, if there is 
    * this will parse that header and represent it as a typed object
    * which will expose the various parts of the HTTP header.
    *
    * @return this returns the content disposition value if it exists
    */    
   public ContentDisposition getDisposition() {
      return disposition;
   }
   
   /**
    * This is used to acquire the locales from the request header. The
    * locales are provided in the Accept-Language header.
    * This provides an indication as to the languages that the client 
    * accepts. It provides the locales in preference order.
    * 
    * @return this returns the locales preferred by the client
    */
   public List getLocales() {
      if(language != null) {
         return language.list();
      }
      return Collections.emptyList();           
   }
   
   /**
    * This can be used to get the values of HTTP message headers
    * that have the specified name. This is a convenience method that 
    * will present that values as tokens extracted from the header.
    * This has obvious performance benefits as it avoids having to 
    * deal with substring and trim calls.
    * 

* The tokens returned by this method are ordered according to * there HTTP quality values, or "q" values, see RFC 2616 section * 3.9. This also strips out the quality parameter from tokens * returned. So "image/html; q=0.9" results in "image/html". If * there are no "q" values present then order is by appearance. *

* The result from this is either the trimmed header value, that * is, the header value with no leading or trailing whitespace * or an array of trimmed tokens ordered with the most preferred * in the lower indexes, so index 0 is has highest preference. * * @param name the name of the headers that are to be retrieved * * @return ordered array of tokens extracted from the header(s) */ public List getValues(String name) { return header.getValues(name); } /** * This can be used to get the value of the first message header * that has the specified name. The value provided from this will * be trimmed so there is no need to modify the value, also if * the header name specified refers to a comma separated list of * values the value returned is the first value in that list. * This returns null if theres no HTTP message header. * * @param name the HTTP message header to get the value from * * @return this returns the value that the HTTP message header */ public String getValue(String name) { return header.getValue(name); } /** * This can be used to get the value of the first message header * that has the specified name. The value provided from this will * be trimmed so there is no need to modify the value, also if * the header name specified refers to a comma separated list of * values the value returned is the first value in that list. * This returns null if there is no HTTP message header. * * @param name the HTTP message header to get the value from * @param index acquires a specific header value from multiple * * @return this returns the value that the HTTP message header */ public String getValue(String name, int index) { return header.getValue(name, index); } /** * This is used to determine if the header represents one that * requires the HTTP/1.1 continue expectation. If the request * does require this expectation then it should be send the * 100 status code which prompts delivery of the message body. * * @return this returns true if a continue expectation exists */ public boolean isExpectContinue() { return expect; } /** * This method is used to add an additional chunk size to the * internal array. Resizing of the internal array is required as * the consumed bytes may exceed the initial size of the array. * In such a scenario the array is expanded the chunk size. * * @param size this is the minimum size to expand the array to */ @Override protected void resize(int size) throws IOException { if(size > limit) { throw new IOException("Header has exceeded maximum size"); } super.resize(size); } /** * This is used to process the headers when the terminal token * has been fully read from the consumed bytes. Processing will * extract all headers from the HTTP header message and further * parse those values if required. */ @Override protected void process() throws IOException { headers(); } /** * This is used to parse the headers from the consumed HTTP header * and add them to the segment. Once added they are available via * the header name in a case insensitive manner. If the header has * a special value, that is, if further information is required it * will be extracted and exposed in the segment interface. */ protected void headers() { while(pos < count) { header(); add(name, value); } } /** * This is used to parse a header from the consumed HTTP message * and add them to the segment. Once added it is available via * the header name in a case insensitive manner. If the header has * a special value, that is, if further information is required it * will be extracted and exposed in the segment interface. */ private void header() { adjust(); name(); adjust(); value(); end(); } /** * This is used to add the name and value specified as a special * header within the segment. Special headers are those where * there are values of interest to the segment. For instance the * Content-Length, Content-Type, and Cookie headers are parsed * using an external parser to extract the values. * * @param name this is the name of the header to be added * @param value this is the value of the header to be added */ protected void add(String name, String value) { if(equal(ACCEPT_LANGUAGE, name)) { language(value); }else if(equal(CONTENT_LENGTH, name)) { length(value); } else if(equal(CONTENT_TYPE, name)) { type(value); } else if(equal(CONTENT_DISPOSITION, name)) { disposition(value); } else if(equal(TRANSFER_ENCODING, name)) { encoding(value); } else if(equal(EXPECT, name)) { expect(value); } else if(equal(COOKIE, name)) { cookie(value); } header.addValue(name, value); } /** * This is used to determine if the expect continue header is * present and thus there is a requirement to send the continue * status before the client sends the request body. This will * basically assume the expectation is always continue. * * @param value the value in the expect continue header */ protected void expect(String value) { expect = true; } /** * This will accept any cookie header and parse it such that all * cookies within it are converted to Cookie objects * and made available as typed objects. If the value can not be * parsed this will not add the cookie value. * * @param value this is the value of the cookie to be parsed */ protected void cookie(String value) { cookies.parse(value); for(Cookie cookie : cookies) { header.setCookie(cookie); } } /** * This is used to parse the Accept-Language header * value. This allows the locales the client is interested in to * be provided in preference order and allows the client do alter * and response based on the locale the client has provided. * * @param value this is the value that is to be parsed */ protected void language(String value) { language = new LanguageParser(value); } /** * This is used to parse the content type header header so that * the MIME type is available to the segment. This provides an * instance of the ContentType object to represent * the content type header, which exposes the charset value. * * @param value this is the content type value to parse */ protected void type(String value) { type = new ContentTypeParser(value); } /** * This is used to parse the content disposition header header so * that the MIME type is available to the segment. This provides * an instance of the Disposition object to represent * the content disposition, this exposes the upload type. * * @param value this is the content type value to parse */ protected void disposition(String value) { disposition = new ContentDispositionParser(value); } /** * This is used to store the transfer encoding header value. This * is used to determine the encoding of the body this segment * represents. Typically this will be the chunked encoding. * * @param value this is the value representing the encoding */ protected void encoding(String value) { encoding = value; } /** * This is used to parse a provided header value for the content * length. If the string provided is not an integer value this will * throw a number format exception, by default length is -1. * * @param value this is the header value of the content length */ protected void length(String value) { try { length = Long.parseLong(value); }catch(Exception e) { length = -1; } } /** * This updates the token for the header name. The name is parsed * according to the presence of a colon ':'. Once a colon character * is encountered then this header name is considered to be read * from the buffer and is used to key the value after the colon. */ private void name() { Token token = new Token(pos, 0); while(pos < count){ if(array[pos] == ':') { pos++; break; } token.size++; pos++; } name = token.text(); } /** * This is used to parse the HTTP header value. This will parse it * in such a way that the line can be folded over several lines * see RFC 2616 for the syntax of a folded line. The folded line * is basically a way to wrap a single HTTP header into several * lines using a tab at the start of the following line to indicate * that the header flows onto the next line. */ private void value() { Token token = new Token(pos, 0); scan: for(int mark = 0; pos < count;){ if(terminal(array[pos])) { /* CR or LF */ for(int i = 0; pos < count; i++){ if(array[pos++] == 10) { /* skip the LF */ if(pos < array.length) { if(space(array[pos])) { mark += i + 1; /* account for bytes examined */ break; /* folding line */ } } break scan; /* not a folding line */ } } } else { if(!space(array[pos])){ token.size = ++mark; } else { mark++; } pos++; } } value = token.text(); } /** * This will update the offset variable so that the next read will * be of a non whitespace character. According to RFC 2616 a white * space character is a tab or a space. This will remove multiple * occurrences of whitespace characters until an non-whitespace * character is encountered. */ protected void adjust() { while(pos < count) { if(!space(array[pos])){ break; } pos++; } } /** * This will update the offset variable so that the next read will * be a non whitespace character or terminal character. According to * RFC 2616 a white space character is a tab or a space. This will * remove multiple occurrences of whitespace characters until an * non-whitespace character or a non-terminal is encountered. This * is basically used to follow through to the end of a header line. */ protected void end() { while(pos < count) { if(!white(array[pos])){ break; } pos++; } } /** * This method is used to scan for the terminal token. It searches * for the token and returns the number of bytes in the buffer * after the terminal token. Returning the excess bytes allows the * consumer to reset the bytes within the consumer object. * * @return this returns the number of excess bytes consumed */ @Override protected int scan() { int length = count; while(pos < count) { if(array[pos++] != TERMINAL[scan++]) { scan = 0; } if(scan == TERMINAL.length) { done = true; count = pos; pos = 0; return length - count; } } return 0; } /** * This is used to determine if two header names are equal, this is * done to ensure that the case insensitivity of HTTP header names * is observed. Special headers are processed using this consumer * and this is used to ensure the correct header is always matched. * * @param name this is the name to compare the parsed token with * @param token this is the header name token to examine * * @return true of the header name token is equal to the name */ protected boolean equal(String name, String token) { return name.equalsIgnoreCase(token); } /** * This identifies a given ISO-8859-1 byte as a space character. A * space is either a space or a tab character in ISO-8859-1. * * @param octet the byte to determine whether it is a space * * @return true if it is a space character, false otherwise */ protected boolean space(byte octet) { return octet == ' ' || octet == '\t'; } /** * This determines if an ISO-8859-1 byte is a terminal character. A * terminal character is a carriage return or a line feed character. * * @param octet the byte to determine whether it is a terminal * * @return true if it is a terminal character, false otherwise */ protected boolean terminal(byte octet){ return octet == 13 || octet == 10; } /** * This is used to determine if a given ISO-8859-1 byte is a white * space character, such as a tab or space or a terminal character, * such as a carriage return or a new line. If it is, this will * return true otherwise it returns false. * * @param octet this is to be checked to see if it is a space * * @return true if the byte is a space character, false otherwise */ protected boolean white(byte octet) { switch(octet) { case ' ': case '\r': case '\n': case '\t': return true; default: return false; } } /** * This is used to provide a string representation of the header * read. Providing a string representation of the header is used * so that on debugging the contents of the delivered header can * be inspected in order to determine a cause of error. * * @return this returns a string representation of the header */ @Override public String toString() { return new String(array, 0, count); } /** * This is used to track the boundaries of a token so that it can * be converted in to a usable string. This will track the length * and offset within the consumed array of the token. When the * token is to be used it can be converted in to a string. */ private class Token { /** * This is used to track the number of bytes within the array. */ public int size; /** * This is used to mark the start offset within the array. */ public int off; /** * Constructor for the Token object. This is used * to create a new token to track the range of bytes that will * be used to create a string representing the parsed value. * * @param off the starting offset for the token range * @param size the number of bytes used for the token */ public Token(int off, int size) { this.off = off; this.size = size; } /** * This is used to convert the byte range to a string. This * will use UTF-8 encoding for the string which is compatible * with the HTTP default header encoding of ISO-8859-1. * * @return the encoded string representing the token */ public String text() { return text("UTF-8"); } /** * This is used to convert the byte range to a string. This * will use specified encoding, if that encoding is not * supported then this will return null for the token value. * * @return the encoded string representing the token */ public String text(String charset) { try { return new String(array, off, size, charset); } catch(IOException e) { return null; } } } }