// org.simpleframework.http.parse.CookieParser, from the simple-http artifact.
// Simple is a high performance asynchronous HTTP framework for Java.
/*
 * CookieParser.java February 2001
 *
 * Copyright (C) 2001, Niall Gallagher 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
 * implied. See the License for the specific language governing 
 * permissions and limitations under the License.
 */

package org.simpleframework.http.parse;

import org.simpleframework.common.parse.Parser;
import org.simpleframework.http.Cookie;

import java.util.Iterator;

/**
 * CookieParser is used to parse the cookie header. The cookie header is
 * one of the headers that is used by the HTTP state management mechanism.
 * The Cookie header is the header that is sent from the client to the
 * server in response to a Set-Cookie header. The syntax of the Cookie
 * header as taken from RFC 2109, HTTP State Management Mechanism.
 * <pre>
 *
 *  cookie          =       "Cookie:" cookie-version
 *                          1*((";" | ",") cookie-value)
 *  cookie-value    =       NAME "=" VALUE [";" path] [";" domain]
 *  cookie-version  =       "$Version" "=" value
 *  NAME            =       attr
 *  VALUE           =       value
 *  path            =       "$Path" "=" value
 *  domain          =       "$Domain" "=" value
 *
 * </pre>
 * The cookie header may consist of several cookies. Each cookie can be
 * extracted from the header by examining the syntax of the cookie
 * header. The syntax of the cookie header is defined in RFC 2109.
 * 
 * Each cookie header has a $Version attribute followed by multiple
 * cookies. Each cookie contains a name and a value, followed by an
 * optional $Path and $Domain attribute. This will parse
 * a given cookie header and return each cookie extracted as a
 * Cookie object.
 *
 * @author Niall Gallagher
 */
public class CookieParser extends Parser implements Iterable {

   /**
    * Set once no further cookie can be produced. It is true for a
    * freshly constructed parser, cleared by init, and
    * set again by the iterator when a parse yields an empty name.
    */
   private boolean finished;

   /**
    * True while a cookie has been parsed but not yet handed out by
    * the iterator. It stops hasNext from parsing the
    * same cookie twice; next clears it.
    */
   private boolean parsed;

   /**
    * Version of the cookies being parsed, as read by the
    * version method from the start of the buffer.
    */
   private int version;

   /**
    * Region of the buffer holding the name of the current cookie.
    */
   private Token name;

   /**
    * Region of the buffer holding the value of the current cookie.
    */
   private Token value;

   /**
    * Region of the buffer holding the $Path of the current cookie;
    * its length is zero when the cookie has no $Path.
    */
   private Token path;

   /**
    * Region of the buffer holding the $Domain of the current cookie;
    * its length is zero when the cookie has no $Domain.
    */
   private Token domain;

   /**
    * Builds a CookieParser that holds no cookies. The
    * parser starts out flagged as finished, so an iterator obtained
    * from it reports false from hasNext until some
    * text has been parsed with this instance.
    */
   public CookieParser(){
      this.finished = true;
      this.name = new Token();
      this.value = new Token();
      this.path = new Token();
      this.domain = new Token();
   }

   /**
    * This is primarily a convenience constructor. It performs the same
    * setup as the no-arg constructor and then hands the given text to
    * the parse method that accepts a String,
    * so the cookies in the header are available straight away.
    *
    * @param header a String containing a cookie value
    */
   public CookieParser(String header){
      this(); /* allocate the tokens and mark the parser as finished */
      /* NOTE(review): parse(String) is not declared in this file; it is
         presumably inherited from Parser - confirm it calls init/parse */
      parse(header);
   }

   /**
    * Puts the parser back into its starting state. The read offset
    * is rewound to the beginning of the buffer, the version and the
    * iteration flags are cleared, and the leading $Version
    * attribute is then consumed so that the offset is left at the
    * first cookie.
    */
   protected void init() {
      off = 0;
      version = 0;
      parsed = false;
      finished = false;
      version(); /* must run last: it reads from the rewound offset */
   }

   /**
    * Extracts the next Cookie from the buffer. When the
    * parser has already been marked as finished this does nothing.
    * Otherwise the next name/value pair, with its optional path and
    * domain, is read into the tokens and the parser records that a
    * cookie is waiting to be collected.
    */
   protected void parse() {
      if(finished){
         return;
      }
      cookie();
      parsed = true;
   }

   /**
    * This is used to skip an arbitrary String within the
    * char buf. The text is compared against the buffer
    * starting at the current offset; if the region in the buffer
    * matches then the offset is moved past it, otherwise the offset
    * is left untouched.
    * <p>
    * Whitespace is ignored on both sides of the comparison and any
    * whitespace that follows the matched region is consumed as well.
    * For example if the input was the string "s omete xt" and the
    * source was "some text to skip" then the result of the skip would
    * be "to skip" in the source string. Characters are compared after
    * being passed through toLower.
    *
    * @param text this is the String value to be skipped
    *
    * @return true if the String was skipped
    */
   protected boolean skip(String text){
      int size = text.length();
      int seek = off;
      int read = 0;

      /* cheap rejection: fewer characters remain than are required */
      if(off + size > count){
         return false;
      }
      while(read < size) {
         /* whitespace in the buffer can let the check above pass while
            the buffer still runs out before the text is matched, so
            the bound has to be tested before every buffer read */
         if(seek >= count){
            return false;
         }
         char a = text.charAt(read);
         char b = buf[seek];

         if(space(b)){
            seek++; /* ignore whitespace in the buffer */
         }else if(space(a)){
            read++; /* ignore whitespace in the text */
         }else {
            if(toLower(a) != toLower(b)){
               return false;
            }
            read++;
            seek++;
         }
      }
      /* commit the match and eat any whitespace that follows it */
      for(off = seek; off < count; off++){
         if(!space(buf[off]))
            break;
      }
      return true;
   }
   
   /**
    * Provides an iterator over the cookies held in the parsed text.
    * This allows the cookie parser to be used within a for each loop
    * so the cookies can be taken out one by one. Every iterator
    * shares the parsing state of this parser.
    *
    * @return this returns an iterator for extracting cookie value
    */
   public Iterator iterator() {
      Sequence cookies = new Sequence();
      return cookies;
   }

   /**
    * This is used so that the collection of Cookies
    * can be reiterated. This allows the collection to be reused.
    * The reset method invokes this parser's
    * init method, which rewinds the offset to the start
    * of the buffer and reads the version again, and then invokes
    * parse so the first cookie is reparsed.
    */
   public void reset() {
      init();  /* rewind and clear the iteration flags */
      parse(); /* parse the first cookie so it is ready for the iterator */
   }

   /**
    * Builds the Cookie described by the current tokens.
    * The name and value tokens are converted to strings and passed on
    * to the two argument form of this method. This should only be
    * used after the parse method has been called, as the tokens must
    * describe a parsed cookie.
    *
    * @return the Cookie that was just parsed
    */
   private Cookie getCookie() {
      String cookieName = name.toString();
      String cookieValue = value.toString();

      return getCookie(cookieName, cookieValue);
   }

   /**
    * Builds a Cookie with the given name and value and
    * fills in the attributes captured by the tokens. This should only
    * be used after the parse method has been called.
    * <p>
    * The domain and the path are applied only when the matching token
    * has a length greater than zero; when the cookie text carried no
    * $Domain or $Path the corresponding
    * setter is never invoked. The version is always applied.
    *
    * @param name the name that the Cookie contains
    * @param value the value that the Cookie contains
    *
    * @return the Cookie that was just parsed
    */
   private Cookie getCookie(String name, String value) {
      boolean hasDomain = domain.len > 0;
      boolean hasPath = path.len > 0;
      Cookie result = new Cookie(name, value, false);

      if(hasDomain) {
         result.setDomain(domain.toString());
      }
      if(hasPath) {
         result.setPath(path.toString());
      }
      result.setVersion(version);
      return result;
   }

   /**
    * Parses a single Cookie from the buffer. A leading
    * separator left over from the version or the previous cookie is
    * removed first, a comma being tried before a semicolon. The name
    * is then read up to the '=' character, the '=' is skipped, and
    * the value with its optional attributes is read. The results are
    * left in the name, value, path and domain tokens.
    */
   private void cookie(){
      boolean comma = skip(","); /* ,|; */

      if(!comma){
         skip(";");
      }
      name();
      skip("="); /* = */
      value();
   }

   /**
    * Extracts the name of the current Cookie. Starting
    * at the current offset this advances up to, but not past, the
    * first '=' character or the end of the buffer. The start and
    * length of the region that was covered are stored in the name
    * token.
    */
   private void name() {
      int start = off;

      while(off < count && buf[off] != '='){
         off++;
      }
      name.off = start;
      name.len = off - start;
   }

   /**
    * Used to extract everything found after the NAME '='
    * within a Cookie. This extracts the Cookie
    * value followed by the $Path and $Domain
    * attributes if they exist (i.e. $Path and
    * $Domain are optional in a cookie see RFC 2109).
    * <p>
    * The path method reads the terminal found before it, as does the
    * domain method, that is ";$Path" is read as the first part of the
    * path method. This is because if there is no path the parser
    * should not read data it does not know belongs to a specific part
    * of the Cookie. The attributes are only recognised
    * in this order, path first and domain second.
    */
   private void value() {
      data();   /* the cookie value itself */
      path();   /* optional ;$Path=... */
      domain(); /* optional ;$Domain=... */
   }

   /**
    * This initializes the value token and extracts the value of this
    * Cookie. The offset and length of the value will be
    * saved in the value token. This will read all char's
    * up to but excluding the first terminal char encountered from the
    * off within the buffer, or if the value is a literal it will read
    * a literal from the buffer (literal is any data between quotes
    * except if the quote is prefixed with a backward slash character
    * that is '\').
    * <p>
    * The surrounding quotes of a literal are not part of the token.
    * If the closing quote is missing then everything up to the end of
    * the buffer is taken as the value, so the token length is never
    * negative.
    */
   private void data() {
      value.off = off;
      value.len = 0;
      if(off < count && buf[off] == '"'){
         int start = ++off; /* first char after the opening quote */
         boolean closed = false;

         while(off < count){
            /* a quote ends the literal unless a backslash precedes it */
            if(buf[off++] == '"' && buf[off-2] != '\\'){
               closed = true;
               break;
            }
         }
         value.off = start;
         /* drop the closing quote only when one was actually read */
         value.len = (closed ? off - 1 : off) - start;
      }else {
         while(off < count){
            if(terminal(buf[off]))
               break;
            value.len++;
            off++;
         }
      }
   }

   /**
    * This initializes the path token and extracts the $Path
    * of this Cookie. The offset and length of the path
    * will be saved in the path token. This will read all char's up to
    * but excluding the first terminal char encountered from the off
    * within the buffer, or if the value is a literal it will read a
    * literal from the buffer (literal is any data between quotes
    * except if the quote is prefixed with a backward slash character,
    * that is '\').
    * <p>
    * This reads the terminal before the $Path so that if
    * there is no $Path for the Cookie then
    * the character before it will not be read needlessly. When there
    * is no $Path, or nothing follows it, the path token
    * is left with a length of zero.
    */
   private void path() {
      path.len = 0; /* reset */
      if(skip(";$Path=")){
         path.off = off;
         /* the attribute may be the last thing in the buffer, so the
            offset has to be checked before the buffer is read */
         if(off < count && buf[off] == '"'){
            int start = ++off; /* first char after the opening quote */
            boolean closed = false;

            while(off < count){
               /* a quote ends the literal unless a backslash precedes it */
               if(buf[off++] == '"' && buf[off-2] != '\\'){
                  closed = true;
                  break;
               }
            }
            path.off = start;
            /* drop the closing quote only when one was actually read */
            path.len = (closed ? off - 1 : off) - start;
         }else{
            while(off < count){
               if(terminal(buf[off]))
                  break;
               path.len++;
               off++;
            }
         }
      }
   }

   /**
    * Initializes the domain token and extracts the $Domain
    * of this Cookie. The offset and length of the domain
    * will be saved in the domain token. This will read all characters
    * up to but excluding the first terminal char encountered from the
    * off within the buffer, or if the value is a literal it will read
    * a literal from the buffer (literal is any data between quotes
    * except if the quote is prefixed with a backward slash character,
    * that is '\').
    * <p>
    * This reads the terminal before the $Domain so that
    * if there is no $Domain for the Cookie
    * then the character before it will not be read needlessly. When
    * there is no $Domain, or nothing follows it, the
    * domain token is left with a length of zero.
    */
   private void domain(){
      domain.len = 0;   /* reset */
      if(skip(";$Domain=")) {
         domain.off = off;
         /* the attribute may be the last thing in the buffer, so the
            offset has to be checked before the buffer is read */
         if(off < count && buf[off] == '"'){
            int start = ++off; /* first char after the opening quote */
            boolean closed = false;

            while(off < count){
               /* a quote ends the literal unless a backslash precedes it */
               if(buf[off++] == '"' && buf[off-2] != '\\'){
                  closed = true;
                  break;
               }
            }
            domain.off = start;
            /* drop the closing quote only when one was actually read */
            domain.len = (closed ? off - 1 : off) - start;
         }else{
            while(off < count){
               if(terminal(buf[off]))
                  break;
               domain.len++;
               off++;
            }
         }
      }
   }

   /**
    * This extracts the $Version of this Cookie.
    * The version is parsed and converted into a decimal int from the
    * digit characters that make up a version.
    * <p>
    * If the buffer starts with "$Version=" then all digit char's up to
    * but excluding the first non digit char are accumulated into the
    * version, which is expected to have been cleared beforehand. A
    * single quote character directly before and directly after the
    * digits is skipped, so a quoted version is accepted. If the buffer
    * does not start with "$Version=" the version is set to one.
    * <p>
    * The buffer is never read at or beyond its end, so a header that
    * stops right after "$Version=" or right after the digits is safe.
    */
   private void version(){
      if(!skip("$Version=")) {
         version = 1;
         return;
      }
      if(off < count && buf[off] == '"'){ /* optional opening quote */
         off++;
      }
      while(off < count && digit(buf[off])){
         version = version * 10 + (buf[off] - '0');
         off++;
      }
      if(off < count && buf[off] == '"'){ /* optional closing quote */
         off++;
      }
   }

   /**
    * This is used to determine if a given character ends a value.
    * Only the ';' character is treated as a terminal. Although RFC
    * 2109 also allows a comma to separate cookies, a comma is not
    * treated as a terminal here and so remains part of the value.
    *
    * @param ch the character that is to be compared
    *
    * @return true if this is a semicolon character
    */
   private boolean terminal(char ch) {
      boolean semicolon = (';' == ch);
      return semicolon;
   }

   /**
    * This is used to represent an Iterator that will
    * iterate over the available cookies within the provided source
    * text. This allows the cookie parser to be used as an iterable
    * with for each loops. Cookies can not be removed with this. The
    * iterator keeps no state of its own; it drives the parsing state
    * of the enclosing parser.
    */
   private class Sequence implements Iterator<Cookie> {

      /**
       * Extracts the next Cookie object from the string
       * given. This will return null when there are no
       * more cookies left in the String being parsed.
       * <p>
       * To find out when there are no more cookies left use the
       * hasNext method. This will only set the name,
       * value, path, domain name version of the cookie
       * because as of RFC 2109 these are the only attributes a
       * Cookie may have, the path and domain are
       * optional.
       *
       * @return an initialized Cookie object, or null
       */
      public Cookie next(){
         if(!hasNext()) {
            return null;
         }
         parsed = false; /* the pending cookie is consumed by this call */
         return getCookie();
      }

      /**
       * Determine whether or not there are any Cookies
       * left in the String. This will attempt to extract
       * another Cookie from the String and
       * cache the result so the next method will produce
       * this Cookie. If another Cookie cannot
       * be parsed from the remainder of the String then
       * this will return false otherwise it will return
       * true.
       * <p>
       * A cookie is only reported when its name is not empty. This is
       * checked for a cookie that was parsed ahead of time, such as by
       * a reset, as well as for one parsed by this method, so empty
       * text never produces a cookie.
       *
       * @return true if there are more cookies false otherwise
       */
      public boolean hasNext(){
         if(finished) {
            return false;
         }
         if(!parsed) {
            parse();
         }
         if(name.len <= 0){
            finished = true;
            return false;
         }
         return true;
      }

      /**
       * This method is used to remove items from the iterator. This
       * however performs no action as the act of parsing should not
       * modify the underlying source text value so that it can be
       * reset with the reset method and used again.
       */
      public void remove() {
         return;
      }
   }
   
   /**
    * This is a token object that is used to store the offset and
    * length of a region of chars in the CookieParser.buf
    * array. The toString method of this token will
    * produce the String value of the region it
    * represents.
    */
   private class Token {

      /**
       * The number of characters that were consumed by this token.
       */
      public int len;

      /**
       * The offset within the buffer that this token starts from.
       */
      public int off;

      /**
       * This converts region within the buffer to a String.
       * This converts the region only if there is a sufficient length;
       * a token with a length of zero or less produces an empty string
       * rather than failing on an invalid region.
       *
       * @return the String value of the region
       */
      public String toString(){
         if(len <= 0) {
            return "";
         }
         return new String(buf,off,len);
      }
   }
}