All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.myfaces.trinidadinternal.el.Tokenizer Maven / Gradle / Ivy

/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 * 
 *  http://www.apache.org/licenses/LICENSE-2.0
 * 
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */
package org.apache.myfaces.trinidadinternal.el;

import java.util.Iterator;
import java.util.NoSuchElementException;


/**
 * converts a EL expression into tokens.
 */
public class Tokenizer implements Iterator
{
  /**
   * Creates a new Tokenizer
   * @param expression the expression to tokenize
   */
  public Tokenizer(String expression)
  {
    _exp = expression;
    _curToken = _next();
  }

  public void remove()
  {
    throw new UnsupportedOperationException();
  }
    
  public boolean hasNext()
  {
    return _curToken != null;
  }
    
  public Tokenizer.Token next()
  {
    if (_curToken == null)
      throw new NoSuchElementException();
    Token tok = _curToken;
    _curToken = _next();
    return tok;
  }
    
  private Token _next()
  {
    int size = _exp.length();
    // if there are no more characters to consume then we cannot return any
    // tokens:
    if (_curIndex >= size)
      return null;
  
    final Token tok;
    // in the initial state everything is just text, and we are searching
    // for the beginning of an expression (searching for "#{" ):
    if (_state == _STATE_INIT)
    {
      int loc = _exp.indexOf("#{", _curIndex);
      if (loc < 0)
      {
        // if we do not find the beginning of an expression, then the entire
        // string is treated as one big text token:
        loc = size;
      }
      _state = _STATE_EXP_START;
      if (loc > _curIndex)
      {
        // there was some text before the beginning of the expression.
        // create a text token from that text:
        tok = new Token(TEXT_TYPE, _curIndex, loc);
        _curIndex = loc;
      }
      else
      {
        // there was no text before the beginning of the expression.
        // create a token for the expression-begin characters #{ :
        tok = _next();
      }
    }// _STATE_INIT
    else if (_state == _STATE_EXP_START)
    {
      // create a token for the expression-begin characters #{  :
      int end = _curIndex + 2; // skip over #{
      tok = new Token(EXP_START_TYPE, _curIndex, end);
      _curIndex = end;
      _state = _STATE_EXP;
    }
    else if (_state == _STATE_EXP)
    {
      // once we are inside an expression we can have:
      // 1. whitespace
      // 2. numbers
      // 3. alphas
      // 4. quotes
      // 5. operators
      // 6. end of expression character "}"
      int start = _curIndex;
      char ch = _exp.charAt(_curIndex++);
      if (Character.isWhitespace(ch))
       tok = _readWhiteSpace(start);
      else if (Character.isDigit(ch)) 
        tok = _readNumber(start);
      else if (Character.isJavaIdentifierStart(ch))
        tok = _readVar(start);
      else if ('}' == ch)
      {
        tok = new Token(EXP_END_TYPE, start, _curIndex);
        _state = _STATE_INIT;
      }
      else if (('"' == ch) || ('\'' == ch))
        tok = _readQuoted(start, ch);
      else // everything else is treated as an operator:
        tok = new Token(OPER_TYPE, start, _curIndex);
    } // _STATE_EXP
    else
      throw new AssertionError("invalid state:"+_state);
    
    
    return tok;
  }

  /**
   * creates a quote token.
   * @param start the start index of the quotation
   * @param quote the quote character that delimits this quotation.
   */
  private Token _readQuoted(int start, char quote)
  {
    // we start at start+1  since we want to skip over the starting
    // quotation delimiter:
    for(int end = start+1;;)
    {
      // search for the ending quotation character:
      end = _exp.indexOf(quote, end);
      if (end >= 0)
      {
        end++;
        // if the ending quotation character was preceded by a back-slash
        // then ignore it; it is escaped.
        if (_exp.charAt(end-2) == '\\')
        {
          continue;
        }
        _curIndex = end;
        return new Token(QUOTED_TYPE, start, end);
      }
      else
        throw _fatal("Cannot find matching quote", start);
    }
  }
  
  private RuntimeException _fatal(String message, int charPos)
  {
    return new IllegalArgumentException(message+". character position:"+charPos);
  }
    
  /**
   * Creates a token for variables
   * @param start the starting index of a variable usage
   */
  private Token _readVar(int start)
  {
    // we skip over the first character since we know that is an alpha already:
    for(int end=start+1;;)
    {
      // search for the end of this variable usage:
      end = _getLastLocOfAlpha(end);
      // if the variable was followed by a period, then append the next variable
      // as well, since this is part of a long dot separated variable usage like:
      // "a.b.c"
      if (_exp.charAt(end) == '.')
        end++;
      else
      {
        _curIndex = end;
        break;
      }
    }
    return new Token(VAR_TYPE, start, _curIndex);
  }
  
  private int _getLastLocOfAlpha(int start)
  {
    while(Character.isJavaIdentifierPart(_exp.charAt(start)))
      start++;
    return start;
  }
    
  /**
   * reads a number token.
   * @param start the starting index of a number
   */
  private Token _readNumber(int start)
  {
    // we can skip the first character since we know it is a number already:
    int end = _getLastLocOfDigit(start+1);
    // if this is a decimal number then append it to this token:
    if (_exp.charAt(end) == '.')
      end = _getLastLocOfDigit(end +1);
    _curIndex = end;
    return new Token(NUMBER_TYPE, start, end);
  }
    

  private int _getLastLocOfDigit(int start)
  {
    while(Character.isDigit(_exp.charAt(start)))
      start++;
    return start;
  }
    
  /**
   * creates a whitespace token
   * @param start the starting index of the first whitespace character
   */
  private Token _readWhiteSpace(int start)
  {
    while(Character.isWhitespace(_exp.charAt(_curIndex)))
    {
      _curIndex++;
    }
    return new Token(WHITE_SPACE_TYPE, start, _curIndex);
  }
    
  private final String _exp;
  private int _curIndex = 0;
  private int _state = _STATE_INIT;
  private Token _curToken = null;
  
  public final class Token
  {
    public final int type;
    private final int _start, _end;
    
    private Token(int type, int start, int end)
    {
      this.type = type;
      _start = start;
      _end = end;
    }
    
    public String getText()
    {
      return _exp.substring(_start, _end);
    }
  }
  
  
  private static final int _STATE_INIT = 100;
  private static final int _STATE_EXP_START = 200;
  private static final int _STATE_EXP = 300;
  
  
  /**
   * Identifies some plain text
   */
  public static final int TEXT_TYPE = 100;
  
  /**
   * Identifies the expression start string.
   * This string is two charaters long and is: "#{"
   */
  public static final int EXP_START_TYPE = 200;
  
  /**
   * Identifies the expression end character.
   * This is character is "}"
   */
  public static final int EXP_END_TYPE = 300;
  
  /**
   * Identifies a series of whitespace characters
   */
  public static final int WHITE_SPACE_TYPE = 400;
  
  /**
   * Identifies a quotation. 
   * Eg: "quotation", 'quotation', 'he said, "foo!"'  ,
   * "it's it". 
   * More examples:   "foo \" bar" ,  'foo \' bar'
   */
  public static final int QUOTED_TYPE = 500;
  
  /**
   * Identifies a variable usage.
   * Eg: "row", "row.name", "foo.bar.baz"
   */
  public static final int VAR_TYPE = 600;
  
  /**
   * identifies a number
   * eg:  "123", "321.321"
   */
  public static final int NUMBER_TYPE = 700;
  
  /**
   * Identifies an operator.
   * eg: ">", "[", "+", "?"
   */
  public static final int OPER_TYPE = 800;
  
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy