All Downloads are FREE. Search and download functionalities are using the official Maven repository.

patsy.FormulaParser Maven / Gradle / Ivy

There is a newer version: 1.2.8
Show newest version
/*
 * Copyright (c) 2021 Villu Ruusmann
 *
 * This file is part of JPMML-Python
 *
 * JPMML-Python is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * JPMML-Python is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with JPMML-Python.  If not, see <http://www.gnu.org/licenses/>.
 */
package patsy;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import org.jpmml.python.ParseException;
import org.jpmml.python.PushbackPythonParserTokenManager;
import org.jpmml.python.PythonParserConstants;
import org.jpmml.python.SimpleCharStream;
import org.jpmml.python.StringProvider;
import org.jpmml.python.Token;

/**
 * Parses formula strings into {@link PatsyTerm} trees.
 *
 * <p>
 * Parsing is done in two stages. First, the formula is split into a flat list of {@link PatsyToken}s,
 * where each element is either a single operator/parenthesis token or a run of tokens that make up
 * an embedded Python expression. Second, the token list is assembled into a term tree using
 * two stacks (operands and pending operators) and operator precedence.
 * </p>
 */
public class FormulaParser {

	/**
	 * Command-line entry point. Parses the first argument as a formula and prints the resulting term to standard output.
	 *
	 * @param args Command-line arguments. The first element is the formula string; any further elements are ignored.
	 *
	 * @throws IllegalArgumentException If no formula argument is given.
	 * @throws Exception If the formula cannot be parsed.
	 */
	static
	public void main(String... args) throws Exception {

		if(args.length < 1){
			throw new IllegalArgumentException("Usage: " + FormulaParser.class.getName() + " <formula>");
		}

		PatsyTerm patsyTerm = parseFormula(args[0]);

		System.out.println(patsyTerm);
	}

	/**
	 * Parses a formula string.
	 *
	 * @param string The formula string.
	 *
	 * @return The root term of the parsed formula.
	 *
	 * @throws ParseException If the formula is empty, incomplete or otherwise malformed.
	 */
	static
	public PatsyTerm parseFormula(String string) throws ParseException {
		SimpleCharStream simpleCharStream = new SimpleCharStream(new StringProvider(string));

		PushbackPythonParserTokenManager tokenManager = new PushbackPythonParserTokenManager(simpleCharStream);

		// OPEN_PAREN is only pushed onto the operator stack by the parser itself; it is excluded from the set of formula operators
		EnumSet<PatsyOperator> privatePatsyOperators = EnumSet.of(PatsyOperator.OPEN_PAREN);

		EnumSet<PatsyOperator> patsyOperators = EnumSet.complementOf(privatePatsyOperators);

		Set<Integer> patsyOperatorTokens = patsyOperators.stream()
			.map(PatsyOperator::getKind)
			.collect(Collectors.toSet());

		List<PatsyToken> patsyTokens = tokenizeFormula(tokenManager, patsyOperatorTokens);

		return infixParse(patsyTokens, patsyOperators);
	}

	/**
	 * Assembles a flat list of formula tokens into a term tree.
	 *
	 * <p>
	 * The parser alternates between two states: expecting an operand ("noun") and expecting an operator.
	 * Operands are collected on the noun stack, pending operators on the operator stack.
	 * </p>
	 *
	 * @param patsyTokens The formula tokens, in source order.
	 * @param patsyOperators The operators that may appear in the formula. Every operator must have an arity of 1 or 2.
	 *
	 * @return The single term that remains after all operators have been applied.
	 *
	 * @throws ParseException If an operator has an unsupported arity,
	 * if the formula ends where an operand is expected,
	 * if parentheses are unbalanced, or if a token appears in a position where it is not allowed.
	 */
	static
	private PatsyTerm infixParse(List<PatsyToken> patsyTokens, Set<PatsyOperator> patsyOperators) throws ParseException {
		// The same token kind may map to both a unary and a binary operator, hence two separate lookup tables
		Map<Integer, PatsyOperator> unaryPatsyOperators = new HashMap<>();
		Map<Integer, PatsyOperator> binaryPatsyOperators = new HashMap<>();

		for(PatsyOperator patsyOperator : patsyOperators){
			int kind = patsyOperator.getKind();
			int arity = patsyOperator.getArity();

			switch(arity){
				case 1:
					unaryPatsyOperators.put(kind, patsyOperator);
					break;
				case 2:
					binaryPatsyOperators.put(kind, patsyOperator);
					break;
				default:
					throw new ParseException();
			}
		}

		boolean wantNoun = true;

		Deque<PatsyTerm> nounStack = new ArrayDeque<>();
		Deque<PatsyOperator> opStack = new ArrayDeque<>();

		for(PatsyToken patsyToken : patsyTokens){

			if(wantNoun){
				wantNoun = readNoun(patsyToken, nounStack, opStack, unaryPatsyOperators, binaryPatsyOperators);
			} else {
				wantNoun = readOp(patsyToken, nounStack, opStack, unaryPatsyOperators, binaryPatsyOperators);
			}
		}

		// The formula ended where an operand was expected (eg. an empty formula, "a +" or "-").
		// Without this check, the pending operators would be applied to an under-filled noun stack,
		// which fails with an unchecked NoSuchElementException instead of a ParseException
		if(wantNoun){
			throw new ParseException();
		}

		while(!opStack.isEmpty()){

			// An opening parenthesis that was never closed
			if(opStack.peek() == PatsyOperator.OPEN_PAREN){
				throw new ParseException();
			}

			runOp(nounStack, opStack);
		}

		if(nounStack.size() != 1){
			throw new ParseException();
		}

		return nounStack.pop();
	}

	/**
	 * Handles a token in a position where an operand is expected.
	 *
	 * @param patsyToken The current token.
	 * @param nounStack The operand stack.
	 * @param opStack The pending operator stack.
	 * @param unaryPatsyOperators Unary operators, keyed by token kind.
	 * @param binaryPatsyOperators Binary operators, keyed by token kind. Not consulted by this method.
	 *
	 * @return <code>true</code> if the next token must again be read as an operand, <code>false</code> if it must be read as an operator.
	 *
	 * @throws ParseException If the token is neither an opening parenthesis, a unary operator nor an operand.
	 */
	static
	private boolean readNoun(PatsyToken patsyToken, Deque<PatsyTerm> nounStack, Deque<PatsyOperator> opStack, Map<Integer, PatsyOperator> unaryPatsyOperators, Map<Integer, PatsyOperator> binaryPatsyOperators) throws ParseException {
		int kind = patsyToken.getKind();

		if(kind == PythonParserConstants.LPAREN){
			opStack.push(PatsyOperator.OPEN_PAREN);

			return true;
		}

		PatsyOperator unaryPatsyOperator = unaryPatsyOperators.get(kind);
		if(unaryPatsyOperator != null){
			opStack.push(unaryPatsyOperator);

			return true;
		}

		// NOTE(review): -1 is presumably the kind reported by tokens that wrap an embedded Python expression - confirm against PatsyToken
		if(kind == -1){
			nounStack.push(new PatsyFactor(patsyToken));

			return false;
		}

		throw new ParseException();
	}

	/**
	 * Handles a token in a position where an operator is expected.
	 *
	 * @param patsyToken The current token.
	 * @param nounStack The operand stack.
	 * @param opStack The pending operator stack.
	 * @param unaryPatsyOperators Unary operators, keyed by token kind. Not consulted by this method.
	 * @param binaryPatsyOperators Binary operators, keyed by token kind.
	 *
	 * @return <code>true</code> if the next token must be read as an operand, <code>false</code> if it must again be read as an operator.
	 *
	 * @throws ParseException If the token is a closing parenthesis without a matching opening parenthesis,
	 * or if the token is neither a closing parenthesis nor a binary operator.
	 */
	static
	private boolean readOp(PatsyToken patsyToken, Deque<PatsyTerm> nounStack, Deque<PatsyOperator> opStack, Map<Integer, PatsyOperator> unaryPatsyOperators, Map<Integer, PatsyOperator> binaryPatsyOperators) throws ParseException {
		int kind = patsyToken.getKind();

		if(kind == PythonParserConstants.RPAREN){

			// Apply everything that is pending inside the parenthesized group
			while(!opStack.isEmpty() && (opStack.peek() != PatsyOperator.OPEN_PAREN)){
				runOp(nounStack, opStack);
			}

			// The loop above stops either at an OPEN_PAREN marker or at the bottom of the stack; the latter means an unmatched closing parenthesis
			if(opStack.isEmpty()){
				throw new ParseException();
			}

			// Discard the OPEN_PAREN marker
			opStack.pop();

			return false;
		}

		PatsyOperator binaryPatsyOperator = binaryPatsyOperators.get(kind);
		if(binaryPatsyOperator == null){
			throw new ParseException();
		}

		// Apply pending operators that bind at least as tightly as the incoming one.
		// NOTE(review): this relies on OPEN_PAREN having a lower precedence than every binary operator - confirm against PatsyOperator
		while(!opStack.isEmpty() && (binaryPatsyOperator.getPrecedence() <= (opStack.peek()).getPrecedence())){
			runOp(nounStack, opStack);
		}

		opStack.push(binaryPatsyOperator);

		return true;
	}

	/**
	 * Applies the topmost pending operator.
	 *
	 * <p>
	 * Pops the operator, pops as many operands as the operator's arity demands,
	 * and pushes the resulting {@link PatsyOperation} back onto the operand stack.
	 * </p>
	 *
	 * @param nounStack The operand stack.
	 * @param opStack The pending operator stack. Must not be empty.
	 */
	static
	private void runOp(Deque<PatsyTerm> nounStack, Deque<PatsyOperator> opStack){
		PatsyOperator patsyOperator = opStack.pop();

		int arity = patsyOperator.getArity();

		List<PatsyTerm> patsyTerms = new ArrayList<>();

		for(int i = 0; i < arity; i++){
			patsyTerms.add(nounStack.pop());
		}

		// Operands come off the stack right-to-left; restore the source (left-to-right) order
		Collections.reverse(patsyTerms);

		nounStack.push(new PatsyOperation(patsyOperator, patsyTerms));
	}

	/**
	 * Splits a formula into formula-level tokens.
	 *
	 * <p>
	 * Operator and parenthesis tokens become single-token {@link PatsyToken}s.
	 * Any other token starts an embedded Python expression, which is read in full and wrapped into one {@link PatsyToken}.
	 * </p>
	 *
	 * @param tokenManager The token source.
	 * @param patsyOperatorTokens The token kinds of formula operators. This set is not modified.
	 *
	 * @return The formula tokens, in source order.
	 *
	 * @throws ParseException If an embedded Python expression is empty or has unbalanced brackets.
	 */
	static
	private List<PatsyToken> tokenizeFormula(PushbackPythonParserTokenManager tokenManager, Set<Integer> patsyOperatorTokens) throws ParseException {
		List<PatsyToken> result = new ArrayList<>();

		// Work on a private copy: the argument belongs to the caller, and may well be unmodifiable
		Set<Integer> patsyTokenKinds = new HashSet<>(patsyOperatorTokens);
		patsyTokenKinds.add(PythonParserConstants.LPAREN);
		patsyTokenKinds.add(PythonParserConstants.RPAREN);

		// An opening parenthesis does not terminate a Python expression, because it may start a function call argument list
		Set<Integer> pythonEndTokens = new HashSet<>(patsyTokenKinds);
		pythonEndTokens.remove(PythonParserConstants.LPAREN);

		while(true){
			Token token = tokenManager.getNextToken();

			if(token.kind == PythonParserConstants.EOF){
				break;
			}

			if(patsyTokenKinds.contains(token.kind)){
				result.add(new PatsyToken(token));
			} else {
				// Not a formula-level token: hand it back, and consume the whole Python expression that it starts
				tokenManager.pushBack(token);

				List<Token> tokens = readPythonExpr(tokenManager, pythonEndTokens);
				if(tokens.isEmpty()){
					throw new ParseException();
				}

				result.add(new PatsyToken(tokens));
			}
		}

		return result;
	}

	/**
	 * Reads the tokens of one embedded Python expression.
	 *
	 * <p>
	 * Reading stops at the end of input, or at the first end token that is encountered outside of any parentheses or square brackets.
	 * The token that stopped the reading is pushed back to the token manager.
	 * </p>
	 *
	 * @param tokenManager The token source.
	 * @param pythonEndTokens The token kinds that terminate a Python expression at bracket level zero.
	 *
	 * @return The tokens of the expression, in source order. May be empty.
	 *
	 * @throws ParseException If a closing bracket has no matching opening bracket, or if an opening bracket is never closed.
	 */
	static
	private List<Token> readPythonExpr(PushbackPythonParserTokenManager tokenManager, Set<Integer> pythonEndTokens) throws ParseException {
		List<Token> result = new ArrayList<>();

		int bracketLevel = 0;

		while(true){
			Token token = tokenManager.getNextToken();

			if(token.kind == PythonParserConstants.EOF){
				tokenManager.pushBack(token);

				break;
			}

			// End tokens only count outside of brackets; inside brackets they are part of the Python expression
			if((bracketLevel == 0) && pythonEndTokens.contains(token.kind)){
				tokenManager.pushBack(token);

				break;
			}

			switch(token.kind){
				case PythonParserConstants.LPAREN:
				case PythonParserConstants.LBRACKET:
					bracketLevel++;
					break;
				case PythonParserConstants.RPAREN:
				case PythonParserConstants.RBRACKET:
					bracketLevel--;

					if(bracketLevel < 0){
						throw new ParseException();
					}
					break;
				default:
					break;
			}

			result.add(token);
		}

		if(bracketLevel != 0){
			throw new ParseException();
		}

		return result;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy