
patsy.FormulaParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pmml-python Show documentation
Show all versions of pmml-python Show documentation
JPMML Python to PMML converter
/*
* Copyright (c) 2021 Villu Ruusmann
*
* This file is part of JPMML-Python
*
* JPMML-Python is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* JPMML-Python is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with JPMML-Python. If not, see <http://www.gnu.org/licenses/>.
*/
package patsy;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.jpmml.python.ParseException;
import org.jpmml.python.PushbackPythonParserTokenManager;
import org.jpmml.python.PythonParserConstants;
import org.jpmml.python.SimpleCharStream;
import org.jpmml.python.StringProvider;
import org.jpmml.python.Token;
/**
 * Parses Patsy-style formula strings into a tree of {@link PatsyTerm} nodes.
 *
 * <p>
 * Parsing is done in two stages. The formula is first split into Patsy operator tokens
 * (including parentheses) and opaque runs of Python tokens. The resulting token list is then
 * folded into a term tree by a two-stack, precedence-driven infix parser.
 * </p>
 */
public class FormulaParser {

    /**
     * Command-line entry point: parses the first argument as a formula and prints the result.
     *
     * @param args Command-line arguments; the first element is the formula string.
     * @throws IllegalArgumentException If no formula argument was supplied.
     * @throws Exception If the formula cannot be parsed.
     */
    public static void main(String... args) throws Exception {

        if(args.length < 1){
            throw new IllegalArgumentException("Usage: " + FormulaParser.class.getName() + " <formula>");
        }

        PatsyTerm patsyTerm = parseFormula(args[0]);

        System.out.println(patsyTerm);
    }

    /**
     * Parses a formula string.
     *
     * @param string The formula.
     * @return The root term of the parsed formula.
     * @throws ParseException If the formula is empty, incomplete or otherwise malformed.
     */
    public static PatsyTerm parseFormula(String string) throws ParseException {
        SimpleCharStream simpleCharStream = new SimpleCharStream(new StringProvider(string));
        PushbackPythonParserTokenManager tokenManager = new PushbackPythonParserTokenManager(simpleCharStream);

        // OPEN_PAREN only ever lives on the operator stack as a marker;
        // it is excluded from the set of operators that the formula itself may use.
        EnumSet<PatsyOperator> privatePatsyOperators = EnumSet.of(PatsyOperator.OPEN_PAREN);
        EnumSet<PatsyOperator> patsyOperators = EnumSet.complementOf(privatePatsyOperators);

        Set<Integer> patsyOperatorTokens = patsyOperators.stream()
            .map(PatsyOperator::getKind)
            .collect(Collectors.toSet());

        List<PatsyToken> patsyTokens = tokenizeFormula(tokenManager, patsyOperatorTokens);

        return infixParse(patsyTokens, patsyOperators);
    }

    /**
     * Folds a flat token list into a single term using a noun stack and an operator stack.
     *
     * @param patsyTokens The tokens of the formula, in source order.
     * @param patsyOperators The operators that may appear in the formula; each must have arity 1 or 2.
     * @return The single term that remains on the noun stack.
     * @throws ParseException If an operator has an unsupported arity, if the formula ends where an
     * operand is still expected, if a parenthesis is left unmatched, or if the tokens do not reduce
     * to exactly one term.
     */
    private static PatsyTerm infixParse(List<PatsyToken> patsyTokens, Set<PatsyOperator> patsyOperators) throws ParseException {
        // The same token kind may denote both an unary and a binary operator, hence two lookup tables.
        Map<Integer, PatsyOperator> unaryPatsyOperators = new HashMap<>();
        Map<Integer, PatsyOperator> binaryPatsyOperators = new HashMap<>();

        for(PatsyOperator patsyOperator : patsyOperators){
            int kind = patsyOperator.getKind();

            switch(patsyOperator.getArity()){
                case 1:
                    unaryPatsyOperators.put(kind, patsyOperator);
                    break;
                case 2:
                    binaryPatsyOperators.put(kind, patsyOperator);
                    break;
                default:
                    throw new ParseException();
            }
        }

        // The parser alternates between expecting an operand ("noun") and expecting an operator.
        boolean wantNoun = true;

        Deque<PatsyTerm> nounStack = new ArrayDeque<>();
        Deque<PatsyOperator> opStack = new ArrayDeque<>();

        for(PatsyToken patsyToken : patsyTokens){

            if(wantNoun){
                wantNoun = readNoun(patsyToken, nounStack, opStack, unaryPatsyOperators, binaryPatsyOperators);
            } else {
                wantNoun = readOp(patsyToken, nounStack, opStack, unaryPatsyOperators, binaryPatsyOperators);
            }
        }

        // The formula is empty or ends with a dangling operator / open parenthesis.
        // Without this check, runOp would pop an empty noun stack and fail with
        // an unchecked NoSuchElementException instead of a ParseException.
        if(wantNoun){
            throw new ParseException();
        }

        while(!opStack.isEmpty()){

            // An open parenthesis that was never closed
            if(opStack.peek() == PatsyOperator.OPEN_PAREN){
                throw new ParseException();
            }

            runOp(nounStack, opStack);
        }

        if(nounStack.size() != 1){
            throw new ParseException();
        }

        return nounStack.pop();
    }

    /**
     * Consumes one token at a position where an operand is expected.
     *
     * @param patsyToken The current token.
     * @param nounStack The operand stack; receives a new {@link PatsyFactor} for operand tokens.
     * @param opStack The operator stack; receives open parentheses and unary operators.
     * @param unaryPatsyOperators Unary operators by token kind.
     * @param binaryPatsyOperators Binary operators by token kind (not consulted here).
     * @return {@code true} if an operand is still expected next, {@code false} if an operator is expected next.
     * @throws ParseException If the token can neither start nor be an operand.
     */
    private static boolean readNoun(PatsyToken patsyToken, Deque<PatsyTerm> nounStack, Deque<PatsyOperator> opStack, Map<Integer, PatsyOperator> unaryPatsyOperators, Map<Integer, PatsyOperator> binaryPatsyOperators) throws ParseException {
        int kind = patsyToken.getKind();

        if(kind == PythonParserConstants.LPAREN){
            opStack.push(PatsyOperator.OPEN_PAREN);

            return true;
        }

        PatsyOperator unaryPatsyOperator = unaryPatsyOperators.get(kind);
        if(unaryPatsyOperator != null){
            opStack.push(unaryPatsyOperator);

            return true;
        }

        // NOTE(review): -1 is presumably the kind that PatsyToken reports for a token that wraps
        // a run of Python tokens rather than a single operator - confirm against PatsyToken.
        if(kind != -1){
            throw new ParseException();
        }

        nounStack.push(new PatsyFactor(patsyToken));

        return false;
    }

    /**
     * Consumes one token at a position where an operator is expected.
     *
     * @param patsyToken The current token.
     * @param nounStack The operand stack.
     * @param opStack The operator stack.
     * @param unaryPatsyOperators Unary operators by token kind (not consulted here).
     * @param binaryPatsyOperators Binary operators by token kind.
     * @return {@code true} if an operand is expected next, {@code false} if an operator is expected next.
     * @throws ParseException If a closing parenthesis has no matching opening parenthesis,
     * or if the token is not a binary operator.
     */
    private static boolean readOp(PatsyToken patsyToken, Deque<PatsyTerm> nounStack, Deque<PatsyOperator> opStack, Map<Integer, PatsyOperator> unaryPatsyOperators, Map<Integer, PatsyOperator> binaryPatsyOperators) throws ParseException {
        int kind = patsyToken.getKind();

        if(kind == PythonParserConstants.RPAREN){

            // Reduce everything back to the matching open parenthesis
            while(!opStack.isEmpty() && (opStack.peek() != PatsyOperator.OPEN_PAREN)){
                runOp(nounStack, opStack);
            }

            // The loop stops either on an empty stack (unmatched ')') or on OPEN_PAREN
            if(opStack.isEmpty()){
                throw new ParseException();
            }

            opStack.pop();

            // A parenthesized group acts as an operand, so an operator is expected next
            return false;
        }

        PatsyOperator binaryPatsyOperator = binaryPatsyOperators.get(kind);
        if(binaryPatsyOperator == null){
            throw new ParseException();
        }

        // Reduce pending operators whose precedence is equal or higher before stacking this one
        while(!opStack.isEmpty() && (binaryPatsyOperator.getPrecedence() <= opStack.peek().getPrecedence())){
            runOp(nounStack, opStack);
        }

        opStack.push(binaryPatsyOperator);

        return true;
    }

    /**
     * Applies the topmost operator to as many operands as its arity demands,
     * and pushes the resulting {@link PatsyOperation} back onto the operand stack.
     *
     * @param nounStack The operand stack.
     * @param opStack The operator stack; must not be empty.
     */
    private static void runOp(Deque<PatsyTerm> nounStack, Deque<PatsyOperator> opStack){
        PatsyOperator patsyOperator = opStack.pop();

        int arity = patsyOperator.getArity();

        List<PatsyTerm> patsyTerms = new ArrayList<>(arity);

        for(int i = 0; i < arity; i++){
            patsyTerms.add(nounStack.pop());
        }

        // Operands come off the stack last-to-first; restore their source order
        Collections.reverse(patsyTerms);

        nounStack.push(new PatsyOperation(patsyOperator, patsyTerms));
    }

    /**
     * Splits the token stream into Patsy tokens.
     *
     * <p>
     * Operator and parenthesis tokens become single-token Patsy tokens.
     * Any other token starts a Python expression, which is collected up to the next top-level
     * operator or closing parenthesis and wrapped into one Patsy token.
     * </p>
     *
     * @param tokenManager The source of Python tokens.
     * @param patsyOperatorTokens The token kinds of formula operators. This set is not modified.
     * @return The Patsy tokens, in source order.
     * @throws ParseException If a Python expression is empty or has unbalanced brackets.
     */
    private static List<PatsyToken> tokenizeFormula(PushbackPythonParserTokenManager tokenManager, Set<Integer> patsyOperatorTokens) throws ParseException {
        List<PatsyToken> result = new ArrayList<>();

        // Work on a private copy: the argument may be unmodifiable
        // (Collectors.toSet() gives no mutability guarantee), and callers should not see it change.
        Set<Integer> formulaTokens = new HashSet<>(patsyOperatorTokens);
        formulaTokens.add(PythonParserConstants.LPAREN);
        formulaTokens.add(PythonParserConstants.RPAREN);

        // An opening parenthesis does not terminate a Python expression;
        // it is tracked as bracket nesting by readPythonExpr instead.
        Set<Integer> pythonEndTokens = new HashSet<>(formulaTokens);
        pythonEndTokens.remove(PythonParserConstants.LPAREN);

        while(true){
            Token token = tokenManager.getNextToken();

            if(token.kind == PythonParserConstants.EOF){
                break;
            }

            if(formulaTokens.contains(token.kind)){
                result.add(new PatsyToken(token));
            } else {
                // Not an operator: hand the token back and read the whole Python expression
                tokenManager.pushBack(token);

                List<Token> tokens = readPythonExpr(tokenManager, pythonEndTokens);
                if(tokens.isEmpty()){
                    throw new ParseException();
                }

                result.add(new PatsyToken(tokens));
            }
        }

        return result;
    }

    /**
     * Reads tokens until the end of input, or until an end token is seen outside of any brackets.
     * The terminating token is pushed back to the token manager, not consumed.
     *
     * @param tokenManager The source of Python tokens.
     * @param pythonEndTokens The token kinds that terminate the expression at bracket level zero.
     * @return The tokens of the expression, in source order; possibly empty.
     * @throws ParseException If a closing bracket has no matching opening bracket,
     * or if the input ends inside an open bracket.
     */
    private static List<Token> readPythonExpr(PushbackPythonParserTokenManager tokenManager, Set<Integer> pythonEndTokens) throws ParseException {
        List<Token> result = new ArrayList<>();

        // Nesting depth of round and square brackets within the expression
        int bracketLevel = 0;

        while(true){
            Token token = tokenManager.getNextToken();

            boolean endOfInput = (token.kind == PythonParserConstants.EOF);
            boolean endOfExpr = (bracketLevel == 0) && pythonEndTokens.contains(token.kind);

            if(endOfInput || endOfExpr){
                // Leave the terminator for the caller
                tokenManager.pushBack(token);

                break;
            }

            switch(token.kind){
                case PythonParserConstants.LPAREN:
                case PythonParserConstants.LBRACKET:
                    bracketLevel++;
                    break;
                case PythonParserConstants.RPAREN:
                case PythonParserConstants.RBRACKET:
                    bracketLevel--;
                    if(bracketLevel < 0){
                        throw new ParseException();
                    }
                    break;
                default:
                    break;
            }

            result.add(token);
        }

        if(bracketLevel != 0){
            throw new ParseException();
        }

        return result;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy