cn.wanghaomiao.xpath.core.NodeTreeBuilderStateMachine Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of JsoupXpath Show documentation
Show all versions of JsoupXpath Show documentation
一个非常好用而且强大的基于 XPath 的 HTML 解析器。HTML 的 DOM 树生成依赖 Jsoup。Lexer 和 Parser 基于 Antlr4,支持完备的 W3C XPath 1.0 标准语法。W3C 规范:http://www.w3.org/TR/1999/REC-xpath-19991116
package cn.wanghaomiao.xpath.core;
import cn.wanghaomiao.xpath.model.Node;
import cn.wanghaomiao.xpath.model.Predicate;
import cn.wanghaomiao.xpath.util.EmMap;
/**
* Finite state machine used to build the XPath syntax tree.
* <p>
* The machine keeps its parsing state in package-private fields: the current
* {@link BuilderState}, an {@code XContext}, an integer cursor and a
* {@code StringBuilder} accumulator. Each state is expected to implement
* {@code parser(stateMachine, xpath)}; only {@code SCOPE} is visible here.
* <p>
* The tail of the class visible in this copy splits a predicate expression
* into a left operand, an operator and a right operand by scanning the
* characters from the last index down to 0, and stores the result in a
* {@code Predicate} (operator enum looked up through {@code EmMap}).
* <p>
* NOTE(review): this copy of the file is damaged. The line that begins with
* {@code while (stateMachine.cur} is fused with the regular expression of a
* later method, so the body of {@code SCOPE.parser}, any further states and
* the header of the predicate-splitting method are missing. Restore the file
* from the upstream repository before changing any code.
* <p>
* NOTE(review): in the operator regular expressions the characters ^ and $
* are not escaped, so they act as anchors instead of literal characters and
* the ^= and $= alternatives can never match as written. Such input is still
* accepted through the bare = alternative - confirm whether this is intended.
* @author 汪浩淼 [[email protected]]
* @since 13-12-26 3:48 PM
*/
public class NodeTreeBuilderStateMachine {
// Current state of the machine; a new machine starts in SCOPE.
BuilderState state = BuilderState.SCOPE;
// Created once per machine. NOTE(review): XContext is declared elsewhere; its role is not visible here.
XContext context = new XContext();
// Starts at 0. Presumably the read position inside the xpath char array - TODO confirm.
int cur=0;
// Scratch buffer. Presumably collects the characters of the token being read - TODO confirm.
StringBuilder accum = new StringBuilder();
// States of the builder. SCOPE overrides parser(...); other states are not visible in this copy.
enum BuilderState {
SCOPE {
@Override
public void parser(NodeTreeBuilderStateMachine stateMachine, char[] xpath) {
// NOTE(review): the next line is truncated and merged with a later regex; it is kept verbatim.
while (stateMachine.cur|<|>=|<=|^=|\\*=|$=|~=|!=)'.+'")){
while (index>=0){
char tmp = preArray[index];
if (tmp=='\''){
argDeep+=1;
}
if (argDeep==1&&tmp!='\''){
right.insert(0,tmp);
}else if (argDeep==2&&EmMap.getInstance().commOpChar.contains(tmp)){
op.insert(0,tmp);
opFlag=1;
}else if (argDeep>=2&&opFlag>0){
argDeep++;//取完操作符后剩下的都属于left
left.insert(0,tmp);
}
index-=1;
}
}else if (pre.matches(".+(\\+|=|-|>|<|>=|<=|^=|\\*=|$=|~=|!=)[^']+")){
while (index>=0){
char tmp = preArray[index];
if (opFlag==0&&EmMap.getInstance().commOpChar.contains(tmp)){
op.insert(0,tmp);
}else {
if (op.length()>0){
left.insert(0,tmp);
opFlag=1;
}else {
right.insert(0,tmp);
}
}
index-=1;
}
}
predicate.setOpEm(EmMap.getInstance().opEmMap.get(op.toString()));
predicate.setLeft(left.toString());
predicate.setRight(right.toString());
predicate.setValue(pre);
return predicate;
}
}