All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jsoup.parser.PrefixXmlTreeBuilder Maven / Gradle / Ivy

Go to download

This is a open project of Java. The project integrated Apache Commons-VFS and Jsoup. It can be grabbing data much easy.

There is a newer version: 1.1.1
Show newest version
package org.jsoup.parser;
/**
 * 利用增加prefix的方式,騙過 Token.Read
 * 再於寫入StartTag及EndTag前,將TagName還原
 *
 * 

Usage example: {@code Document xmlDoc = Jsoup.parse(html, baseUrl, new Parser( new PrefixXmlTreeBuilder("prefixName") );}

* * @author Abola Lee **** * Use the {@code XmlTreeBuilder} when you want to parse XML without any of the HTML DOM rules being applied to the * document. *

Usage example: {@code Document xmlDoc = Jsoup.parse(html, baseUrl, Parser.xmlParser());}

* * @author Jonathan Hedley */ import org.jsoup.helper.Validate; import org.jsoup.nodes.Document.OutputSettings.Syntax; import org.jsoup.nodes.*; import org.jsoup.parser.Token.Comment; import org.jsoup.parser.Token.Doctype; import org.jsoup.parser.Token.EndTag; import org.jsoup.parser.Token.StartTag; import java.util.List; public class PrefixXmlTreeBuilder extends XmlTreeBuilder { String prefix; public PrefixXmlTreeBuilder(String prefix) { this.prefix = prefix; } protected boolean process(Token token) { switch (token.type) { case StartTag: insert(token.asStartTag()); break; case EndTag: popStackToClose(token.asEndTag()); break; case Comment: insert(token.asComment().asStartTag()); break; case Character: insert(token.asCharacter()); break; case Doctype: insert(token.asDoctype()); break; case EOF: // could put some normalisation here if desired break; default: Validate.fail("Unexpected token type: " + token.type); } return true; } protected void initialiseParse(String input, String baseUri, ParseErrorList errors) { super.initialiseParse(input, baseUri, errors); this.stack.add(this.doc); this.doc.outputSettings().syntax(Syntax.xml); } private void insertNode(Node node) { this.currentElement().appendChild(node); } Element insert(StartTag startTag) { // remove prefix Tag tag = Tag.valueOf(startTag.name().replace(this.prefix,"")); Element el = new Element(tag, this.baseUri, startTag.attributes); this.insertNode(el); if(startTag.isSelfClosing()) { this.tokeniser.acknowledgeSelfClosingFlag(); if(!tag.isKnownTag()) { tag.setSelfClosing(); } } else { this.stack.add(el); } return el; } void insert(Comment commentToken) { org.jsoup.nodes.Comment comment = new org.jsoup.nodes.Comment(commentToken.getData(), this.baseUri); Object insert = comment; if(commentToken.bogus) { String data = comment.getData(); if(data.length() > 1 && (data.startsWith("!") || data.startsWith("?"))) { String declaration = data.substring(1); insert = new XmlDeclaration(declaration, comment.baseUri(), data.startsWith("!")); } } this.insertNode((Node)insert); } void insert(Token.Character characterToken) { TextNode node = new TextNode(characterToken.getData(), this.baseUri); this.insertNode(node); } void insert(Doctype d) { DocumentType doctypeNode = new DocumentType(d.getName(), d.getPublicIdentifier(), d.getSystemIdentifier(), this.baseUri); this.insertNode(doctypeNode); } private void popStackToClose(EndTag endTag) { // remove prefix String elName = endTag.name().replace(this.prefix,""); Element firstFound = null; int pos; Element next; for(pos = this.stack.size() - 1; pos >= 0; --pos) { next = (Element)this.stack.get(pos); if(next.nodeName().equals(elName)) { firstFound = next; break; } } if(firstFound != null) { for(pos = this.stack.size() - 1; pos >= 0; --pos) { next = (Element)this.stack.get(pos); this.stack.remove(pos); if(next == firstFound) { break; } } } } List parseFragment(String inputFragment, String baseUri, ParseErrorList errors) { this.initialiseParse(inputFragment, baseUri, errors); this.runParser(); return this.doc.childNodes(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy