All downloads are free. The search and download features use the official Maven repository.

org.tinygroup.xmlparser.parser.XmlStringParser Maven / Gradle / Ivy

The newest version!
/**
 * Copyright (c) 2012-2017, www.tinygroup.org ([email protected]).
 *
 * Licensed under the GPL, Version 3.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.gnu.org/licenses/gpl.html
 *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.tinygroup.xmlparser.parser;

import org.tinygroup.xmlparser.XmlDocument;
import org.tinygroup.xmlparser.XmlNodeType;
import org.tinygroup.xmlparser.document.XmlDocumentImpl;
import org.tinygroup.xmlparser.node.XmlNode;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-driven parser that builds an {@link XmlDocument} from an XML source string.
 *
 * <p>The head-start / head-end / tail-end patterns come from {@code getHeadStartPattern()},
 * {@code getHeadEndPattern(..)} and {@code getTailEndPattern(..)}, which are called here but
 * not defined in this file (presumably inherited from {@code XmlParser} — confirm).
 *
 * <p>The current scan position is kept in the instance field {@code start}, which
 * {@code parse}, {@code parseNode} and {@code parseHeader} all mutate.
 * NOTE(review): because of that shared field, and the unsynchronized static
 * {@code patternTable}, a single instance is presumably not safe for concurrent use — confirm.
 */
public class XmlStringParser extends XmlParser {
    /** Suffix appended to a node type to form the {@code patternTable} key of its head-end pattern. */
    public static final String HEAD_END_PATTERN = "HeadEndPattern";
    /** Suffix appended to a node type to form the {@code patternTable} key of its tail-end pattern. */
    private static final String TAIL_END_PATTERN = "TailEndPattern";
    // Tag-name regex terminated by '>', with optional whitespace allowed before the '>'.
    // (The original comment calls this the "head tag name" regex although the field is endTagName.)
    private static Pattern endTagName = Pattern
            .compile("(\\w|[\u4e00-\u9fa5]|[.]|[:]|[-])+\\s*>");
    // Regex for the head (opening) tag name.
    private static Pattern startTagName = Pattern
            .compile("(\\w|[\u4e00-\u9fa5]|[-]|[:]|[.])+");
    // Regex for tag attributes: name="value", name='value', or an unquoted name=value.
    private static Pattern attribute = Pattern
            .compile("(\\b(\\w|[\u4e00-\u9fa5]|[/]|[:]|[.]|[-])+\\s*=\\s*\"[^\"]*\")|(\\b(\\w|[\u4e00-\u9fa5]|[/]|[.]|[:]|[-])+\\s*=\\s*'[^']*')|(\\b(\\w|[\u4e00-\u9fa5]|[/]|[.]|[-])+\\s*=\\s*(\\w|[\u4e00-\u9fa5]|[.]|[-])+)");
    // Cache of compiled head-end / tail-end patterns, keyed by node type + one of the suffixes above.
    // NOTE(review): declared as a raw Map, yet get() results are assigned directly to Pattern
    // below — the type arguments appear to have been lost from this copy; confirm against upstream.
    private static Map patternTable = new HashMap();
    // Current scan offset into the source string; advanced as input is consumed.
    private int start = 0;

    /**
     * Parses the children of {@code pnode}, scanning {@code xmlSource} from the current
     * {@code start} offset and appending the nodes found to {@code pnode}.
     *
     * <p>NOTE(review): this copy of the method is incomplete — see the note at the gap below.
     * The origin of any non-null return value is not visible here.
     *
     * @param xmlSource the full XML source text
     * @param pnode     the parent node that receives the parsed children
     * @return {@code null} in every return path visible here except when a nested call
     *         yields a non-null string that does not end with {@code pnode}'s node name,
     *         in which case that string is passed on to the caller
     */
    private String parseNode(String xmlSource, XmlNode pnode) {
        Pattern pattern = Pattern.compile(getHeadStartPattern());
        Matcher matcher = pattern.matcher(xmlSource);
        matcher.region(start, xmlSource.length());// restrict matching to the unparsed remainder
        nexttag:
        while (matcher.find()) {
            // Any non-blank text between the current offset and this match becomes a TEXT child.
            if (start < matcher.start()) {
                String str = xmlSource.substring(start, matcher.start());
                if (str.trim().length() > 0) {
                    XmlNode node = new XmlNode(XmlNodeType.TEXT);
                    node.setContent(str);
                    pnode.addNode(node);
                }
                start = matcher.start();
            }
            String headStart = xmlSource.substring(matcher.start(), matcher.end());// the matched head-start string
            if (headStart.equals("/>")) { // end reached, return
                start = matcher.end();
                return null;
                // NOTE(review): SOURCE IS TRUNCATED ON THE NEXT LINE. Everything between the opening
                // quote of the string literal and "', matcher.end()))" is missing from this copy
                // (apparently swallowed as an HTML tag during extraction). The lost text presumably
                // held the closing-tag branch, the loop over node types that declares `nodetype`
                // and `node`, and the tag-name / attribute-matcher setup — restore from upstream.
            } else if (headStart.equals("', matcher.end()));
                        parseHeader(xmlSource, node, m);
                    }
                    // Read the head-tag terminator.
                    if (nodetype.getHead().getEnd() != null) {
                        // Compile the head-end pattern for this node type once and cache it.
                        Pattern p = patternTable.get(nodetype + HEAD_END_PATTERN);
                        if (p == null) {
                            p = Pattern.compile(getHeadEndPattern(nodetype));
                            patternTable.put(nodetype + HEAD_END_PATTERN, p);
                        }
                        Matcher m = p.matcher(xmlSource);
                        m.region(start, xmlSource.length());
                        if (m.find()) {
                            // Character just before the terminator; '/' is handled by the else branch below.
                            char c = xmlSource.charAt(m.start() - 1);
                            start = m.end();
                            if (c != '/') {
                                if (nodetype.isHasBody()) {// the node can have children: parse them recursively
                                    String r = parseNode(xmlSource, node);
                                    if (r != null) {
                                        // A non-null result ending with this parent's name stops here;
                                        // anything else is handed up to the caller.
                                        if (r.endsWith(pnode.getNodeName())) {
                                            return null;
                                        } else {
                                            return r;
                                        }
                                    } else {
                                        matcher.region(start, xmlSource.length());
                                        continue nexttag;
                                    }
                                }
                            } else {// original comment: "if the node is text content, keep parsing its text";
                                // this branch runs when the character before the terminator is '/'
                                matcher.region(start, xmlSource.length());
                                continue nexttag;
                            }
                        }
                    }
                    // Read the tail tag.
                    if (nodetype.getTail() != null && nodetype.getTail().getEnd() != null) {
                        // Compile the tail-end pattern for this node type once and cache it.
                        Pattern p = patternTable.get(nodetype + TAIL_END_PATTERN);
                        if (p == null) {
                            p = Pattern.compile(getTailEndPattern(nodetype));
                            patternTable.put(nodetype + TAIL_END_PATTERN, p);
                        }
                        Matcher m = p.matcher(xmlSource);
                        m.region(matcher.end(), xmlSource.length());
                        if (m.find()) {
                            // Everything between the head-start match and the tail end is the node's content.
                            node.setContent(xmlSource.substring(matcher.end(), m.start()));
                            start = m.end();
                            matcher.region(start, xmlSource.length());
                            continue nexttag;
                        }
                    }
                }
            }
        }
        return null;
    }

    /**
     * Copies every attribute matched by {@code m} onto {@code node}.
     *
     * <p>Each match is split at its first '='; both sides are trimmed, and a value that
     * starts with a double or single quote has its first and last characters removed.
     * After each attribute, {@code start} is moved to the end of that match.
     *
     * @param xmlSource the full XML source text
     * @param node      the node that receives the attributes
     * @param m         an attribute matcher whose region the caller has already set
     */
    private void parseHeader(String xmlSource, XmlNode node, Matcher m) {
        while (m.find()) {
            String str = xmlSource.substring(m.start(), m.end());
            String k = str.substring(0, str.indexOf('=')).trim();
            String v = str.substring(str.indexOf('=') + 1).trim();
            if (v.startsWith("\"")) {
                v = v.substring(1, v.length() - 1);
            } else if (v.startsWith("'")) {
                v = v.substring(1, v.length() - 1);
            }
            node.setAttribute(k, v);
            start = m.end();
        }
    }

    /**
     * Parses an XML document.
     *
     * <p>Scans {@code xmlSource} for head-start matches. For each node type whose head
     * start equals the matched text, a node is created and attached to the document
     * (root element, comment, doctype, XML declaration or processing instruction).
     * Its name and attributes are read, then either its body is parsed via
     * {@code parseNode} or its raw content is captured up to the tail end.
     *
     * @param xmlSource the XML source text
     * @return XmlDocument the populated document (a new {@code XmlDocumentImpl})
     */
    public XmlDocument parse(String xmlSource) {
        XmlDocument document = new XmlDocumentImpl();
        Pattern pattern = Pattern.compile(getHeadStartPattern());
        Matcher matcher = pattern.matcher(xmlSource);
        nexttag:
        while (matcher.find()) {
            // Characters before the match are discarded.
            start = matcher.end();
            String headStart = matcher.group();
            for (XmlNodeType nt : XmlNodeType.values()) {
                if (nt.getHead() != null && nt.getHead().getStart() != null
                        && nt.getHead().getStart().equals(headStart)) {// find the matching node type and add a node of that type to the document
                    XmlNode node = new XmlNode(nt);
                    if (nt == XmlNodeType.ELEMENT) {
                        document.setRoot(node);
                    } else if (nt == XmlNodeType.COMMENT) {
                        document.addComment(node);
                    } else if (nt == XmlNodeType.DOCTYPE) {
                        document.addDoctype(node);
                    } else if (nt == XmlNodeType.XML_DECLARATION) {
                        document.setXmlDeclaration(node);
                    } else if (nt == XmlNodeType.PROCESSING_INSTRUCTION) {
                        document.addProcessingInstruction(node);
                    }
                    // Read the tag name.
                    if (nt == XmlNodeType.ELEMENT) {
                        Matcher m = startTagName.matcher(xmlSource);
                        m.region(start, xmlSource.length());
                        if (m.find()) {
                            node.setNodeName(xmlSource.substring(m.start(), m.end()));
                            start = m.end();
                        }
                    }
                    if (nt.isHasHeader()) {// add the attribute information to the node
                        Matcher m = attribute.matcher(xmlSource);
                        // Attributes are searched only up to the first '>' after the head-start match.
                        // NOTE(review): indexOf returns -1 when no '>' follows, and Matcher.region
                        // then throws IndexOutOfBoundsException — confirm whether that is intended.
                        m.region(start, xmlSource.indexOf('>', matcher.end()));
                        parseHeader(xmlSource, node, m);
                    }
                    // Read the head-tag terminator.
                    if (nt.getHead().getEnd() != null) {
                        // Compile the head-end pattern for this node type once and cache it.
                        Pattern p = patternTable.get(nt + HEAD_END_PATTERN);
                        if (p == null) {
                            p = Pattern.compile(getHeadEndPattern(nt));
                            patternTable.put(nt + HEAD_END_PATTERN, p);
                        }
                        Matcher m = p.matcher(xmlSource);
                        // The search starts at the end of the head-start match, not at `start`.
                        m.region(matcher.end(), xmlSource.length());
                        if (m.find()) {
                            start = m.end();
                            if (nt.isHasBody()) {
                                // The return value of parseNode is ignored at the top level.
                                parseNode(xmlSource, node);
                                matcher.region(start, xmlSource.length());
                                continue nexttag;
                            }
                        }
                    }
                    // Read the tail tag.
                    if (nt.getTail() != null && nt.getTail().getEnd() != null) {
                        // Compile the tail-end pattern for this node type once and cache it.
                        Pattern p = patternTable.get(nt + TAIL_END_PATTERN);
                        if (p == null) {
                            p = Pattern.compile(getTailEndPattern(nt));
                            patternTable.put(nt + TAIL_END_PATTERN, p);
                        }
                        Matcher m = p.matcher(xmlSource);
                        m.region(start, xmlSource.length());
                        if (m.find()) {
                            // Everything between the head-start match and the tail end is the node's content.
                            node.setContent(xmlSource.substring(matcher.end(), m.start()));
                            start = m.end();
                            matcher.region(start, xmlSource.length());
                            continue nexttag;
                        }
                    }
                }
            }
        }
        return document;
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy