All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.spdx.licensexml.LicenseXmlHelper Maven / Gradle / Ivy

There is a newer version: 2.2.8
Show newest version
/**
 * Copyright (c) 2016 Source Auditor Inc.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
*/
package org.spdx.licensexml;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

import org.apache.log4j.Logger;
import org.spdx.rdfparser.SpdxRdfConstants;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Static helper class for License XML parsing.
 * <p>
 * Every public entry point walks a DOM subtree and flattens it to text. The walk is
 * driven by two tag sets: "unprocessed" tags are transparent containers whose children
 * are rendered in place, while "skipped" tags are dropped together with everything
 * below them. Any element that is neither structural (list, alt, optional, break,
 * paragraph) nor in one of the two sets causes a {@link LicenseXmlException}.
 *
 * @author Gary O'Neall
 */
public class LicenseXmlHelper implements SpdxRdfConstants {
	static final Logger logger = Logger.getLogger(LicenseXmlHelper.class);

	/** Text emitted once per indentation level after a newline */
	private static final String INDENT_STRING = "   ";

	/**
	 * License/exception text: tags that are dropped, including all of their children
	 */
	static HashSet<String> LICENSE_AND_EXCEPTION_SKIPPED_TAGS = tagSet(
			LICENSEXML_ELEMENT_CROSS_REF,
			LICENSEXML_ELEMENT_CROSS_REFS,
			LICENSEXML_ELEMENT_NOTES,
			LICENSEXML_ELEMENT_STANDARD_LICENSE_HEADER);

	/**
	 * License/exception text: tags that do not require any processing - the text for the
	 * children will be included
	 */
	static HashSet<String> LICENSE_AND_EXCEPTION_UNPROCESSED_TAGS = tagSet(
			LICENSEXML_ELEMENT_COPYRIGHT_TEXT,
			LICENSEXML_ELEMENT_TITLE_TEXT,
			LICENSEXML_ELEMENT_ITEM,
			LICENSEXML_ELEMENT_LICENSE,
			LICENSEXML_ELEMENT_EXCEPTION,
			LICENSEXML_ELEMENT_BULLET);

	/**
	 * Note text: tags that are dropped, including all of their children
	 */
	static HashSet<String> NOTES_SKIPPED_TAGS = tagSet(
			LICENSEXML_ELEMENT_CROSS_REF,
			LICENSEXML_ELEMENT_CROSS_REFS,
			LICENSEXML_ELEMENT_STANDARD_LICENSE_HEADER);

	/**
	 * Note text: tags whose children are rendered in place
	 */
	static HashSet<String> NOTES_UNPROCESSED_TAGS = tagSet(
			LICENSEXML_ELEMENT_COPYRIGHT_TEXT,
			LICENSEXML_ELEMENT_TITLE_TEXT,
			LICENSEXML_ELEMENT_ITEM,
			LICENSEXML_ELEMENT_LICENSE,
			LICENSEXML_ELEMENT_EXCEPTION,
			LICENSEXML_ELEMENT_NOTES,
			LICENSEXML_ELEMENT_BULLET);

	/**
	 * Header text: tags that are dropped, including all of their children
	 */
	static HashSet<String> HEADER_SKIPPED_TAGS = tagSet(
			LICENSEXML_ELEMENT_CROSS_REF,
			LICENSEXML_ELEMENT_CROSS_REFS,
			LICENSEXML_ELEMENT_NOTES);

	/**
	 * Header text: tags whose children are rendered in place
	 */
	static HashSet<String> HEADER_UNPROCESSED_TAGS = tagSet(
			LICENSEXML_ELEMENT_COPYRIGHT_TEXT,
			LICENSEXML_ELEMENT_TITLE_TEXT,
			LICENSEXML_ELEMENT_ITEM,
			LICENSEXML_ELEMENT_LICENSE,
			LICENSEXML_ELEMENT_EXCEPTION,
			LICENSEXML_ELEMENT_BULLET,
			LICENSEXML_ELEMENT_STANDARD_LICENSE_HEADER);

	/**
	 * Example text: tags that are dropped, including all of their children
	 */
	static HashSet<String> EXAMPLE_SKIPPED_TAGS = tagSet(
			LICENSEXML_ELEMENT_CROSS_REF,
			LICENSEXML_ELEMENT_CROSS_REFS,
			LICENSEXML_ELEMENT_NOTES,
			LICENSEXML_ELEMENT_STANDARD_LICENSE_HEADER);

	/**
	 * Example text: tags whose children are rendered in place
	 */
	static HashSet<String> EXAMPLE_UNPROCESSED_TAGS = tagSet(
			LICENSEXML_ELEMENT_COPYRIGHT_TEXT,
			LICENSEXML_ELEMENT_TITLE_TEXT,
			LICENSEXML_ELEMENT_ITEM,
			LICENSEXML_ELEMENT_LICENSE,
			LICENSEXML_ELEMENT_EXCEPTION,
			LICENSEXML_ELEMENT_EXAMPLE,
			LICENSEXML_ELEMENT_BULLET);

	/**
	 * Builds a new mutable set holding the given tag names
	 * @param tags tag names to include
	 * @return a fresh set containing every entry of tags
	 */
	private static HashSet<String> tagSet(String... tags) {
		HashSet<String> result = new HashSet<String>();
		for (String tag : tags) {
			result.add(tag);
		}
		return result;
	}

	/**
	 * Convert a node to text which contains various markup information and appends it to the sb.
	 * Text nodes are whitespace-normalized; element nodes are dispatched on their tag name;
	 * every other node type contributes nothing.
	 * @param node node to convert
	 * @param useTemplateFormat If true, convert any optional or variable elements into the template markup language
	 * if false, translate to the equivalent text
	 * @param sb Stringbuilder to append the text to
	 * @param indentCount number of indentations (e.g. number of embedded lists)
	 * @param unprocessedTags Tags that do not require any process - text of the children of that tag should just be appended.
	 * @param skippedTags Tags that should not be included
	 * @throws LicenseXmlException if an element tag is neither structural, unprocessed nor skipped
	 */
	private static void appendNodeText(Node node, boolean useTemplateFormat, StringBuilder sb, int indentCount,
			HashSet<String> unprocessedTags, HashSet<String> skippedTags) throws LicenseXmlException {
		if (node.getNodeType() == Node.TEXT_NODE) {
			appendNormalizedWhiteSpaceText(sb, node.getNodeValue());
			return;
		}
		if (node.getNodeType() != Node.ELEMENT_NODE) {
			return;	// neither text nor element: nothing to render
		}
		Element element = (Element)node;
		String tagName = element.getTagName();
		if (LICENSEXML_ELEMENT_LIST.equals(tagName)) {
			appendListElements(element, useTemplateFormat, sb, indentCount, unprocessedTags, skippedTags);
		} else if (LICENSEXML_ELEMENT_ALT.equals(tagName)) {
			appendAltText(element, useTemplateFormat, sb, indentCount, unprocessedTags, skippedTags);
		} else if (LICENSEXML_ELEMENT_OPTIONAL.equals(tagName)) {
			appendOptionalText(element, useTemplateFormat, sb, indentCount, unprocessedTags, skippedTags);
		} else if (LICENSEXML_ELEMENT_BREAK.equals(tagName)) {
			addNewline(sb, indentCount);
			appendElementChildrenText(element, useTemplateFormat, sb, indentCount, unprocessedTags, skippedTags);
		} else if (LICENSEXML_ELEMENT_PARAGRAPH.equals(tagName)) {
			// A builder holding at most one character does not get a leading newline.
			// NOTE(review): "> 1" rather than "> 0" may be unintentional - confirm before changing.
			if (sb.length() > 1) {
				addNewline(sb, indentCount);
			}
			appendElementChildrenText(element, useTemplateFormat, sb, indentCount, unprocessedTags, skippedTags);
		} else if (unprocessedTags.contains(tagName)) {
			appendElementChildrenText(element, useTemplateFormat, sb, indentCount, unprocessedTags, skippedTags);
		} else if (!skippedTags.contains(tagName)) {
			throw new LicenseXmlException("Unknown license element tag name: "+tagName);
		}
		// remaining case: a skipped tag - deliberately emit nothing
	}

	/**
	 * Appends a single space to sb unless sb is empty or already ends in whitespace
	 * @param sb Stringbuilder to append to
	 */
	private static void appendSpaceIfNeeded(StringBuilder sb) {
		if (sb.length() > 0 && !Character.isWhitespace(sb.charAt(sb.length()-1))) {
			sb.append(' ');
		}
	}

	/**
	 * Appends text removing any extra whitespace and linefeed information.
	 * The whitespace-delimited tokens of text are appended separated by single spaces; a
	 * separating space is put in front of the first token only when sb is non-empty and
	 * does not already end in whitespace. Text without any token appends nothing.
	 * @param sb Stringbuilder to append to
	 * @param text text to normalize and append
	 */
	private static void appendNormalizedWhiteSpaceText(StringBuilder sb, String text) {
		List<String> tokens = tokenize(text);
		if (tokens.isEmpty()) {
			return;
		}
		appendSpaceIfNeeded(sb);
		sb.append(tokens.get(0));
		for (int i = 1; i < tokens.size(); i++) {
			sb.append(' ');
			sb.append(tokens.get(i));
		}
	}

	/**
	 * Tokenize a string based on the Character whitespace
	 * @param text text to split
	 * @return the maximal runs of non-whitespace characters (per {@link Character#isWhitespace(char)})
	 * in order of appearance; empty if text is empty or all whitespace
	 */
	private static List<String> tokenize(String text) {
		List<String> result = new ArrayList<String>();
		int length = text.length();
		int loc = 0;
		while (loc < length) {
			// skip the whitespace in front of the next token
			while (loc < length && Character.isWhitespace(text.charAt(loc))) {
				loc++;
			}
			int tokenStart = loc;
			while (loc < length && !Character.isWhitespace(text.charAt(loc))) {
				loc++;
			}
			if (loc > tokenStart) {
				result.add(text.substring(tokenStart, loc));
			}
		}
		return result;
	}

	/**
	 * Appends the text for all the child nodes in the element
	 * @param element Element to convert
	 * @param useTemplateFormat If true, convert any optional or variable elements into the template markup language
	 * if false, translate to the equivalent text
	 * @param sb Stringbuilder to append the text to
	 * @param indentCount number of indentations (e.g. number of embedded lists)
	 * @param unprocessedTags Tags that do not require any process - text of the children of that tag should just be appended.
	 * @param skippedTags Tags that should not be included
	 * @throws LicenseXmlException if a descendant element has an unknown tag
	 */
	private static void appendElementChildrenText(Element element,
			boolean useTemplateFormat, StringBuilder sb, int indentCount, HashSet<String> unprocessedTags,
			HashSet<String> skippedTags) throws LicenseXmlException {
		NodeList licenseChildNodes = element.getChildNodes();
		for (int i = 0; i < licenseChildNodes.getLength(); i++) {
			appendNodeText(licenseChildNodes.item(i), useTemplateFormat, sb, indentCount, unprocessedTags, skippedTags);
		}
	}

	/**
	 * Renders the content of an element into a new, separate Stringbuilder so the caller can
	 * inspect the result before merging it into the main output
	 * @param element Element whose content is rendered
	 * @param useTemplateFormat If true, convert any optional or variable elements into the template markup language
	 * if false, translate to the equivalent text
	 * @param indentCount number of indentations (e.g. number of embedded lists)
	 * @param unprocessedTags Tags that do not require any process - text of the children of that tag should just be appended.
	 * @param skippedTags Tags that should not be included
	 * @return the rendered children, or the element's text content if it has no child nodes
	 * @throws LicenseXmlException if a descendant element has an unknown tag
	 */
	private static StringBuilder renderChildren(Element element, boolean useTemplateFormat, int indentCount,
			HashSet<String> unprocessedTags, HashSet<String> skippedTags) throws LicenseXmlException {
		StringBuilder childSb = new StringBuilder();
		if (element.hasChildNodes()) {
			appendElementChildrenText(element, useTemplateFormat, childSb, indentCount, unprocessedTags, skippedTags);
		} else {
			childSb.append(element.getTextContent());
		}
		return childSb;
	}

	/**
	 * Add a newline to the stringbuilder and indent per the indent count
	 * @param sb Stringbuilder to append to
	 * @param indentCount number of times the indent string is appended after the newline
	 */
	private static void addNewline(StringBuilder sb, int indentCount) {
		sb.append('\n');
		for (int i = 0; i < indentCount; i++) {
			sb.append(INDENT_STRING);
		}
	}

	/**
	 * Append optional text
	 * @param element Element element containing the optional text
	 * @param useTemplateFormat If true, convert any optional or variable elements into the template markup language
	 * if false, translate to the equivalent text
	 * @param sb Stringbuilder to append the text to
	 * @param indentCount number of indentations (e.g. number of embedded lists)
	 * @param unprocessedTags Tags that do not require any process - text of the children of that tag should just be appended.
	 * @param skippedTags Tags that should not be included
	 * @throws LicenseXmlException if element is not an optional element or a descendant has an unknown tag
	 */
	private static void appendOptionalText(Element element,
			boolean useTemplateFormat, StringBuilder sb, int indentCount, HashSet<String> unprocessedTags,
			HashSet<String> skippedTags) throws LicenseXmlException {
		if (!LICENSEXML_ELEMENT_OPTIONAL.equals(element.getTagName())) {
			throw new LicenseXmlException("Expecting optional tag, found "+element.getTagName());
		}
		StringBuilder childSb = renderChildren(element, useTemplateFormat, indentCount, unprocessedTags, skippedTags);
		if (!useTemplateFormat) {
			appendSpaceIfNeeded(sb);
			sb.append(childSb);
			return;
		}
		// NOTE(review): the bare "<>" open/close markers look truncated (possibly template
		// directives whose content was lost) - confirm against the upstream source.
		sb.append("<>");
		if (childSb.length() > 0 && childSb.charAt(0) == ' ') {
			// move the leading space of the optional text outside of the text itself
			sb.append(' ');
			childSb.delete(0, 1);
		} else {
			appendSpaceIfNeeded(sb);
		}
		sb.append(childSb);
		sb.append("<>");
	}

	/**
	 * Add text for an alternative expression
	 * @param element Element containing the alternative expression
	 * @param useTemplateFormat If true, convert any optional or variable elements into the template markup language
	 * if false, translate to the equivalent text
	 * @param sb Stringbuilder to append the text to
	 * @param indentCount number of indentations (e.g. number of embedded lists)
	 * @param unprocessedTags Tags that do not require any process - text of the children of that tag should just be appended.
	 * @param skippedTags Tags that should not be included
	 * @throws LicenseXmlException if element is not an alt element or a descendant has an unknown tag
	 */
	private static void appendAltText(Element element,
			boolean useTemplateFormat, StringBuilder sb, int indentCount, HashSet<String> unprocessedTags,
			HashSet<String> skippedTags) throws LicenseXmlException {
		if (!LICENSEXML_ELEMENT_ALT.equals(element.getTagName())) {
			throw new LicenseXmlException("Expected alt tag.  Found '"+element.getTagName()+"'");
		}
		StringBuilder originalSb = renderChildren(element, useTemplateFormat, indentCount, unprocessedTags, skippedTags);
		if (!useTemplateFormat) {
			appendSpaceIfNeeded(sb);
			sb.append(originalSb);
			return;
		}
		if (originalSb.length() > 0 && originalSb.charAt(0) == ' ') {
			sb.append(' ');
			originalSb.delete(0, 1);
		} else {
			appendSpaceIfNeeded(sb);
		}
		// NOTE(review): in template format only "<>" is emitted and the rendered original text
		// is discarded; the marker looks truncated - confirm against the upstream source.
		sb.append("<>");
	}

	/**
	 * Appends a list element to the stringbuilder sb. Every item is placed on its own line,
	 * indented one level deeper than the list itself; text nodes directly below the list
	 * are ignored.
	 * @param element list Element to convert
	 * @param useTemplateFormat If true, convert any optional or variable elements into the template markup language
	 * if false, translate to the equivalent text
	 * @param sb Stringbuilder to append the text to
	 * @param indentCount Number of indentations for the text
	 * @param unprocessedTags Tags that do not require any process - text of the children of that tag should just be appended.
	 * @param skippedTags Tags that should not be included
	 * @throws LicenseXmlException if element is not a list, a child element is not an item, a child
	 * is neither an element nor a text node, or a descendant has an unknown tag
	 */
	private static void appendListElements(Element element,
			boolean useTemplateFormat, StringBuilder sb, int indentCount, HashSet<String> unprocessedTags,
			HashSet<String> skippedTags) throws LicenseXmlException {
		if (!LICENSEXML_ELEMENT_LIST.equals(element.getTagName())) {
			throw new LicenseXmlException("Invalid list element tag - expected 'list', found '"+element.getTagName()+"'");
		}
		NodeList listItemNodes = element.getChildNodes();
		for (int i = 0; i < listItemNodes.getLength(); i++) {
			Node child = listItemNodes.item(i);
			if (child.getNodeType() == Node.ELEMENT_NODE) {
				Element listItem = (Element)child;
				if (!LICENSEXML_ELEMENT_ITEM.equals(listItem.getTagName())) {
					throw new LicenseXmlException("Expected only list item tags ('item') in a list, found "+listItem.getTagName());
				}
				addNewline(sb, indentCount+1);
				appendNodeText(listItem, useTemplateFormat, sb, indentCount + 1, unprocessedTags, skippedTags);
			} else if (child.getNodeType() != Node.TEXT_NODE) {
				throw new LicenseXmlException("Expected only element children for a list element");
			}
		}
	}

	/**
	 * Gets the license template text from the license element
	 * @param licenseElement root element of the license text
	 * @return the text of licenseElement rendered in template format
	 * @throws LicenseXmlException if the element tree contains an unknown tag
	 */
	public static String getLicenseTemplate(Element licenseElement) throws LicenseXmlException {
		StringBuilder sb = new StringBuilder();
		appendNodeText(licenseElement, true, sb, 0, LICENSE_AND_EXCEPTION_UNPROCESSED_TAGS, LICENSE_AND_EXCEPTION_SKIPPED_TAGS);
		return sb.toString();
	}

	/**
	 * Format note text taking into account line breaks, paragraphs etc.
	 * @param licenseElement root element of the note text
	 * @return the plain text of licenseElement using the note tag sets
	 * @throws LicenseXmlException if the element tree contains an unknown tag
	 */
	public static String getNoteText(Element licenseElement) throws LicenseXmlException {
		StringBuilder sb = new StringBuilder();
		appendNodeText(licenseElement, false, sb, 0, NOTES_UNPROCESSED_TAGS, NOTES_SKIPPED_TAGS);
		return sb.toString();
	}

	/**
	 * Gets license text from the license element
	 * @param licenseElement root element of the license text
	 * @return the plain (non-template) text of licenseElement
	 * @throws LicenseXmlException if the element tree contains an unknown tag
	 */
	public static String getLicenseText(Element licenseElement) throws LicenseXmlException {
		StringBuilder sb = new StringBuilder();
		appendNodeText(licenseElement, false, sb, 0, LICENSE_AND_EXCEPTION_UNPROCESSED_TAGS, LICENSE_AND_EXCEPTION_SKIPPED_TAGS);
		return sb.toString();
	}

	/**
	 * Produces a diagnostic dump of a DOM subtree: one line per node giving its type, name,
	 * value and text content, with children indented below their parent
	 * @param licenseElement root of the subtree to dump
	 * @return the multi-line dump
	 */
	public static String dumpLicenseDom(Element licenseElement) {
		StringBuilder sb = new StringBuilder();
		appendNode(licenseElement, sb, 0);
		return sb.toString();
	}

	/**
	 * Appends the dump line for node followed, recursively, by the dump of all of its children
	 * @param node node to describe
	 * @param sb Stringbuilder to append the description to
	 * @param indent nesting depth of node; one indent string is emitted per level
	 */
	private static void appendNode(Node node,
			StringBuilder sb, int indent) {
		for (int i = 0; i < indent; i++) {
			sb.append(INDENT_STRING);
		}
		sb.append("Node Type: ").append(node.getNodeType());
		sb.append(", Node Name: ").append(node.getNodeName());
		sb.append(", Node Value: '").append(node.getNodeValue()).append('\'');
		sb.append(", Node Text: '").append(node.getTextContent()).append("'\n");
		if (node.hasChildNodes()) {
			NodeList children = node.getChildNodes();
			for (int i = 0; i < children.getLength(); i++) {
				appendNode(children.item(i), sb, indent+1);
			}
		}
	}

	/**
	 * Gets the plain text of a standard license header.
	 * The declared return type is Object, but the value returned is always a String.
	 * @param headerNode root node of the header
	 * @return header text where headerNode is the root element
	 * @throws LicenseXmlException if the node tree contains an unknown tag
	 */
	public static Object getHeaderText(Node headerNode) throws LicenseXmlException {
		StringBuilder sb = new StringBuilder();
		appendNodeText(headerNode, false, sb, 0, HEADER_UNPROCESSED_TAGS, HEADER_SKIPPED_TAGS);
		return sb.toString();
	}

	/**
	 * Gets the plain text of an example
	 * @param exampleElement root element of the example
	 * @return Example text where exampleElement is the root element
	 * @throws LicenseXmlException if the element tree contains an unknown tag
	 */
	public static String getExampleText(Element exampleElement) throws LicenseXmlException {
		StringBuilder sb = new StringBuilder();
		appendNodeText(exampleElement, false, sb, 0, EXAMPLE_UNPROCESSED_TAGS, EXAMPLE_SKIPPED_TAGS);
		return sb.toString();
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy