org.spdx.compare.CompareTemplateOutputHandler Maven / Gradle / Ivy

/**
 * Copyright (c) 2013 Source Auditor Inc.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
*/
package org.spdx.compare;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.spdx.licenseTemplate.ILicenseTemplateOutputHandler;
import org.spdx.licenseTemplate.LicenseTemplateRule;
import org.spdx.licenseTemplate.LicenseTemplateRule.RuleType;
import org.spdx.rdfparser.license.LicenseParserException;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
 * Compares the output of a parsed license template to text.  The method matches is called after
 * the document is parsed to determine if the text matches.
 * @author Gary O'Neall
 *
 */
public class CompareTemplateOutputHandler implements
		ILicenseTemplateOutputHandler {
	private static final int MAX_NEXT_NORMAL_TEXT_SEARCH_LENGTH = 15;	// Maximum number of tokens to compare when searching for a normal text match
	private static final int MIN_TOKENS_NORMAL_TEXT_SEARCH = 3;	// Minimum number of tokens to match of normal text to match after a variable block to bound greedy regex var text
	
	class ParseInstruction {
		LicenseTemplateRule rule;
		String text;
		List subInstructions;
		ParseInstruction parent;

		private boolean skip = false;	// skip this instruction in matching
		private boolean skipFirstTextToken = false;	// skip the first text token
		private DifferenceDescription lastOptionalDifference = null;
		
		ParseInstruction(LicenseTemplateRule rule, String text, ParseInstruction parent) {
			this.rule = rule;
			this.text = text;
			this.subInstructions = Lists.newArrayList();
			this.parent = parent;
		}
		
		@Override 
		public String toString() {
			if (this.rule != null) {
				return this.rule.toString();
			} else if (this.text != null) {
				String retval = "TEXT: '";
				if (this.text.length() > 10) {
					retval = retval + this.text.substring(0, 10) + "...'";
				} else {
					retval = retval + this.text + "'";
				}
				return retval;
			}else {
				return "NONE";
			}
		}

		/**
		 * @return the rule
		 */
		public LicenseTemplateRule getRule() {
			return rule;
		}

		/**
		 * @param rule the rule to set
		 */
		public void setRule(LicenseTemplateRule rule) {
			this.rule = rule;
		}

		/**
		 * @return the text
		 */
		public String getText() {
			return text;
		}

		/**
		 * @param optionalText the text to set
		 */
		public void setText(String text) {
			this.text = text;
		}

		/**
		 * Add the instruction to the list of sub-instructions
		 * @param instruction
		 */
		public void addSubInstruction(ParseInstruction instruction) {

			if (instruction.getRule() != null && RuleType.VARIABLE.equals(instruction.getRule().getType()) &&
					subInstructions.size() > 0 && 
					subInstructions.get(subInstructions.size()-1).getRule() != null &&
							RuleType.VARIABLE.equals(subInstructions.get(subInstructions.size()-1).getRule().getType())) {
				// Maybe this is a little bit of a hack, but merge any var instructions so that
				// the match will work
				LicenseTemplateRule lastRule = subInstructions.get(subInstructions.size()-1).getRule();
				lastRule.setMatch("("+lastRule.getMatch()+")\\s*("+instruction.getRule().getMatch()+")");
				lastRule.setName("combined-"+lastRule.getName()+"-"+instruction.getRule().getName());
				lastRule.setOriginal(lastRule.getOriginal() + " " + lastRule.getOriginal());
			} else {
				instruction.setParent(this);
				this.subInstructions.add(instruction);
			}
		}

		/**
		 * @return the parent
		 */
		public ParseInstruction getParent() {
			return parent;
		}

		/**
		 * @param parent the parent to set
		 */
		public void setParent(ParseInstruction parent) {
			this.parent = parent;
		}

		/**
		 * @return the subInstructions
		 */
		public List getSubInstructions() {
			return subInstructions;
		}

		/**
		 * @return true iff there are only text instructions as sub instructions
		 */
		public boolean onlyText() {
			if (this.subInstructions.size() < 1) {
				return false;
			}
			for (ParseInstruction subInstr:this.subInstructions) {
				if (subInstr.getText() == null) {
					return false;
				}
			}
			return true;
		}

		public String toText() {
			StringBuilder sb = new StringBuilder();
			for (ParseInstruction subInstr:this.subInstructions) {
				if (subInstr.getText() != null) {
					sb.append(subInstr.getText());
				}
			}
			return sb.toString();
		}
		
		/**
		 * Attempt to match this instruction against a tokenized array
		 * @param matchTokens Tokens to match the instruction against
		 * @param startToken Index of the tokens to start the match
		 * @param endToken Last index of the tokens to use in the match
		 * @param originalText Original text used go generate the matchTokens
		 * @param differenceDescription Description of differences found
		 * @param nextNormalText if there is a nextOptionalText, this would be the normal text that follows the optional text
		 * @return Next token index after the match or -1 if no match was found
		 * @throws LicenseParserException 
		 */
		public int match(String[] matchTokens, int startToken, int endToken, String originalText,
				DifferenceDescription differences, Map tokenToLocation) throws LicenseParserException {
			return match(matchTokens, startToken, endToken, originalText, differences, tokenToLocation, false);
		}
		
		/**
		 * Attempt to match this instruction against a tokenized array
		 * @param matchTokens Tokens to match the instruction against
		 * @param startToken Index of the tokens to start the match
		 * @param endToken Last index of the tokens to use in the match
		 * @param originalText Original text used go generate the matchTokens
		 * @param differenceDescription Description of differences found
		 * @param nextNormalText if there is a nextOptionalText, this would be the normal text that follows the optional text
		 * @param ignoreOptionalDifferences if true, don't record any optional differences
		 * @return Next token index after the match or -1 if no match was found
		 * @throws LicenseParserException 
		 */
		public int match(String[] matchTokens, int startToken, int endToken, String originalText,
				DifferenceDescription differences, Map tokenToLocation, boolean ignoreOptionalDifferences) throws LicenseParserException {
			if (this.skip) {
				return startToken;
			}
			int nextToken = startToken;
			if (this.rule == null) {
				if (this.text != null) {
					Map textLocations = new HashMap();
					String[] textTokens = LicenseCompareHelper.tokenizeLicenseText(LicenseCompareHelper.normalizeText(text), textLocations);
					if (this.skipFirstTextToken) {
						textTokens = Arrays.copyOfRange(textTokens, 1, textTokens.length);
					}
					nextToken = compareText(textTokens, matchTokens, nextToken, endToken, this);
					if (nextToken < 0) {
						int errorLocation = -nextToken;
						differences.addDifference(tokenToLocation.get(errorLocation), LicenseCompareHelper.getTokenAt(matchTokens, errorLocation), 
										"Normal text of license does not match", text, null, getLastOptionalDifference());
					}
					if (this.subInstructions.size() > 0) {
						throw new LicenseParserException("License template parser error.  Sub expressions are not allows for plain text.");
					}
				} else {
					// just process the sub instructions
					for (ParseInstruction sub:subInstructions) {
						nextToken = sub.match(matchTokens, nextToken, endToken, originalText, differences, 
								tokenToLocation, ignoreOptionalDifferences);
						if (nextToken < 0) {
							return nextToken;
						}
					}
				}

			} else if (this.rule.getType().equals(RuleType.BEGIN_OPTIONAL)) {
				if (this.getText() != null) {
					throw new LicenseParserException("License template parser error - can not have text associated with a begin optional rule");
				}
				if (this.onlyText() || this.parent == null) {
					// optimization, don't go through the effort to subset the text
					for (ParseInstruction sub:subInstructions) {
						DifferenceDescription optionalDifference = new DifferenceDescription();
						nextToken = sub.match(matchTokens, nextToken, endToken, originalText, 
								optionalDifference, tokenToLocation);
						if (nextToken < 0) {
							if (!ignoreOptionalDifferences) {
								setLastOptionalDifference(optionalDifference);
							}					
							return startToken;	// the optional text didn't match, just return the start token
						}
					}
				} else {
					List matchingNormalTextStartTokens = this.parent.findNextNonVarTextStartTokens(this, matchTokens, 
							startToken, endToken, originalText, differences, tokenToLocation);
					nextToken = matchOptional(matchingNormalTextStartTokens, matchTokens, 
							nextToken, endToken, originalText, differences, tokenToLocation, ignoreOptionalDifferences);
				}
			} else if (this.rule.getType().equals(RuleType.VARIABLE)) {
				List matchingNormalTextStartTokens = this.parent.findNextNonVarTextStartTokens(this, matchTokens, 
						startToken, endToken, originalText, differences, tokenToLocation);
				nextToken = matchVariable(matchingNormalTextStartTokens, matchTokens, 
						nextToken, endToken, originalText, differences, tokenToLocation);
			} else {
				throw new LicenseParserException("Unexpected parser state - instruction is not root, optional, variable or text");
			}
			return nextToken;
		}

		/**
		 * Match to an optional rule
		 * @param optionalInstruction Optional Instruction
		 * @param matchingStartTokens List of indexes for the start tokens for the next normal text
		 * @param matchTokens Tokens to match against
		 * @param startToken Index of the first token to search for the match
		 * @param endToken Index of the last token to search for the match
		 * @param originalText Original text used go generate the matchTokens
		 * @param differences Any differences found
		 * @param tokenToLocation Map of token index to line/column where the token was found in the original text
		 *  @param ignoreOptionalDifferences if true, don't record any optional differences
		 * @return the index of the token after the find or -1 if the text did not match
		 * @throws LicenseParserException 
		 */
		private int matchOptional(List matchingStartTokens,
				String[] matchTokens, int startToken, int endToken, String originalText,
				DifferenceDescription differences, Map tokenToLocation, boolean ignoreOptionalDifferences) throws LicenseParserException {
			for (int matchingStartToken:matchingStartTokens) {
				DifferenceDescription matchDifferences = new DifferenceDescription();
				int matchLocation = startToken;
				for (ParseInstruction sub:subInstructions) {
					matchLocation = sub.match(matchTokens, matchLocation, matchingStartToken-1, originalText, 
							matchDifferences, tokenToLocation);
					if (matchLocation < 0) {
						break;
					}
				}
				if (matchLocation > 0) {
					return matchLocation;	// found a match
				} else if (!ignoreOptionalDifferences) {				
					setLastOptionalDifference(matchDifferences);
				}
			}
			// We didn't find any matches, return the original start token
			return startToken;
		}

		/**
		 * Find the indexes that match the matching optional or first normal text within the sub-instructions
		 * @param afterChild the child after which to start searching for the first normal text
		 * @param matchTokens Tokens used to match the text against
		 * @param startToken Start of the match tokens to begin the search
		 * @param endToken End of the match tokens to end the search
		 * @param originalText original text that created the match tokens
		 * @param differences Information on any differences found
		 * @param tokenToLocation Map of match token indexes to line/column locations
		 * @return List of indexes for the start tokens for the next non variable text that matches
		 * @throws LicenseParserException 
		 */
		private List findNextNonVarTextStartTokens(ParseInstruction afterChild,
				String[] matchTokens, int startToken, int endToken, String originalText,
				DifferenceDescription differences, Map tokenToLocation) throws LicenseParserException {
			List retval = new ArrayList();
			// We find the first index to start our search
			int indexOfChild = subInstructions.indexOf(afterChild);
			if (indexOfChild < 0) {
				throw new LicenseParserException("Template Parser Error: Could not locate sub instruction");
			}
			int startSubinstructionIndex = indexOfChild + 1;
			if (startSubinstructionIndex >= subInstructions.size()) {
				// no start tokens found
				// Set return value to the end
				retval.add(endToken+1);
				return retval;
			}
			int firstNormalTextIndex = -1;	// initial value for not yet found
			// keep track of all optional rules prior to the first solid normal text since the optional
			// rules can provide a valid result
			List leadingOptionalSubInstructions = Lists.newArrayList();
			int i = startSubinstructionIndex;
			while (i < subInstructions.size() && firstNormalTextIndex < 0) {
				LicenseTemplateRule subInstructionRule = subInstructions.get(i).getRule();
				if (subInstructionRule != null && subInstructionRule.getType() == RuleType.BEGIN_OPTIONAL) {
					leadingOptionalSubInstructions.add(i);
				} else if (subInstructions.get(i).getText() != null) {
					firstNormalTextIndex = i;
				}
				i++;
			}
			int nextMatchingStart = startToken;
			// Go through the preceding optional rules.  If there enough token matches, add it to the result list
			for (int optionalSub:leadingOptionalSubInstructions) {
				DifferenceDescription tempDiffDescription = new DifferenceDescription();
				int nextOptMatchingStart = nextMatchingStart;
				int optTokenAfterMatch = subInstructions.get(optionalSub).match(matchTokens, nextOptMatchingStart, endToken, originalText, tempDiffDescription, tokenToLocation, true);
				while (optTokenAfterMatch <= nextOptMatchingStart && -optTokenAfterMatch <= endToken 
						&& !tempDiffDescription.differenceFound && nextOptMatchingStart <= endToken) {
					// while we didn't find a match
					nextOptMatchingStart++;
					optTokenAfterMatch = subInstructions.get(optionalSub).match(matchTokens, nextOptMatchingStart, endToken, originalText, tempDiffDescription, tokenToLocation, true);
				}
				if (optTokenAfterMatch > 0 && !tempDiffDescription.differenceFound && nextOptMatchingStart <= endToken) {
					// we found a match
					if (optTokenAfterMatch - nextOptMatchingStart > MIN_TOKENS_NORMAL_TEXT_SEARCH) {
						// Only add possible matches if it matched enough tokens
						//TODO: This approximation of the number of tokens matched may include tokens consumed by a variable match. To make this more accurate, we should count the tokens of just the text nodes a children 
						retval.add(nextOptMatchingStart);
					}
					nextMatchingStart = optTokenAfterMatch;
				}
			}
			if (firstNormalTextIndex < 0) {
				// Set to the end
				retval.add(endToken+1);
				return retval;
			}
			
			Map normalTextLocations = new HashMap();
			String[] textTokens = LicenseCompareHelper.tokenizeLicenseText(LicenseCompareHelper.normalizeText(subInstructions.get(firstNormalTextIndex).getText()), normalTextLocations);
			if (textTokens.length > MAX_NEXT_NORMAL_TEXT_SEARCH_LENGTH) {
				textTokens = Arrays.copyOf(textTokens, MAX_NEXT_NORMAL_TEXT_SEARCH_LENGTH);
			}

			int tokenAfterMatch = compareText(textTokens, matchTokens, nextMatchingStart, endToken, null);
			boolean foundEnoughTokens = false;
			while (!foundEnoughTokens && nextMatchingStart <= endToken && !differences.differenceFound) {
				while (tokenAfterMatch < 0 && -tokenAfterMatch <= endToken) {			
					nextMatchingStart = nextMatchingStart + 1;
					tokenAfterMatch = compareText(textTokens, matchTokens, nextMatchingStart, endToken, null);
				}
				if (tokenAfterMatch < 0) {
					// Can not find the text, report a difference
					String ruleDesc = "variable or optional rule";
					if (afterChild.getRule() != null) {
						if (afterChild.getRule().getType() == RuleType.BEGIN_OPTIONAL) {
							ruleDesc = "optional rule";
						} else if (afterChild.getRule().getType() == RuleType.VARIABLE) {
							ruleDesc = "variable rule '" + afterChild.getRule().getName() + "'";
						}
					}
					differences.addDifference(tokenToLocation.get(nextMatchingStart), "",
							"Unable to find the text '" + subInstructions.get(firstNormalTextIndex).getText() + "' following a "+ruleDesc,
									null, rule, getLastOptionalDifference());
				} else if (textTokens.length >= MIN_TOKENS_NORMAL_TEXT_SEARCH) {
					retval.add(nextMatchingStart);
					foundEnoughTokens = true;
				} else {
					// Not enough text tokens, we need to make sure everything matches beyond this point
					DifferenceDescription tempDiffDescription = new DifferenceDescription();
					int nextCheckToken = subInstructions.get(firstNormalTextIndex).match(matchTokens, nextMatchingStart, endToken, originalText, tempDiffDescription, tokenToLocation, true);
					int nextCheckSubInstruction = firstNormalTextIndex + 1;
					while (nextCheckToken > 0 &&
							nextCheckToken - tokenAfterMatch < MIN_TOKENS_NORMAL_TEXT_SEARCH &&
							nextCheckSubInstruction < subInstructions.size()) {
						nextCheckToken = subInstructions.get(nextCheckSubInstruction++).match(matchTokens, nextCheckToken, endToken, originalText, tempDiffDescription, tokenToLocation, true);
					}
					if (nextCheckToken < 0) {
						// we didn't match enough, move on to the next
						nextMatchingStart = nextMatchingStart + 1;
						tokenAfterMatch = compareText(textTokens, matchTokens, nextMatchingStart, endToken, null);
					} else {
						retval.add(nextMatchingStart);
						foundEnoughTokens = true;
					}
				}
			}
			return retval;
		}
		
		/**
		 * Determine the number of tokens matched from the compare text
		 * @param text
		 * @param end End of matching text
		 * @return
		 */
		private int numTokensMatched(String text, int end) {
			if (text.trim().isEmpty()) {
				return 0;
			}
			if (end == 0) {
				return 0;
			}
			Map temp = new HashMap();
			String subText = text.substring(0, end);
			String[] tokenizedString = LicenseCompareHelper.tokenizeLicenseText(subText, temp);
			return tokenizedString.length;
		}

		/**
		 * Match to a variable rule
		 * @param matchingStartTokens List of indexes for the start tokens for the next normal text
		 * @param matchTokens Tokens to match against
		 * @param startToken Index of the first token to search for the match
		 * @param endToken Index of the last token to search for the match
		 * @param originalText Original text used go generate the matchTokens
		 * @param differences Any differences found
		 * @param tokenToLocation Map of token index to line/column where the token was found in the original text
		 * @return the index of the token after the find or -1 if the text did not match
		 */
		private int matchVariable(List matchingStartTokens, String[] matchTokens, int startToken, int endToken, 
				String originalText, DifferenceDescription differences, Map tokenToLocation) {
			
			if (differences.isDifferenceFound()) {
				return -1;
			}
			for (int matchingStartToken:matchingStartTokens) {
				String compareText = LicenseCompareHelper.locateOriginalText(originalText, startToken, matchingStartToken-1, tokenToLocation, matchTokens);
				Pattern matchPattern = Pattern.compile(rule.getMatch(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
				Matcher matcher = matchPattern.matcher(compareText);
				if (!matcher.find() || matcher.start() > 0) {
					continue;
				} else {
					int numMatched = numTokensMatched(compareText, matcher.end());
					return startToken + numMatched;
				}
			}
			// if we got here, there was no match found
			differences.addDifference(tokenToLocation.get(startToken), LicenseCompareHelper.getTokenAt(matchTokens, startToken), "Variable text rule "+rule.getName()+" did not match the compare text",
					null, rule, getLastOptionalDifference());
			return -1;
		}

		/**
		 * @return The difference description for the last optional rule which did not match
		 */
		public DifferenceDescription getLastOptionalDifference() {
			if (this.lastOptionalDifference != null) {
				return this.lastOptionalDifference;
			} else if (this.parent != null) {
				return parent.getLastOptionalDifference();
			} else {
				return null;
			}
		}
		
		public void setLastOptionalDifference(DifferenceDescription optionalDifference) {
			if (optionalDifference != null && optionalDifference.getDifferenceMessage() != null && !optionalDifference.getDifferenceMessage().isEmpty()) {
				this.lastOptionalDifference = optionalDifference;
				if (this.parent != null) {
					this.parent.setLastOptionalDifference(optionalDifference);
				}
			}
		}

		/**
		 * @return true if the instruction following this instruction is a beginOptional rule containing text with a single token
		 */
		public boolean isFollowingInstructionOptionalSingleToken() {
			if (parent == null) {
				return false;
			}
			ParseInstruction nextInstruction = parent.findFollowingInstruction(this);
			if (nextInstruction == null || nextInstruction.getRule() == null) {
				return false;
			} else {
				if (!RuleType.BEGIN_OPTIONAL.equals(nextInstruction.getRule().getType())) {
					return false;
				}
				if (nextInstruction.getSubInstructions().size() != 1) {
					return false;
				}
				String optionalText = nextInstruction.getSubInstructions().get(0).getText();
				return LicenseCompareHelper.isSingleTokenString(optionalText);
			}
		}

		/**
		 * @param parseInstruction subInstruction to find the next parse instruction after
		 * @return the next instruction after parseInstruction in the subInstructions
		 */
		private ParseInstruction findFollowingInstruction(ParseInstruction parseInstruction) {
			if (parseInstruction == null) {
				return null;
			}
			for (int i = 0; i < subInstructions.size(); i++) {
				if (parseInstruction.equals(subInstructions.get(i))) {
					if (subInstructions.size() > i+1) {
						return subInstructions.get(i+1);
					} else if (parent == null) {
						return null;
					} else {
						return parent.findFollowingInstruction(this);
					}
				}
			}
			return null;	// instruction not found
		}

		/**
		 * @return the tokens from the next group of optional
		 */
		public String[] getNextOptionalTextTokens() {
			if (parent == null) {
				return new String[0];
			}
			ParseInstruction nextInstruction = parent.findFollowingInstruction(this);
			if (nextInstruction == null || nextInstruction.getRule() == null) {
				return new String[0];
			} else {
				if (!RuleType.BEGIN_OPTIONAL.equals(nextInstruction.getRule().getType())) {
					return new String[0];
				}
				StringBuilder sb = new StringBuilder();
				for (ParseInstruction inst:nextInstruction.getSubInstructions()) {
					if (inst.getText() != null) {
						sb.append(inst.getText());
					}
				}
				Map temp = Maps.newHashMap();
				return LicenseCompareHelper.tokenizeLicenseText(sb.toString(), temp);
			}
		}

		/**
		 * Skip the next instruction
		 */
		public void skipNextInstruction() {
			if (parent == null) {
				return;
			}
			ParseInstruction nextInst = parent.findFollowingInstruction(this);
			nextInst.setSkip(true);
		}
		
		public boolean getSkip() {
			return this.skip ;
		}
		
		public void setSkip(boolean skip) {
			this.skip = skip;
		}

		/**
		 * @return the next sibling parse instruction which is just text (no rules)
		 */
		public ParseInstruction getNextNormalTextInstruction() {
			if (this.parent == null) {
				return null;
			}
			List siblings = parent.getSubInstructions();
			int mySiblingIndex = -1;
			for (int i = 0; i < siblings.size(); i++) {
				if (this.equals(siblings.get(i))) {
					mySiblingIndex = i;
					break;
				}
			}
			if (mySiblingIndex < 0) {
				return null;
			}
			int nextOptionalIndex = -1;
			for (int i = mySiblingIndex + 1; i < siblings.size(); i++) {
				if (siblings.get(i).getRule() != null && RuleType.BEGIN_OPTIONAL.equals(siblings.get(i).getRule().getType())) {
					nextOptionalIndex = i;
					break;
				}
			}
			if (nextOptionalIndex > 0) {
				for (int i = nextOptionalIndex + 1; i < siblings.size(); i++) {
					if (siblings.get(i).getText() != null) {
						return siblings.get(i);
					}
				}
				return null; // Note - we could go up to the parent to look for the next text token, but this is getting messy enough as it is
			} else {
				return parent.getNextNormalTextInstruction();
			}
		}

		/**
		 * @param skipFirstTextToken if true, the first text token will be skipped
		 */
		public void setSkipFirstToken(boolean skipFirstTextToken) {
			this.skipFirstTextToken = skipFirstTextToken;
		}
		
		/**
		 * @return true if the first text token should be skipped
		 */
		public boolean isSkipFirstTextToken() {
			return this.skipFirstTextToken;
		}
	}
	
	public class DifferenceDescription {
		private static final int MAX_DIFF_TEXT_LENGTH = 100;
		private boolean differenceFound;
		private String differenceMessage;
		private List differences;
		
		public DifferenceDescription(boolean differenceFound, String differenceMessage, List differences) {
			this.differenceFound = differenceFound;
			this.differenceMessage = differenceMessage;
			this.differences = differences;
		}

		public DifferenceDescription() {
			this.differenceFound = false;
			this.differenceMessage = "No difference found";
			this.differences = Lists.newArrayList();
		}

		public boolean isDifferenceFound() {
			return differenceFound;
		}

		public void setDifferenceFound(boolean differenceFound) {
			this.differenceFound = differenceFound;
		}

		public String getDifferenceMessage() {
			return differenceMessage;
		}

		public void setDifferenceMessage(String differenceMessage) {
			this.differenceMessage = differenceMessage;
		}

		public List getDifferences() {
			return differences;
		}

		public void setDifferences(List differences) {
			this.differences = differences;
		}
		
		/**
		 * @param location Location in the text of the difference
		 * @param token Token causing the difference
		 * @param msg Message for the difference
		 * @param text Template text being compared to
		 * @param rule Template rule where difference was found
		 * @param lastOptionalDifference The difference for the last optional difference that failed
		 */
		public void addDifference(LineColumn location, String token, String msg, String text, 
				LicenseTemplateRule rule, DifferenceDescription lastOptionalDifference) {
			if (token == null) {
				token = "";
			}
			if (msg == null) {
				msg = "UNKNOWN (null)";
			}
			this.differenceMessage = msg;
			if (location != null) {
				this.differenceMessage = this.differenceMessage + " starting at line #"+
						String.valueOf(location.getLine())+ " column #" +
						String.valueOf(location.getColumn())+" \""+
						token+"\"";
				this.differences.add(location);
			} else {
				this.differenceMessage = this.differenceMessage + " at end of text";
			}
			if (text != null) {
				this.differenceMessage = this.differenceMessage + " when comparing to template text \"";
				if (text.length() > MAX_DIFF_TEXT_LENGTH) {
					this.differenceMessage = this.differenceMessage + 
							text.substring(0, MAX_DIFF_TEXT_LENGTH) + "...\"";
				} else {
					this.differenceMessage = this.differenceMessage + text + "\"";
				}
			}
			if (rule != null) {
				this.differenceMessage = this.differenceMessage + " while processing rule " + rule.toString();
			}
			if (lastOptionalDifference != null) {
				this.differenceMessage = this.differenceMessage + 
						".  Last optional text was not found due to the optional difference: \n\t" + 
						lastOptionalDifference.getDifferenceMessage();
			}
			this.differenceFound = true;
		}
	}

	String[] compareTokens = new String[0];
	String compareText = "";
	Map tokenToLocation = new HashMap();
	ParseInstruction topLevelInstruction = new ParseInstruction(null, null, null);
	DifferenceDescription differences = new DifferenceDescription();
	ParseInstruction currentOptionalInstruction = null;
	boolean parsingComplete = false;
	
	/**
	 * @param compareText Text to compare the parsed SPDX license template to
	 * @throws IOException This is not to be expected since we are using StringReaders
	 */
	public CompareTemplateOutputHandler(String compareText) throws IOException {
		this.compareText = LicenseCompareHelper.normalizeText(compareText);
		this.compareTokens = LicenseCompareHelper.tokenizeLicenseText(this.compareText, tokenToLocation);
	}
	
	/**
	 * @param textTokens
	 * @param matchTokens
	 * @param startToken
	 * @param endToken
	 * @param instruction
	 * @return positive index of the next match token after the match or negative index of the token which first failed the match
	 */
	private int compareText(String[] textTokens, String[] matchTokens, int startToken, int endToken,
			ParseInstruction instruction) {
		int textTokenCounter = 0;
		String nextTextToken = LicenseCompareHelper.getTokenAt(textTokens, textTokenCounter++);
		int matchTokenCounter = startToken;
		String nextMatchToken = LicenseCompareHelper.getTokenAt(matchTokens, matchTokenCounter++);
		while (nextTextToken != null) {
			if (nextMatchToken == null) {
				// end of compare text stream
				while (nextTextToken != null && LicenseCompareHelper.canSkip(nextTextToken)) {
					nextTextToken = LicenseCompareHelper.getTokenAt(textTokens, textTokenCounter++);
				}
				if (nextTextToken != null) {
					return -matchTokenCounter;	// there is more stuff in the compare license text, so not equiv.
				}
			} else if (LicenseCompareHelper.tokensEquivalent(nextTextToken, nextMatchToken)) { 
				// just move onto the next set of tokens
				nextTextToken = LicenseCompareHelper.getTokenAt(textTokens, textTokenCounter++);
				if (nextTextToken != null) {
					nextMatchToken = LicenseCompareHelper.getTokenAt(matchTokens, matchTokenCounter++);
				}
			} else {
				// see if we can skip through some compare tokens to find a match
				while (nextMatchToken != null && LicenseCompareHelper.canSkip(nextMatchToken)) {
					nextMatchToken = LicenseCompareHelper.getTokenAt(matchTokens, matchTokenCounter++);
				}
				// just to be sure, skip forward on the text
				while (nextTextToken != null && LicenseCompareHelper.canSkip(nextTextToken)) {
					nextTextToken = LicenseCompareHelper.getTokenAt(textTokens, textTokenCounter++);
				}
				if (LicenseCompareHelper.tokensEquivalent(nextMatchToken, nextTextToken)) {
					nextTextToken = LicenseCompareHelper.getTokenAt(textTokens, textTokenCounter++);
					if (nextTextToken != null) {
						nextMatchToken = LicenseCompareHelper.getTokenAt(compareTokens, matchTokenCounter++);
					}	
				} else {
					if (textTokenCounter == textTokens.length &&
							instruction != null &&
							instruction.isFollowingInstructionOptionalSingleToken() &&
							nextMatchToken != null) {
						//This is the special case where there may be optional characters which are
						//less than a token at the end of a compare
						//Yes - this is a bit of a hack
						String compareToken = nextTextToken + instruction.getNextOptionalTextTokens()[0];
						if (LicenseCompareHelper.tokensEquivalent(compareToken, nextMatchToken)) {
							instruction.skipNextInstruction();
							return matchTokenCounter;
						} else {
							ParseInstruction nextNormal = instruction.getNextNormalTextInstruction();
							String nextNormalText = LicenseCompareHelper.getFirstLicenseToken(nextNormal.getText());
							if (nextNormalText != null) {
								compareToken = compareToken + nextNormalText;
								String compareWithoutOptional = nextTextToken + nextNormalText;
								if (LicenseCompareHelper.tokensEquivalent(compareToken, nextMatchToken) ||
										LicenseCompareHelper.tokensEquivalent(compareWithoutOptional, nextMatchToken)) {
									instruction.skipNextInstruction();
									nextNormal.setSkipFirstToken(true);
									return matchTokenCounter;
								}
							}
						}
					}
					return -matchTokenCounter;
				}
			}
		}
		return matchTokenCounter;
	}

	/* (non-Javadoc)
	 * @see org.spdx.licenseTemplate.ILicenseTemplateOutputHandler#text(java.lang.String)
	 */
	@Override
	public void text(String text) {
		if (currentOptionalInstruction != null) {
			currentOptionalInstruction.addSubInstruction(new ParseInstruction(null, text, currentOptionalInstruction));
		} else {
			this.topLevelInstruction.addSubInstruction(new ParseInstruction(null, text, null));
		}
	}

	/* (non-Javadoc)
	 * @see org.spdx.licenseTemplate.ILicenseTemplateOutputHandler#variableRule(org.spdx.licenseTemplate.LicenseTemplateRule)
	 */
	@Override
	public void variableRule(LicenseTemplateRule rule) {
		if (currentOptionalInstruction != null) {
			currentOptionalInstruction.addSubInstruction(new ParseInstruction(rule, null, currentOptionalInstruction));
		} else {
			this.topLevelInstruction.addSubInstruction(new ParseInstruction(rule, null, null));
		}
	}


	/* (non-Javadoc)
	 * @see org.spdx.licenseTemplate.ILicenseTemplateOutputHandler#beginOptional(org.spdx.licenseTemplate.LicenseTemplateRule)
	 */
	@Override
	public void beginOptional(LicenseTemplateRule rule) {
		ParseInstruction optionalInstruction = new ParseInstruction(rule, null, currentOptionalInstruction);
		if (currentOptionalInstruction != null) {
			currentOptionalInstruction.addSubInstruction(optionalInstruction);
		} else {
			this.topLevelInstruction.addSubInstruction(optionalInstruction);
		}
		this.currentOptionalInstruction = optionalInstruction;
	}

	/* (non-Javadoc)
	 * @see org.spdx.licenseTemplate.ILicenseTemplateOutputHandler#endOptional(org.spdx.licenseTemplate.LicenseTemplateRule)
	 */
	@Override
	public void endOptional(LicenseTemplateRule rule) {
		if (currentOptionalInstruction != null) {
			currentOptionalInstruction = currentOptionalInstruction.getParent();
			if (currentOptionalInstruction == null || currentOptionalInstruction.getRule() == null || currentOptionalInstruction.getRule().getType() != RuleType.BEGIN_OPTIONAL) {
				currentOptionalInstruction = null;
			}
		}
	}

	/**
	 * Performs the actual parsing if it has not been completed.  NOTE: This should only be called after all text has been added.
	 * @return true if no differences were found
	 * @throws LicenseParserException 
	 */
	public boolean matches() throws LicenseParserException {
		if (!parsingComplete) {
			throw new LicenseParserException("Matches was called prior to completing the parsing.  The method competeParsing() most be called prior to calling matches()");
		}
		return !this.differences.isDifferenceFound();
	}
	
	/**
	 * @return details on the differences found
	 */
	public DifferenceDescription getDifferences() {
		return this.differences;
	}

	/* (non-Javadoc)
	 * @see org.spdx.licenseTemplate.ILicenseTemplateOutputHandler#completeParsing()
	 */
	@Override
	public void completeParsing() throws LicenseParserException {
		this.topLevelInstruction.match(compareTokens, 0, compareTokens.length-1, compareText, differences, tokenToLocation);
		parsingComplete = true;
	}

	/**
	 * Compares the text against the compareText
	 * @param text text to compare
	 * @param startToken token of the compareText to being the comparison
	 * @return next token index (positive) if there is a match, negative first token where this is a miss-match if no match
	 */
	public int textEquivalent(String text, int startToken) {
		Map textLocations = new HashMap();
		String[] textTokens = LicenseCompareHelper.tokenizeLicenseText(LicenseCompareHelper.normalizeText(text), textLocations);
		return this.compareText(textTokens, this.compareTokens, startToken, this.compareTokens.length-1, null);	
	}

}