// gr.uom.java.xmi.diff.UMLCommentListDiff Maven / Gradle / Ivy
//
// Go to download
package gr.uom.java.xmi.diff;

import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.tuple.Pair;

import gr.uom.java.xmi.UMLComment;

public class UMLCommentListDiff {
	private List> commonComments;
	private List deletedComments;
	private List addedComments;
	private boolean manyToManyReformat;

	/**
	 * Computes the difference between two comment lists.
	 *
	 * First pairs up comments reported as matching by {@code findAllMatchingIndices},
	 * driving the search from the shorter list, and then hands the comments that are
	 * still unpaired to {@code processModifiedComments} for the fuzzier matching passes.
	 *
	 * @param commentsBefore comments of the element before the change
	 * @param commentsAfter comments of the element after the change
	 */
	public UMLCommentListDiff(List<UMLComment> commentsBefore, List<UMLComment> commentsAfter) {
		this.commonComments = new ArrayList<Pair<UMLComment, UMLComment>>();
		this.deletedComments = new ArrayList<UMLComment>();
		this.addedComments = new ArrayList<UMLComment>();
		// working copies: the input lists are never modified, matched comments are removed from these
		List<UMLComment> deletedComments = new ArrayList<UMLComment>(commentsBefore);
		List<UMLComment> addedComments = new ArrayList<UMLComment>(commentsAfter);
		if(commentsBefore.size() <= commentsAfter.size()) {
			for(UMLComment comment : commentsBefore) {
				List<Integer> matchingIndices = findAllMatchingIndices(commentsAfter, comment);
				for(Integer index : matchingIndices) {
					UMLComment candidate = commentsAfter.get(index);
					if(!alreadyMatchedComment(comment, candidate)) {
						Pair<UMLComment, UMLComment> pair = Pair.of(comment, candidate);
						commonComments.add(pair);
						deletedComments.remove(comment);
						addedComments.remove(candidate);
						// only the first free candidate is paired with this comment
						break;
					}
				}
			}
		}
		else {
			for(UMLComment comment : commentsAfter) {
				List<Integer> matchingIndices = findAllMatchingIndices(commentsBefore, comment);
				for(Integer index : matchingIndices) {
					UMLComment candidate = commentsBefore.get(index);
					if(!alreadyMatchedComment(candidate, comment)) {
						Pair<UMLComment, UMLComment> pair = Pair.of(candidate, comment);
						commonComments.add(pair);
						deletedComments.remove(candidate);
						addedComments.remove(comment);
						// only the first free candidate is paired with this comment
						break;
					}
				}
			}
		}
		processModifiedComments(deletedComments, addedComments);
	}

	/*
	 * NOTE(review): this span holds two methods that were fused when the listing was
	 * extracted: the beginning of findAllMatchingIndices(fragments, comment) and the
	 * body of processModifiedComments(deletedComments, addedComments). Generic type
	 * arguments and several loop headers are missing; restore them from the upstream
	 * source before compiling.
	 *
	 * findAllMatchingIndices collects indices of 'fragments' into a list; the matching
	 * criterion is not visible here.
	 *
	 * processModifiedComments receives the comments that are still unmatched and runs
	 * several matching passes over them. Each pass adds pairs to commonComments; whatever
	 * is left in the two lists at the end is copied into the deletedComments and
	 * addedComments fields.
	 */
	private List findAllMatchingIndices(List fragments, UMLComment comment) {
		List matchingIndices = new ArrayList<>();
		// NOTE(review): the loop header below is truncated; the remainder of this method and
		// the signature of processModifiedComments are missing between "i" and "deletedComments".
		for(int i=0; i deletedComments, List addedComments) {
		//match comments differing only in opening/closing quotes
		// the regex strips one leading and one trailing double quote before comparing;
		// the loops run over copies so that the lists can be modified while iterating,
		// and there is no break, so every pair with equal trimmed text is recorded
		if(deletedComments.size() <= addedComments.size()) {
			for(UMLComment deletedComment : new ArrayList<>(deletedComments)) {
				for(UMLComment addedComment : new ArrayList<>(addedComments)) {
					String trimmed1 = deletedComment.getText().replaceAll("^\"|\"$", "");
					String trimmed2 = addedComment.getText().replaceAll("^\"|\"$", "");
					if(trimmed1.equals(trimmed2)) {
						Pair pair = Pair.of(deletedComment, addedComment);
						commonComments.add(pair);
						deletedComments.remove(deletedComment);
						addedComments.remove(addedComment);
					}
				}
			}
		}
		else {
			// same pass, driven from the (shorter) added list
			for(UMLComment addedComment : new ArrayList<>(addedComments)) {
				for(UMLComment deletedComment : new ArrayList<>(deletedComments)) {
					String trimmed1 = deletedComment.getText().replaceAll("^\"|\"$", "");
					String trimmed2 = addedComment.getText().replaceAll("^\"|\"$", "");
					if(trimmed1.equals(trimmed2)) {
						Pair pair = Pair.of(deletedComment, addedComment);
						commonComments.add(pair);
						deletedComments.remove(deletedComment);
						addedComments.remove(addedComment);
					}
				}
			}
		}
		// comments matched by the following passes are collected here and removed in bulk
		List deletedToBeDeleted = new ArrayList();
		List addedToBeDeleted = new ArrayList();
		if(deletedComments.size() == addedComments.size()) {
			// NOTE(review): the loop header and the pairing condition are truncated on the next line;
			// presumably comments at the same index are compared - confirm against upstream
			for(int i=0; i pair = Pair.of(deletedComment, addedComment);
					commonComments.add(pair);
					deletedToBeDeleted.add(deletedComment);
					addedToBeDeleted.add(addedComment);
				}
			}
			deletedComments.removeAll(deletedToBeDeleted);
			addedComments.removeAll(addedToBeDeleted);
		}
		//check if all deleted comments match all added comments
		// for each side: concatenate the texts, and keep the word tokens both as one
		// flat sequence and per comment (insertion-ordered map)
		StringBuilder deletedSB = new StringBuilder();
		List deletedTokenSequence = new ArrayList();
		Map> deletedTokenSequenceMap = new LinkedHashMap<>();
		for(UMLComment deletedComment : deletedComments) {
			String text = deletedComment.getText();
			deletedSB.append(text);
			List splitToWords = splitToWords(text);
			deletedTokenSequence.addAll(splitToWords);
			deletedTokenSequenceMap.put(deletedComment, splitToWords);
		}
		StringBuilder addedSB = new StringBuilder();
		List addedTokenSequence = new ArrayList();
		Map> addedTokenSequenceMap = new LinkedHashMap<>();
		for(UMLComment addedComment : addedComments) {
			String text = addedComment.getText();
			addedSB.append(text);
			List splitToWords = splitToWords(text);
			addedTokenSequence.addAll(splitToWords);
			addedTokenSequenceMap.put(addedComment, splitToWords);
		}
		// the concatenated texts are compared with all whitespace removed
		if(deletedSB.toString().replaceAll("\\s", "").equals(addedSB.toString().replaceAll("\\s", ""))) {
			//make all pair combinations
			// note: the paired comments are not removed from the pending lists in this branch
			for(UMLComment deletedComment : deletedComments) {
				for(UMLComment addedComment : addedComments) {
					Pair pair = Pair.of(deletedComment, addedComment);
					commonComments.add(pair);
				}
			}
			// the flag is raised only when both sides still had pending comments
			if(deletedComments.size() >= 1 && addedComments.size() >= 1) {
				manyToManyReformat = true;
			}
		}
		else {
			//match comments that one contains a subsequence of the other
			// the shorter token sequence supplies the candidate sub-lists
			if(deletedTokenSequence.size() <= addedTokenSequence.size()) {
				List longestSubSequence = null;
				// NOTE(review): the nested loop headers (over i and j) are truncated on the next line
				for(int i=0; i subList = deletedTokenSequence.subList(i,j+1);
						// only candidates with more than two tokens are considered
						if(subList.size() > 2) {
							int indexOfSubList = Collections.indexOfSubList(addedTokenSequence, subList);
							if(indexOfSubList != -1) {
								if(longestSubSequence == null) {
									longestSubSequence = subList;
								}
								// replace the current best only by a longer candidate that contains all of its tokens
								else if(subList.containsAll(longestSubSequence) && subList.size() > longestSubSequence.size()) {
									longestSubSequence = subList;
								}
							}
						}
					}
					// stop early once the whole deleted token sequence has been found
					if(longestSubSequence != null && longestSubSequence.equals(deletedTokenSequence)) {
						break;
					}
				}
				if(longestSubSequence != null) {
					//make all pair combinations
					// restricted to comments on both sides that share part of the subsequence
					// and are not already matched
					for(UMLComment deletedComment : deletedComments) {
						if(containsAnySubSequence(deletedTokenSequenceMap.get(deletedComment), longestSubSequence)) {
							for(UMLComment addedComment : addedComments) {
								if(containsAnySubSequence(addedTokenSequenceMap.get(addedComment), longestSubSequence)) {
									if(!alreadyMatchedComment(deletedComment, addedComment)) {
										Pair pair = Pair.of(deletedComment, addedComment);
										commonComments.add(pair);
										deletedToBeDeleted.add(deletedComment);
										addedToBeDeleted.add(addedComment);
									}
								}
							}
						}
					}
					if(deletedComments.size() >= 1 && addedComments.size() >= 1) {
						manyToManyReformat = true;
					}
					deletedComments.removeAll(deletedToBeDeleted);
					addedComments.removeAll(addedToBeDeleted);
				}
			}
			else {
				// mirrored case: candidates are taken from the added token sequence
				List longestSubSequence = null;
				// NOTE(review): the nested loop headers (over i and j) are truncated on the next line
				for(int i=0; i subList = addedTokenSequence.subList(i,j+1);
						if(subList.size() > 2) {
							int indexOfSubList = Collections.indexOfSubList(deletedTokenSequence, subList);
							if(indexOfSubList != -1) {
								if(longestSubSequence == null) {
									longestSubSequence = subList;
								}
								else if(subList.containsAll(longestSubSequence) && subList.size() > longestSubSequence.size()) {
									longestSubSequence = subList;
								}
							}
						}
					}
					if(longestSubSequence != null && longestSubSequence.equals(addedTokenSequence)) {
						break;
					}
				}
				if(longestSubSequence != null) {
					//make all pair combinations
					for(UMLComment deletedComment : deletedComments) {
						if(containsAnySubSequence(deletedTokenSequenceMap.get(deletedComment), longestSubSequence)) {
							for(UMLComment addedComment : addedComments) {
								if(containsAnySubSequence(addedTokenSequenceMap.get(addedComment), longestSubSequence)) {
									// unlike the branch above, an already matched pair is still accepted when the
									// subsequence holds every token of the deleted comment and that comment has
									// more than one token
									if(!alreadyMatchedComment(deletedComment, addedComment) ||
											(longestSubSequence.containsAll(deletedTokenSequenceMap.get(deletedComment)) &&
											deletedTokenSequenceMap.get(deletedComment).size() > 1)) {
										Pair pair = Pair.of(deletedComment, addedComment);
										commonComments.add(pair);
										deletedToBeDeleted.add(deletedComment);
										addedToBeDeleted.add(addedComment);
									}
								}
							}
						}
					}
					if(deletedComments.size() >= 1 && addedComments.size() >= 1) {
						manyToManyReformat = true;
					}
					deletedComments.removeAll(deletedToBeDeleted);
					addedComments.removeAll(addedToBeDeleted);
				}
			}
		}
		// match one comment against a run of comments of the other side, as reported by
		// findConcatenatedMatch; the side with fewer pending comments supplies the text
		if(deletedComments.size() > addedComments.size()) {
			for(UMLComment addedComment : addedComments) {
				String text = addedComment.getText();
				// NOTE(review): the loop header over the start index i is truncated on the next line
				for(int i=0; i matches = findConcatenatedMatch(deletedComments, text, i);
					if(matches.size() > 0) {
						for(UMLComment match : matches) {
							Pair pair = Pair.of(match, addedComment);
							commonComments.add(pair);
							deletedToBeDeleted.add(match);
						}
						addedToBeDeleted.add(addedComment);
						// the first start index that yields a match wins
						break;
					}
				}
			}
		}
		else {
			for(UMLComment deletedComment : deletedComments) {
				String text = deletedComment.getText();
				// NOTE(review): the loop header over the start index i is truncated on the next line
				for(int i=0; i matches = findConcatenatedMatch(addedComments, text, i);
					if(matches.size() > 0) {
						for(UMLComment match : matches) {
							Pair pair = Pair.of(deletedComment, match);
							commonComments.add(pair);
							addedToBeDeleted.add(match);
						}
						deletedToBeDeleted.add(deletedComment);
						break;
					}
				}
			}
		}
		deletedComments.removeAll(deletedToBeDeleted);
		addedComments.removeAll(addedToBeDeleted);
		//match comments that one contains the other
		// both texts must be longer than two characters; copies are iterated because
		// matched comments are removed from the pending lists on the fly
		if(deletedComments.size() <= addedComments.size()) {
			for(UMLComment deletedComment : new ArrayList<>(deletedComments)) {
				if(deletedComment.getText().length() > 2) {
					for(UMLComment addedComment : new ArrayList<>(addedComments)) {
						if(addedComment.getText().length() > 2) {
							if(deletedComment.getText().contains(addedComment.getText()) || addedComment.getText().contains(deletedComment.getText())) {
								Pair pair = Pair.of(deletedComment, addedComment);
								commonComments.add(pair);
								deletedComments.remove(deletedComment);
								addedComments.remove(addedComment);
							}
						}
					}
				}
			}
		}
		else {
			for(UMLComment addedComment : new ArrayList<>(addedComments)) {
				if(addedComment.getText().length() > 2) {
					for(UMLComment deletedComment : new ArrayList<>(deletedComments)) {
						if(deletedComment.getText().length() > 2) {
							if(deletedComment.getText().contains(addedComment.getText()) || addedComment.getText().contains(deletedComment.getText())) {
								Pair pair = Pair.of(deletedComment, addedComment);
								commonComments.add(pair);
								deletedComments.remove(deletedComment);
								addedComments.remove(addedComment);
							}
						}
					}
				}
			}
		}
		// whatever is still pending becomes the final deleted / added result
		this.deletedComments.addAll(deletedComments);
		this.addedComments.addAll(addedComments);
	}

	/**
	 * Tells whether one of the two comments already takes part in a recorded pair whose
	 * two texts contain one another.
	 *
	 * A recorded pair is relevant when its left element is {@code deletedComment} or its
	 * right element is {@code addedComment} (identity comparison). The containment test
	 * is applied to the texts of the recorded pair itself, not to the two arguments.
	 *
	 * @param deletedComment comment from the "before" side
	 * @param addedComment comment from the "after" side
	 * @return {@code true} if such a pair exists in {@code commonComments}
	 */
	private boolean alreadyMatchedComment(UMLComment deletedComment, UMLComment addedComment) {
		for(Pair<UMLComment, UMLComment> pair : commonComments) {
			if(pair.getLeft() == deletedComment || pair.getRight() == addedComment) {
				String leftText = pair.getLeft().getText();
				String rightText = pair.getRight().getText();
				if(leftText.contains(rightText) || rightText.contains(leftText)) {
					return true;
				}
			}
		}
		return false;
	}

	/**
	 * Searches {@code comments}, beginning at {@code startIndex}, for a run of consecutive
	 * comments related to {@code text}; on success the run
	 * {@code comments.subList(startIndex, i+1)} is returned as a new list, otherwise an
	 * empty list.
	 *
	 * NOTE(review): the loop header and the match condition are truncated in this copy.
	 * Presumably the texts are appended to {@code concatText} and compared with
	 * {@code text} - confirm against the upstream source.
	 */
	private List findConcatenatedMatch(List comments, String text, int startIndex) {
		StringBuilder concatText = new StringBuilder();
		// NOTE(review): truncated line - loop condition, loop body and "return new ArrayList<...>" are missing
		for(int i=startIndex; i(comments.subList(startIndex, i+1));
			}
		}
		// no run starting at startIndex matched
		return Collections.emptyList();
	}

	/**
	 * Checks whether {@code list} overlaps with {@code longestSubSequence} as a contiguous
	 * run of elements.
	 *
	 * Returns {@code true} when {@code list} has more than one element and occurs inside
	 * {@code longestSubSequence}, or when a leading part of {@code longestSubSequence}
	 * (at least two elements long) or one of its trailing parts occurs inside {@code list}.
	 */
	private boolean containsAnySubSequence(List list, List longestSubSequence) {
		// the whole list is itself a contiguous part of the subsequence
		if(list.size() > 1 && Collections.indexOfSubList(longestSubSequence, list) != -1)
			return true;
		// prefixes of the subsequence, from the full length down to two elements
		for(int i=longestSubSequence.size(); i>1; i--) {
			List subList = longestSubSequence.subList(0,i);
			int index = Collections.indexOfSubList(list, subList);
			if(index != -1) {
				return true;
			}
		}
		// suffixes of the subsequence
		// NOTE(review): the loop header is truncated on the next line, so the upper bound of i is not visible
		for(int i=0; i subList = longestSubSequence.subList(i,longestSubSequence.size());
			int index = Collections.indexOfSubList(list, subList);
			if(index != -1) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Splits a text into tokens at the word boundaries reported by
	 * {@link BreakIterator#getWordInstance()} (default locale).
	 *
	 * Segments that are blank (whitespace only) are dropped; every other segment between
	 * two consecutive boundaries is kept, in order of appearance.
	 *
	 * @param sentence the text to tokenize
	 * @return the non-blank segments of {@code sentence}; empty if there are none
	 */
	private List<String> splitToWords(String sentence) {
		List<String> words = new ArrayList<>();
		BreakIterator boundary = BreakIterator.getWordInstance();
		boundary.setText(sentence);
		int start = boundary.first();
		for (int end = boundary.next();
				end != BreakIterator.DONE;
				start = end, end = boundary.next()) {
			String word = sentence.substring(start, end);
			if(!word.isBlank()) {
				words.add(word);
			}
		}
		return words;
	}

	public List> getCommonComments() {
		return commonComments;
	}

	/**
	 * @return the comments of the "before" list that remained pending after all matching
	 *         passes; this is the internal list itself, not a copy
	 */
	public List<UMLComment> getDeletedComments() {
		return deletedComments;
	}

	/**
	 * @return the comments of the "after" list that remained pending after all matching
	 *         passes; this is the internal list itself, not a copy
	 */
	public List<UMLComment> getAddedComments() {
		return addedComments;
	}

	/**
	 * Reports whether the pending comments of both sides were matched as a group, i.e.
	 * their concatenated texts were equal ignoring whitespace or they shared a common
	 * token subsequence, while both sides still had at least one pending comment.
	 *
	 * @return the value of the many-to-many reformat flag
	 */
	public boolean isManyToManyReformat() {
		return this.manyToManyReformat;
	}
}