// org.conqat.engine.commons.findings.location.LocationAdjuster Maven / Gradle / Ivy
/*-------------------------------------------------------------------------+
| |
| Copyright 2005-2011 the ConQAT Project |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
+-------------------------------------------------------------------------*/
package org.conqat.engine.commons.findings.location;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.conqat.lib.commons.algo.Diff;
import org.conqat.lib.commons.algo.Diff.Delta;
import org.conqat.lib.commons.region.LineBasedRegion;
import org.conqat.lib.commons.region.Region;
import org.conqat.lib.commons.string.LineOffsetConverter;
/**
* This class is used for adjusting the offsets used in locations (i.e.
* subclasses of {@link ElementLocation}) for text that is slightly modified. The
* main use-case is the update of locations where the local (adjusted) text has
* different line ending, different content due to keyword expansion, or minor
* local modifications compared to the text on which the analysis was executed
* (original text).
*
* Both the original and adjusted text may have arbitrary line endings.
*
* The implementation is based on a token diff, which can lead to minor
* deviations for offsets that are not aligned with token boundaries. A
* character diff would be more precise, but is too performance and memory
* intensive for large files.
*/
public class LocationAdjuster implements ILineAdjuster {

    /**
     * If the number of tokens in the original region exceeds the number of mapped
     * tokens in the adjusted region by more than this factor, the mapping is
     * counted as wrong.
     */
    private static final double LOSS_FACTOR = 2;

    /**
     * Maximal number of entries in the diff we accept. A delta of this size or
     * larger is treated as "too many differences".
     */
    private static final int MAX_DIFF_SIZE = 5000;

    /**
     * Pattern defining tokens for the diff. Matches either alphanumeric strings
     * (typical identifiers), or single non-whitespace characters.
     */
    private static final Pattern TOKEN_PATTERN = Pattern.compile("[a-zA-Z0-9_]+|\\S");

    /** The tokens of the original string. */
    private final List<AdjusterToken> originalTokens;

    /**
     * Adjusted tokens corresponding to the {@link #originalTokens}. If there is no
     * corresponding token, this list contains {@code null} at the index. If the
     * content could not be matched/adjusted at all (too many differences), this
     * field is {@code null}.
     */
    private final List<AdjusterToken> mappedAdjustedTokens;

    /** Line offset converter for the original text. */
    private final LineOffsetConverter originalLineOffsetConverter;

    /** Line offset converter for the adjusted text. */
    private final LineOffsetConverter adjustedLineOffsetConverter;

    /**
     * Optional uniform path to correct element locations of
     * {@link #adjustLocation(ElementLocation)}. {@code null} if no correction will
     * be performed.
     */
    private final String adjustedUniformPath;

    /**
     * Constructor.
     *
     * WARNING: Creating a location adjuster is very expensive and should only be
     * done once per file. In case originalText and adjustedText are identical take
     * a look at {@link SimpleValidLinesFilter}.
     *
     * @param originalText
     *            the text for which the input locations have been created, i.e.
     *            the text from the analysis. May be {@code null}, in which case
     *            the adjusted text is used as original text as well, i.e. no
     *            adjustment is performed.
     * @param adjustedText
     *            the text for which the locations should be adjusted, i.e. the
     *            local text. Must not be {@code null}.
     * @param adjustedUniformPath
     *            the adjusted uniform path for adjusted findings. May be
     *            {@code null}, in which case uniform paths are left unchanged.
     */
    public LocationAdjuster(String originalText, String adjustedText, String adjustedUniformPath) {
        if (originalText == null) {
            originalText = adjustedText;
        }

        this.adjustedUniformPath = adjustedUniformPath;

        originalLineOffsetConverter = new LineOffsetConverter(originalText);
        adjustedLineOffsetConverter = new LineOffsetConverter(adjustedText);

        originalTokens = toTokens(originalText);
        mappedAdjustedTokens = calculateMappedAdjustedTokens(adjustedText, originalTokens);
    }

    /**
     * Constructor that does not change the uniform path of adjusted locations.
     *
     * @param originalText
     *            the text for which the input locations have been created, i.e.
     *            the text from the analysis.
     * @param adjustedText
     *            the text for which the locations should be adjusted, i.e. the
     *            local text.
     */
    public LocationAdjuster(String originalText, String adjustedText) {
        this(originalText, adjustedText, null);
    }

    /**
     * Calculates the {@link #mappedAdjustedTokens} based on original tokens and
     * adjusted text.
     *
     * @return the mapped tokens, or {@code null} if adjustment is not possible due
     *         to too many changes (delta size reaches {@link #MAX_DIFF_SIZE}).
     */
    private static List<AdjusterToken> calculateMappedAdjustedTokens(String adjustedText,
            List<AdjusterToken> originalTokens) {
        List<AdjusterToken> adjustedTokens = toTokens(adjustedText);
        Delta<AdjusterToken> delta = Diff.computeDelta(originalTokens, adjustedTokens, MAX_DIFF_SIZE);
        if (delta.getSize() >= MAX_DIFF_SIZE) {
            return null;
        }
        return calculateMappedAdjustedTokensFromDelta(delta, originalTokens, adjustedTokens);
    }

    /**
     * Calculates the {@link #mappedAdjustedTokens} based on original tokens,
     * adjusted tokens, and delta.
     *
     * The returned list has the same size as the original token list; entries for
     * original tokens without counterpart in the adjusted text stay {@code null}.
     */
    private static List<AdjusterToken> calculateMappedAdjustedTokensFromDelta(Delta<AdjusterToken> delta,
            List<AdjusterToken> originalTokens, List<AdjusterToken> adjustedTokens) {
        List<AdjusterToken> mappedAdjustedTokens = new ArrayList<>(
                Collections.<AdjusterToken>nCopies(originalTokens.size(), null));

        int originalIndex = 0;
        int adjustedIndex = 0;
        for (int i = 0; i < delta.getSize(); ++i) {
            int position = delta.getPosition(i);
            if (position > 0) {
                // positive entry: (position - 1) is handled as index of an
                // adjusted token without original counterpart; everything before
                // it is unchanged and mapped pairwise, then the token is skipped
                position -= 1;
                while (adjustedIndex < position) {
                    mappedAdjustedTokens.set(originalIndex++, adjustedTokens.get(adjustedIndex++));
                }
                adjustedIndex += 1;
            } else {
                // non-positive entry: (-position - 1) is handled as index of an
                // original token without adjusted counterpart; its mapping
                // stays null
                position = -position - 1;
                while (originalIndex < position) {
                    mappedAdjustedTokens.set(originalIndex++, adjustedTokens.get(adjustedIndex++));
                }
                originalIndex += 1;
            }
        }

        // remaining tokens after the last delta entry are mapped pairwise
        while (originalIndex < originalTokens.size()) {
            mappedAdjustedTokens.set(originalIndex++, adjustedTokens.get(adjustedIndex++));
        }
        return mappedAdjustedTokens;
    }

    /** Splits a string into tokens as defined by {@link #TOKEN_PATTERN}. */
    private static List<AdjusterToken> toTokens(String s) {
        List<AdjusterToken> tokens = new ArrayList<>();
        Matcher matcher = TOKEN_PATTERN.matcher(s);
        while (matcher.find()) {
            tokens.add(new AdjusterToken(matcher.group(), matcher.start()));
        }
        return tokens;
    }

    /**
     * Maps a zero-based offset range (both inclusive) to the adjusted string.
     *
     * @return the adjusted region, or {@code null} if the region could not be
     *         approximately mapped (no diff available, no original tokens in the
     *         range, no mapped tokens, or too many tokens lost).
     */
    public Region getAdjustedRegion(int originalStartOffset, int originalEndOffset) {
        if (mappedAdjustedTokens == null) {
            return null;
        }

        Region originalIndexRegion = findOriginalIndexRegion(originalStartOffset, originalEndOffset);
        if (originalIndexRegion.isEmpty()) {
            return null;
        }

        int numOriginalTokens = originalIndexRegion.getLength();
        int numAdjustedTokens = 0;
        AdjusterToken firstAdjustedToken = null;
        AdjusterToken lastAdjustedToken = null;
        for (int i = originalIndexRegion.getStart(); i <= originalIndexRegion.getEnd(); ++i) {
            AdjusterToken adjustedToken = mappedAdjustedTokens.get(i);
            if (adjustedToken != null) {
                numAdjustedTokens += 1;
                if (firstAdjustedToken == null) {
                    firstAdjustedToken = adjustedToken;
                }
                lastAdjustedToken = adjustedToken;
            }
        }

        // reject if nothing could be mapped or too many original tokens were lost
        if (firstAdjustedToken == null || lastAdjustedToken == null
                || LOSS_FACTOR * numAdjustedTokens < numOriginalTokens) {
            return null;
        }

        return new Region(firstAdjustedToken.startOffset, lastAdjustedToken.endOffset);
    }

    /**
     * Returns the region of indexes in the {@link #originalTokens} contained in the
     * given offsets.
     */
    private Region findOriginalIndexRegion(int originalStartOffset, int originalEndOffset) {
        // the search token only carries offsets; its text is never inspected by
        // the comparators used below
        AdjusterToken searchToken = new AdjusterToken(null, originalStartOffset, originalEndOffset);

        int originalStartTokenIndex = Collections.binarySearch(originalTokens, searchToken,
                AdjusterToken.COMPARE_BY_START_OFFSET);
        if (originalStartTokenIndex < 0) {
            // not found: use the insertion point, i.e. the first token starting
            // after the start offset
            originalStartTokenIndex = -originalStartTokenIndex - 1;
        }

        int originalEndTokenIndex = Collections.binarySearch(originalTokens, searchToken,
                AdjusterToken.COMPARE_BY_END_OFFSET);
        if (originalEndTokenIndex < 0) {
            // we want insertion point -1, i.e. the last token ending before the
            // end offset
            originalEndTokenIndex = -originalEndTokenIndex - 2;
        }

        return new Region(originalStartTokenIndex, originalEndTokenIndex);
    }

    /**
     * Returns a new location with adjusted offsets (if necessary). Returns
     * {@code null} if the location cannot be mapped to the adjusted text.
     */
    public ElementLocation adjustLocation(ElementLocation location) {
        if (location instanceof TextRegionLocation) {
            return adjustLocation((TextRegionLocation) location);
        }

        // other locations do not have offsets, if the uniform path should not
        // be adjusted simply return the original location.
        if (adjustedUniformPath == null) {
            return location;
        }

        if (location instanceof QualifiedNameLocation) {
            return new QualifiedNameLocation(((QualifiedNameLocation) location).getQualifiedName(),
                    location.getLocation(), adjustedUniformPath);
        }
        return new ElementLocation(location.getLocation(), adjustedUniformPath);
    }

    /**
     * Returns a new location with adjusted offsets (if necessary). Returns
     * {@code null} if the location cannot be mapped to the adjusted text.
     */
    public TextRegionLocation adjustLocation(TextRegionLocation location) {
        int startOffset = location.getRawStartOffset();
        int endOffset = location.getRawEndOffset();

        // a negative raw start offset is replaced by the offset of the start line
        if (startOffset < 0) {
            int startLine = location.getRawStartLine();
            if (!originalLineOffsetConverter.isValidLine(startLine)) {
                return null;
            }
            startOffset = originalLineOffsetConverter.getOffset(startLine);
        }

        // a negative raw end offset is replaced by the offset just before the
        // line following the end line
        if (endOffset < 0) {
            int endLine = location.getRawEndLine() + 1;
            if (!originalLineOffsetConverter.isValidLine(endLine)) {
                return null;
            }
            endOffset = originalLineOffsetConverter.getOffset(endLine) - 1;
        }

        Region adjustedOffsets = getAdjustedRegion(startOffset, endOffset);
        if (adjustedOffsets == null || adjustedOffsets.isEmpty()) {
            return null;
        }

        String uniformPath = location.getUniformPath();
        if (adjustedUniformPath != null) {
            uniformPath = adjustedUniformPath;
        }

        int newStartOffset = adjustedOffsets.getStart();
        int newEndOffset = adjustedOffsets.getEnd();
        return new TextRegionLocation(location.getLocation(), uniformPath, newStartOffset, newEndOffset,
                adjustedLineOffsetConverter.getLine(newStartOffset),
                adjustedLineOffsetConverter.getLine(newEndOffset));
    }

    /**
     * Adjusts the location of a single line. This only respects the token part of a
     * line, i.e. leading and trailing whitespace of a line will be ignored. This
     * method is robust w.r.t. line numbers that are out of the range of the
     * original text. In case of such an invalid line, the line is added to the
     * given set of invalid lines and {@code null} is returned.
     *
     * @param line
     *            the one-based line number to be adjusted.
     * @param invalidLines
     *            used for collecting invalid lines.
     *
     * @return the one-based lines encoded as a region, as a line may map to
     *         multiple lines after changing. This may also return {@code null},
     *         if no non-empty lines could be found that correspond to the input
     *         line after adjustment.
     */
    @Override
    public LineBasedRegion adjustLine(int line, Set<Integer> invalidLines) {
        // the following line must be valid as well, as its offset marks the end
        // of the requested line
        if (!originalLineOffsetConverter.isValidLine(line) || !originalLineOffsetConverter.isValidLine(line + 1)) {
            invalidLines.add(line);
            return null;
        }

        int originalStartOffset = originalLineOffsetConverter.getOffset(line);
        int originalEndOffset = originalLineOffsetConverter.getOffset(line + 1) - 1;

        Region adjustedOffsets = getAdjustedRegion(originalStartOffset, originalEndOffset);
        if (adjustedOffsets == null) {
            return null;
        }

        int adjustedStartLine = adjustedLineOffsetConverter.getLine(adjustedOffsets.getStart());
        int adjustedEndLine = adjustedLineOffsetConverter.getLine(adjustedOffsets.getEnd());
        return new LineBasedRegion(adjustedStartLine, adjustedEndLine);
    }

    /** Returns the line count of the original text. */
    @Override
    public int getOriginalLineCount() {
        return originalLineOffsetConverter.getLineCount();
    }

    /**
     * Simple token representation used in location adjustment. Equality and hash
     * code are based on the text only, so tokens at different offsets with the
     * same text are equal for the diff.
     */
    private static class AdjusterToken {

        /** Compares by start offset. */
        private static final Comparator<AdjusterToken> COMPARE_BY_START_OFFSET = Comparator
                .comparingInt(token -> token.startOffset);

        /** Compares by end offset. */
        private static final Comparator<AdjusterToken> COMPARE_BY_END_OFFSET = Comparator
                .comparingInt(token -> token.endOffset);

        /** The text content. May be null for tokens only used for offset search. */
        private final String text;

        /** The start offset in the text. */
        private final int startOffset;

        /** The inclusive end offset in the text. */
        private final int endOffset;

        /** Constructor that derives the inclusive end offset from the text length. */
        private AdjusterToken(String text, int startOffset) {
            this(text, startOffset, startOffset + text.length() - 1);
        }

        /** Constructor. */
        private AdjusterToken(String text, int startOffset, int endOffset) {
            this.text = text;
            this.startOffset = startOffset;
            this.endOffset = endOffset;
        }

        /** {@inheritDoc} */
        @Override
        public boolean equals(Object obj) {
            if (!(obj instanceof AdjusterToken)) {
                return false;
            }
            // null-safe, as search tokens are created without text
            String otherText = ((AdjusterToken) obj).text;
            return text == null ? otherText == null : text.equals(otherText);
        }

        /** {@inheritDoc} */
        @Override
        public int hashCode() {
            // null-safe, as search tokens are created without text
            return text == null ? 0 : text.hashCode();
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy