All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.conqat.engine.commons.findings.location.LocationAdjuster Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) CQSE GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.conqat.engine.commons.findings.location;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.Supplier;

import org.checkerframework.checker.nullness.qual.Nullable;
import org.conqat.lib.commons.algo.Diff;
import org.conqat.lib.commons.algo.Diff.Delta;
import org.conqat.lib.commons.region.LineBasedRegion;
import org.conqat.lib.commons.region.Region;
import org.conqat.lib.commons.string.LineOffsetConverter;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Suppliers;

/**
 * This class is used for adjusting the offsets used in locations (i.e. subclasses of
 * {@link ElementLocation} for text that is slightly modified. The main use-case is the update of
 * locations where the local (adjusted) text has different line ending, different content due to
 * keyword expansion, or minor local modifications compared to the text on which the analysis was
 * executed (original text).
 * 

* Both the original and adjusted text may have arbitrary line endings. *

* The implementation is based on a token diff, which can lead to minor deviations for offsets that * are not aligned with token boundaries. A character diff would be more precise, but is too * performance and memory intensive for large files. */ public class LocationAdjuster implements ILineAdjuster { /** * If the number of tokens in the adjusted region differs by the tokens in the original region by * more than this factor, the mapping is counted as wrong. */ private static final double LOSS_FACTOR = 2; /** Maximal number of tokens in the diff we accept. */ private static final int MAX_DIFF_SIZE = 5000; /** The tokens of the original string. */ private final Supplier> lazyOriginalTokens; /** * Adjusted tokens corresponding to the {@link #lazyOriginalTokens}. If there is no corresponding * token, this list contains null at the index. If the content could not be matched/adjusted at all * (too many differences), this field is null. */ private final Supplier> lazyMappedAdjustedTokens; /** Line offset converted for the original text. */ private final LineOffsetConverter originalLineOffsetConverter; /** Line offset converted for the adjusted text. */ private final LineOffsetConverter adjustedLineOffsetConverter; /** * Optional uniform path to correct element locations of {@link #adjustLocation(ElementLocation)}. * Null if no correction will be performed. */ private final String adjustedUniformPath; /** * Constructor. *

* WARNING: Creating a location adjuster is very expensive and should only be done once per file. In * case originalText and adjustedText is identical take a look at {@link SimpleValidLinesFilter}. * * @param originalText * the text for which the input locations have been created, i.e. the text from the * analysis. May be {@code null}, in which case no adjustment is performed. * @param adjustedText * the text for which the locations should be adjusted, i.e. the local text. * @param adjustedUniformPath * the adjusted uniform path for adjusted findings. */ public LocationAdjuster(@Nullable String originalText, String adjustedText, String adjustedUniformPath) { this.adjustedUniformPath = adjustedUniformPath; if (originalText != null) { originalLineOffsetConverter = new LineOffsetConverter(originalText); lazyOriginalTokens = Suppliers.memoize(() -> toTokens(originalText)); } else { originalLineOffsetConverter = new LineOffsetConverter(adjustedText); lazyOriginalTokens = Suppliers.memoize(() -> toTokens(adjustedText)); } adjustedLineOffsetConverter = new LineOffsetConverter(adjustedText); lazyMappedAdjustedTokens = Suppliers .memoize(() -> calculateMappedAdjustedTokens(adjustedText, lazyOriginalTokens.get())); } /** * Constructor. * * @param originalText * the text for which the input locations have been created, i.e. the text from the * analysis. * @param adjustedText * the text for which the locations should be adjusted, i.e. the local text. */ public LocationAdjuster(String originalText, String adjustedText) { this(originalText, adjustedText, null); } /** * Calculates the #mappedAdjustedTokens based on original tokens and adjusted text. May return null * if adjustment is not possible due too many changes. 
*/ private static List calculateMappedAdjustedTokens(String adjustedText, List originalTokens) { List adjustedTokens = toTokens(adjustedText); Delta delta = Diff.computeDelta(originalTokens, adjustedTokens, MAX_DIFF_SIZE); if (delta.getSize() >= MAX_DIFF_SIZE) { return null; } return calculateMappedAdjustedTokensFromDelta(delta, originalTokens, adjustedTokens); } /** * Calculates the #mappedAdjustedTokens based on original tokens, adjusted text, and delta. */ private static List calculateMappedAdjustedTokensFromDelta(Delta delta, List originalTokens, List adjustedTokens) { List mappedAdjustedTokens = new ArrayList<>(Collections.nCopies(originalTokens.size(), null)); int originalIndex = 0; int adjustedIndex = 0; for (int i = 0; i < delta.getSize(); ++i) { int position = delta.getPosition(i); if (position > 0) { position -= 1; while (adjustedIndex < position) { mappedAdjustedTokens.set(originalIndex++, adjustedTokens.get(adjustedIndex++)); } adjustedIndex += 1; } else { position = -position - 1; while (originalIndex < position) { mappedAdjustedTokens.set(originalIndex++, adjustedTokens.get(adjustedIndex++)); } originalIndex += 1; } } while (originalIndex < originalTokens.size()) { mappedAdjustedTokens.set(originalIndex++, adjustedTokens.get(adjustedIndex++)); } return mappedAdjustedTokens; } /** * Splits a string into tokens. A token can be: *

    *
  • A single non-identifier character
  • *
  • A string that consists of java identifier characters (see * {@link Character#isJavaIdentifierPart(char)}
  • *
* This means that abc foo<bar> will be split to: * [abc, foo, <, bar, >] * */ @VisibleForTesting static List toTokens(String s) { List tokens = new ArrayList<>(); int lastWordSeparator = -1; for (int i = 0; i < s.length(); i++) { char currentCharacter = s.charAt(i); if (!Character.isJavaIdentifierPart(currentCharacter)) { if (lastWordSeparator != i - 1) { tokens.add(new AdjusterToken(s.substring(lastWordSeparator + 1, i), lastWordSeparator + 1)); } lastWordSeparator = i; if (!Character.isWhitespace(currentCharacter)) { tokens.add(new AdjusterToken(s.substring(i, i + 1), i)); } } } if (lastWordSeparator <= s.length() - 2) { tokens.add(new AdjusterToken(s.substring(lastWordSeparator + 1), lastWordSeparator + 1)); } return tokens; } /** * Maps a zero-based offset range (both inclusive) to the adjusted string. Returns {@code null} if * the region could not be approximately mapped. */ public @Nullable Region getAdjustedRegion(int originalStartOffset, int originalEndOffset) { List mappedAdjustedTokens = lazyMappedAdjustedTokens.get(); if (mappedAdjustedTokens == null) { return null; } Region originalIndexRegion = findOriginalIndexRegion(originalStartOffset, originalEndOffset); if (originalIndexRegion.isEmpty()) { return null; } int numOriginalTokens = originalIndexRegion.getLength(); int numAdjustedTokens = 0; AdjusterToken firstAdjustedToken = null; AdjusterToken lastAdjustedToken = null; for (int i = originalIndexRegion.getStart(); i <= originalIndexRegion.getEnd(); ++i) { AdjusterToken adjustedToken = mappedAdjustedTokens.get(i); if (adjustedToken != null) { numAdjustedTokens += 1; if (firstAdjustedToken == null) { firstAdjustedToken = adjustedToken; } lastAdjustedToken = adjustedToken; } } if (firstAdjustedToken == null || lastAdjustedToken == null || LOSS_FACTOR * numAdjustedTokens < numOriginalTokens) { return null; } return new Region(firstAdjustedToken.startOffset, lastAdjustedToken.endOffset); } /** * Returns the region of indexes in the {@link 
#lazyOriginalTokens} contained in the given offsets. */ private Region findOriginalIndexRegion(int originalStartOffset, int originalEndOffset) { List originalTokens = lazyOriginalTokens.get(); AdjusterToken searchToken = new AdjusterToken(null, originalStartOffset, originalEndOffset); int originalStartTokenIndex = Collections.binarySearch(originalTokens, searchToken, AdjusterToken.COMPARE_BY_START_OFFSET); if (originalStartTokenIndex < 0) { originalStartTokenIndex = -originalStartTokenIndex - 1; } int originalEndTokenIndex = Collections.binarySearch(originalTokens, searchToken, AdjusterToken.COMPARE_BY_END_OFFSET); if (originalEndTokenIndex < 0) { // we want insertion point -1 originalEndTokenIndex = -originalEndTokenIndex - 2; } if (originalEndTokenIndex + 1 < originalTokens.size() && originalTokens.get(originalEndTokenIndex + 1).startOffset < originalEndOffset) { originalEndTokenIndex += 1; } return new Region(originalStartTokenIndex, originalEndTokenIndex); } @Override public ElementLocation adjustLocation(ElementLocation location) { if (location instanceof TextRegionLocation) { return adjustLocation((TextRegionLocation) location); } // other locations do not have offsets, if the uniform path should not // be adjusted simply return the original location. if (adjustedUniformPath == null) { return location; } if (location instanceof QualifiedNameLocation) { return new QualifiedNameLocation(((QualifiedNameLocation) location).getQualifiedName(), adjustedUniformPath); } return new ElementLocation(adjustedUniformPath); } /** * Returns a new location with adjusted offsets (if necessary). Returns {@code null} if the location * cannot be mapped to the adjusted text. 
*/ public TextRegionLocation adjustLocation(TextRegionLocation location) { int startOffset = location.getRawStartOffset(); int endOffset = location.getRawEndOffset(); if (startOffset < 0) { int startLine = location.getRawStartLine(); if (!originalLineOffsetConverter.isValidLine(startLine)) { return null; } startOffset = originalLineOffsetConverter.getOffset(startLine); } if (endOffset < 0) { int endLine = location.getRawEndLine() + 1; if (!originalLineOffsetConverter.isValidLine(endLine)) { return null; } endOffset = originalLineOffsetConverter.getOffset(endLine) - 1; } Region adjustedOffsets = getAdjustedRegion(startOffset, endOffset); if (adjustedOffsets == null || adjustedOffsets.isEmpty()) { return null; } String uniformPath = location.getUniformPath(); if (adjustedUniformPath != null) { uniformPath = adjustedUniformPath; } int newStartOffset = adjustedOffsets.getStart(); int newEndOffset = adjustedOffsets.getEnd(); return new TextRegionLocation(uniformPath, newStartOffset, newEndOffset, adjustedLineOffsetConverter.getLine(newStartOffset), adjustedLineOffsetConverter.getLine(newEndOffset)); } /** * Adjusts the location of a single line. This only respects the token part of a line, i.e. leading * and trailing whitespace of a line will be ignored. This method is robust w.r.t lines numbers that * are out of the range of the original text. In case of such an invalid line, the line is logged as * error to the given logger and {@code null} is returned. * * @param line * the one-based line number of be adjusted. * @param invalidLines * used for collecting invalid lines. * * @return the one-based lines encoded as a region, as a line may map to multiple lines after * changing. This may also return null, if no non-empty lines could be found that correspond * to the input line after adjustment. 
*/ @Override public LineBasedRegion adjustLine(int line, Set invalidLines) { if (!originalLineOffsetConverter.isValidLine(line) || !originalLineOffsetConverter.isValidLine(line + 1)) { invalidLines.add(line); return null; } int originalStartOffset = originalLineOffsetConverter.getOffset(line); int originalEndOffset = originalLineOffsetConverter.getOffset(line + 1) - 1; Region adjustedOffsets = getAdjustedRegion(originalStartOffset, originalEndOffset); if (adjustedOffsets == null) { return null; } int adjustedStartLine = adjustedLineOffsetConverter.getLine(adjustedOffsets.getStart()); int adjustedEndLine = adjustedLineOffsetConverter.getLine(adjustedOffsets.getEnd()); return new LineBasedRegion(adjustedStartLine, adjustedEndLine); } /** Returns the line count of the original text */ @Override public int getOriginalLineCount() { return originalLineOffsetConverter.getLineCount(); } /** Simple token representation used in location adjustment. */ static class AdjusterToken { /** Compares by start offset. */ private static final Comparator COMPARE_BY_START_OFFSET = Comparator .comparingInt(token -> token.startOffset); /** Compares by end offset. */ private static final Comparator COMPARE_BY_END_OFFSET = Comparator .comparingInt(token -> token.endOffset); /** The text content. */ private final String text; /** The start offset in the text. */ private final int startOffset; /** The inclusive end offset in the text. */ private final int endOffset; /** Constructor. */ private AdjusterToken(String text, int startOffset) { this(text, startOffset, startOffset + text.length() - 1); } /** Constructor. 
*/ private AdjusterToken(String text, int startOffset, int endOffset) { this.text = text; this.startOffset = startOffset; this.endOffset = endOffset; } /** {@inheritDoc} */ @Override public boolean equals(Object obj) { return (obj instanceof AdjusterToken) && ((AdjusterToken) obj).text.equals(text); } @Override public String toString() { return "AdjusterToken{" + "text='" + text + '\'' + ", startOffset=" + startOffset + ", endOffset=" + endOffset + '}'; } /** {@inheritDoc} */ @Override public int hashCode() { return text.hashCode(); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy