/*
* SonarQube Python Plugin
* Copyright (C) 2011-2024 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.plugins.python.indexer;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.sonar.api.batch.fs.InputFile;
import org.sonar.api.batch.sensor.SensorContext;
import org.sonar.plugins.python.PythonInputFile;
import org.sonar.plugins.python.api.caching.CacheContext;
import org.sonar.plugins.python.caching.Caching;
import org.sonar.python.index.Descriptor;
import org.sonar.python.semantic.DependencyGraph;
import org.sonar.python.semantic.SymbolUtils;
import org.sonar.python.types.TypeShed;
import org.sonarsource.performance.measure.PerformanceMeasure;
import static org.sonar.plugins.python.api.PythonVersionUtils.PYTHON_VERSION_KEY;
public class SonarQubePythonIndexer extends PythonIndexer {
/**
* Describes whether an optimized analysis of unchanged files (by skipping some rules) is enabled.
* By default, the property is not set (null), leaving SQ/SC to decide whether to enable this behavior.
* Setting it to true or false forces the behavior on the analyzer side, independently of the server.
*/
public static final String SONAR_CAN_SKIP_UNCHANGED_FILES_KEY = "sonar.python.skipUnchanged";
private static final Logger LOG = LoggerFactory.getLogger(SonarQubePythonIndexer.class);
private final Caching caching;
private final Set<PythonInputFile> fullySkippableFiles = new HashSet<>();
private final Set<PythonInputFile> partiallySkippableFiles = new HashSet<>();
private final List<PythonInputFile> inputFiles = new ArrayList<>();
private final Map<PythonInputFile, String> inputFileToFQN = new HashMap<>();
public SonarQubePythonIndexer(List<PythonInputFile> inputFiles, CacheContext cacheContext, SensorContext context) {
this.projectBaseDirAbsolutePath = context.fileSystem().baseDir().getAbsolutePath();
this.caching = new Caching(cacheContext, getCacheVersion(context));
inputFiles.forEach(f -> {
this.inputFiles.add(f);
inputFileToFQN.put(f, SymbolUtils.fullyQualifiedModuleName(packageName(f), f.wrappedFile().filename()));
});
}
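/**
 * Builds the project-level symbol table once per analysis. When the optimized analysis can be used
 * (see {@link #shouldOptimizeAnalysis(SensorContext)}), global symbols are restored from cached data;
 * otherwise they are recomputed from scratch for all input files.
 */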
@Override
public void buildOnce(SensorContext context) {
LOG.debug("Input files for indexing: {}", inputFiles);
collectPackageNames(inputFiles);
if (shouldOptimizeAnalysis(context)) {
computeGlobalSymbolsUsingCache(context);
return;
}
PerformanceMeasure.Duration duration = PerformanceMeasure.start("ProjectLevelSymbolTable");
computeGlobalSymbols(inputFiles, context);
duration.stop();
}
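// The optimized analysis is only attempted when the cache is enabled, the server (or the
// "sonar.python.skipUnchanged" property) allows skipping unchanged files, and the cached data
// was produced with an up-to-date cache version.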
private boolean shouldOptimizeAnalysis(SensorContext context) {
return caching.isCacheEnabled()
&& (context.canSkipUnchangedFiles() || context.config().getBoolean(SONAR_CAN_SKIP_UNCHANGED_FILES_KEY).orElse(false))
&& caching.isCacheVersionUpToDate();
}
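/*
 * Restores global symbols from the cache for unchanged files and recomputes them only for impactful files:
 * modified files and files whose cached data could not be retrieved. Files whose module is not impacted by
 * any modified or deleted module (per the dependency graph) are marked as fully skippable.
 */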
private void computeGlobalSymbolsUsingCache(SensorContext context) {
loadTypeshedSymbols();
LOG.info("Using cached data to retrieve global symbols.");
Set<String> currentProjectModulesFQNs = new HashSet<>(inputFileToFQN.values());
Set<String> deletedModulesFQNs = deletedModulesFQNs(currentProjectModulesFQNs);
Set<String> allProjectFilesFQNs = Stream.concat(currentProjectModulesFQNs.stream(), deletedModulesFQNs.stream())
.collect(Collectors.toSet());
Map<String, Set<String>> importsByModule = new HashMap<>();
// Deleted files are considered impactful to their dependents but will not be re-analyzed.
List<PythonInputFile> impactfulFiles = new ArrayList<>();
List<String> impactfulModulesFQNs = new ArrayList<>(deletedModulesFQNs);
for (PythonInputFile inputFile : inputFiles) {
String currFQN = inputFileToFQN.get(inputFile);
boolean isUnimpacted = tryToUseCache(importsByModule, inputFile, currFQN);
if (!isUnimpacted) {
// Failed to retrieve some data: consider the file as impactful.
impactfulFiles.add(inputFile);
impactfulModulesFQNs.add(currFQN);
} else {
partiallySkippableFiles.add(inputFile);
}
}
// Impacted modules are computed from both modified files and deleted ones.
Set<String> impactedModulesFQN = DependencyGraph.from(importsByModule, allProjectFilesFQNs).impactedModules(impactfulModulesFQNs);
inputFiles.stream().filter(f -> !impactedModulesFQN.contains(inputFileToFQN.get(f))).forEach(fullySkippableFiles::add);
LOG.info(
"Cached information of global symbols will be used for {} out of {} main files. Global symbols will be recomputed for the remaining files.",
inputFiles.size() - impactfulFiles.size(),
inputFiles.size());
LOG.info("Fully optimized analysis can be performed for {} out of {} files.", fullySkippableFiles.size(), inputFiles.size());
LOG.info("Partially optimized analysis can be performed for {} out of {} files.", partiallySkippableFiles.size(), inputFiles.size());
// Although we need to analyze all impacted files, we only need to recompute global symbols for modified files (no cross-file dependencies
// in the project symbol table)
computeGlobalSymbols(impactfulFiles, context);
}
/*
* In a full analysis, Typeshed symbols are loaded lazily depending on which module is encountered during parsing.
* SonarSecurity needs all Typeshed symbols used in the project to be properly loaded.
* For that reason, we load all symbols that were used in the previous analysis upfront, even if the file using them will not be parsed.
*/
private void loadTypeshedSymbols() {
TypeShed.builtinSymbols();
Set<String> typeShedModules = caching.readTypeshedModules();
typeShedModules.forEach(TypeShed::symbolsForModule);
}
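// Returns true when the file is unchanged and both its import map and its project-level descriptors
// could be read from the cache; in that case the cached descriptors are reused and copied over for the next analysis.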
private boolean tryToUseCache(Map<String, Set<String>> importsByModule, PythonInputFile inputFile, String currFQN) {
if (!fileIsUnchanged(inputFile)) {
return false;
}
Set<String> imports = caching.readImportMapEntry(inputFile.wrappedFile().key());
if (imports != null) {
importsByModule.put(currFQN, imports);
}
Set<Descriptor> descriptors = caching.readProjectLevelSymbolTableEntry(inputFile.wrappedFile().key());
if (descriptors != null && imports != null) {
saveRetrievedDescriptors(inputFile.wrappedFile().key(), descriptors, caching);
return true;
}
return false;
}
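// A file is considered unchanged only if its status is SAME and the hash of its content matches
// the hash stored during the previous analysis.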
private boolean fileIsUnchanged(PythonInputFile inputFile) {
if (!inputFile.wrappedFile().status().equals(InputFile.Status.SAME)) {
return false;
}
byte[] fileHash = caching.readFileContentHash(inputFile.wrappedFile().key());
// InputFile.Status is not reliable in some cases
// We use the hash of the file's content to double-check the content is the same.
var fileInputHash = inputFile.wrappedFile().md5Hash().getBytes(StandardCharsets.UTF_8);
return MessageDigest.isEqual(fileHash, fileInputHash);
}
private void saveRetrievedDescriptors(String fileKey, Set<Descriptor> descriptors, Caching caching) {
projectLevelSymbolTable().insertEntry(fileKey, descriptors);
caching.copyFromPrevious(fileKey);
}
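/**
 * Scans the given files to populate the project-level symbol table and, when caching is enabled,
 * persists the resulting descriptors, imports, main files list, Typeshed modules and cache version
 * for the next analysis.
 */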
public void computeGlobalSymbols(List<PythonInputFile> files, SensorContext context) {
GlobalSymbolsScanner globalSymbolsStep = new GlobalSymbolsScanner(context);
globalSymbolsStep.execute(files, context);
if (caching.isCacheEnabled()) {
saveGlobalSymbolsInCache(files);
saveMainFilesListInCache(new HashSet<>(inputFileToFQN.values()));
Set<String> stubModules = TypeShed.stubModules();
if (!stubModules.isEmpty()) {
caching.writeTypeshedModules(stubModules);
}
caching.writeCacheVersion();
}
}
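// Writes, for each successfully parsed file, its content hash, its project-level descriptors and its imports.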
private void saveGlobalSymbolsInCache(List<PythonInputFile> files) {
for (PythonInputFile inputFile : files) {
String moduleFQN = inputFileToFQN.get(inputFile);
Set<Descriptor> descriptors = projectLevelSymbolTable().descriptorsForModule(moduleFQN);
Set<String> imports = projectLevelSymbolTable().importsByModule().get(moduleFQN);
if (descriptors != null && imports != null) {
// Descriptors/imports map may be null if the file failed to parse.
// We don't try to save information in the cache in that case.
writeContentHashToCache(inputFile);
caching.writeProjectLevelSymbolTableEntry(inputFile.wrappedFile().key(), descriptors);
caching.writeImportsMapEntry(inputFile.wrappedFile().key(), imports);
}
}
}
private void writeContentHashToCache(PythonInputFile inputFile) {
var contentHash = inputFile.wrappedFile().md5Hash().getBytes(StandardCharsets.UTF_8);
caching.writeFileContentHash(inputFile.wrappedFile().key(), contentHash);
}
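// Modules that were part of the previous analysis but are no longer present in the current project.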
private Set<String> deletedModulesFQNs(Set<String> projectModulesFQNs) {
Set<String> previousAnalysisModulesFQNs = caching.readFilesList();
previousAnalysisModulesFQNs.removeAll(projectModulesFQNs);
return previousAnalysisModulesFQNs;
}
private void saveMainFilesListInCache(Set<String> modulesFQN) {
caching.writeFilesList(new ArrayList<>(modulesFQN));
}
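/**
 * A file can be partially scanned without parsing when its cached data could be reused
 * (partially skippable) or when it is not impacted by any change (fully skippable).
 */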
@Override
public boolean canBePartiallyScannedWithoutParsing(PythonInputFile inputFile) {
return partiallySkippableFiles.contains(inputFile) || fullySkippableFiles.contains(inputFile);
}
@Override
public boolean canBeFullyScannedWithoutParsing(PythonInputFile inputFile) {
return fullySkippableFiles.contains(inputFile);
}
@Override
public CacheContext cacheContext() {
return caching.cacheContext();
}
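// The cache version combines the plugin implementation version with the configured Python versions,
// so cached data is invalidated whenever either of them changes.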
private static String getCacheVersion(SensorContext context) {
String implementationVersion = getImplementationVersion(SonarQubePythonIndexer.class);
var pythonVersions = context.config().getStringArray(PYTHON_VERSION_KEY);
if (pythonVersions.length == 0) {
return implementationVersion;
}
return implementationVersion + ";" + String.join(",", pythonVersions);
}
private static String getImplementationVersion(Class<?> cls) {
String implementationVersion = cls.getPackage().getImplementationVersion();
if (implementationVersion == null) {
LOG.warn("Implementation version of the Python plugin not found. Cached data may not be invalidated properly, which may lead to inaccurate analysis results.");
return "unknownPluginVersion";
}
return implementationVersion;
}
}