All downloads are free. The search and download functionality uses the official Maven repository.

org.sonar.plugins.python.indexer.SonarQubePythonIndexer Maven / Gradle / Ivy

The newest version!
/*
 * SonarQube Python Plugin
 * Copyright (C) 2011-2024 SonarSource SA
 * mailto:info AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.plugins.python.indexer;

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.sonar.api.batch.fs.InputFile;
import org.sonar.api.batch.sensor.SensorContext;
import org.sonar.plugins.python.PythonInputFile;
import org.sonar.plugins.python.api.caching.CacheContext;
import org.sonar.plugins.python.caching.Caching;
import org.sonar.python.index.Descriptor;
import org.sonar.python.semantic.DependencyGraph;
import org.sonar.python.semantic.SymbolUtils;
import org.sonar.python.types.TypeShed;
import org.sonarsource.performance.measure.PerformanceMeasure;

import static org.sonar.plugins.python.api.PythonVersionUtils.PYTHON_VERSION_KEY;

public class SonarQubePythonIndexer extends PythonIndexer {

  /**
   * Describes if an optimized analysis of unchanged by skipping some rules is enabled.
   * By default, the property is not set (null), leaving SQ/SC to decide whether to enable this behavior.
   * Setting it to true or false, forces the behavior from the analyzer independently of the server.
   */
  public static final String SONAR_CAN_SKIP_UNCHANGED_FILES_KEY = "sonar.python.skipUnchanged";
  private static final Logger LOG = LoggerFactory.getLogger(SonarQubePythonIndexer.class);

  private final Caching caching;
  private final Set fullySkippableFiles = new HashSet<>();
  private final Set partiallySkippableFiles = new HashSet<>();
  private final List inputFiles = new ArrayList<>();
  private final Map inputFileToFQN = new HashMap<>();

  public SonarQubePythonIndexer(List inputFiles, CacheContext cacheContext, SensorContext context) {
    this.projectBaseDirAbsolutePath = context.fileSystem().baseDir().getAbsolutePath();
    this.caching = new Caching(cacheContext, getCacheVersion(context));
    inputFiles.forEach(f -> {
      this.inputFiles.add(f);
      inputFileToFQN.put(f, SymbolUtils.fullyQualifiedModuleName(packageName(f), f.wrappedFile().filename()));
    });
  }

  @Override
  public void buildOnce(SensorContext context) {
    LOG.debug("Input files for indexing: {}", inputFiles);
    collectPackageNames(inputFiles);
    if (shouldOptimizeAnalysis(context)) {
      computeGlobalSymbolsUsingCache(context);
      return;
    }
    PerformanceMeasure.Duration duration = PerformanceMeasure.start("ProjectLevelSymbolTable");
    computeGlobalSymbols(inputFiles, context);
    duration.stop();
  }

  private boolean shouldOptimizeAnalysis(SensorContext context) {
    return caching.isCacheEnabled()
      && (context.canSkipUnchangedFiles() || context.config().getBoolean(SONAR_CAN_SKIP_UNCHANGED_FILES_KEY).orElse(false))
      && caching.isCacheVersionUpToDate();
  }

  private void computeGlobalSymbolsUsingCache(SensorContext context) {
    loadTypeshedSymbols();
    LOG.info("Using cached data to retrieve global symbols.");
    Set currentProjectModulesFQNs = new HashSet<>(inputFileToFQN.values());
    Set deletedModulesFQNs = deletedModulesFQNs(currentProjectModulesFQNs);
    Set allProjectFilesFQNs = Stream.concat(currentProjectModulesFQNs.stream(), deletedModulesFQNs.stream())
      .collect(Collectors.toSet());
    Map> importsByModule = new HashMap<>();
    // Deleted files are considered impactful to their dependents but will not be re-analyzed.
    List impactfulFiles = new ArrayList<>();
    List impactfulModulesFQNs = new ArrayList<>(deletedModulesFQNs);
    for (PythonInputFile inputFile : inputFiles) {
      String currFQN = inputFileToFQN.get(inputFile);
      boolean isUnimpacted = tryToUseCache(importsByModule, inputFile, currFQN);
      if (!isUnimpacted) {
        // Failed to retrieve some data: consider the file as impactful.
        impactfulFiles.add(inputFile);
        impactfulModulesFQNs.add(currFQN);
      } else {
        partiallySkippableFiles.add(inputFile);
      }
    }
    // Impacted modules are computed from both modified files and deleted ones.
    Set impactedModulesFQN = DependencyGraph.from(importsByModule, allProjectFilesFQNs).impactedModules(impactfulModulesFQNs);
    inputFiles.stream().filter(f -> !impactedModulesFQN.contains(inputFileToFQN.get(f))).forEach(fullySkippableFiles::add);
    LOG.info(
      "Cached information of global symbols will be used for {} out of {} main files. Global symbols will be recomputed for the remaining files.",
      inputFiles.size() - impactfulFiles.size(),
      inputFiles.size());
    LOG.info("Fully optimized analysis can be performed for {} out of {} files.", fullySkippableFiles.size(), inputFiles.size());
    LOG.info("Partially optimized analysis can be performed for {} out of {} files.", partiallySkippableFiles.size(), inputFiles.size());
    // Although we need to analyze all impacted files, we only need to recompute global symbols for modified files (no cross-file dependencies
    // in the project symbol table)
    computeGlobalSymbols(impactfulFiles, context);
  }

  /*
   * In a full analysis, Typeshed symbols are loaded lazily depending on which module is encountered during parsing.
   * SonarSecurity needs all Typeshed symbols used in the project to be properly loaded.
   * For that reason, we load all symbols that were used in the previous analysis upfront, even if the file using them will not be parsed.
   */
  private void loadTypeshedSymbols() {
    TypeShed.builtinSymbols();
    Set typeShedModules = caching.readTypeshedModules();
    typeShedModules.forEach(TypeShed::symbolsForModule);
  }

  private boolean tryToUseCache(Map> importsByModule, PythonInputFile inputFile, String currFQN) {
    if (!fileIsUnchanged(inputFile)) {
      return false;
    }

    Set imports = caching.readImportMapEntry(inputFile.wrappedFile().key());
    if (imports != null) {
      importsByModule.put(currFQN, imports);
    }
    Set descriptors = caching.readProjectLevelSymbolTableEntry(inputFile.wrappedFile().key());
    if (descriptors != null && imports != null) {
      saveRetrievedDescriptors(inputFile.wrappedFile().key(), descriptors, caching);
      return true;
    }

    return false;
  }

  private boolean fileIsUnchanged(PythonInputFile inputFile) {
    if (!inputFile.wrappedFile().status().equals(InputFile.Status.SAME)) {
      return false;
    }
    byte[] fileHash = caching.readFileContentHash(inputFile.wrappedFile().key());
    // InputFile.Status is not reliable in some cases
    // We use the hash of the file's content to double-check the content is the same.
    var fileInputHash = inputFile.wrappedFile().md5Hash().getBytes(StandardCharsets.UTF_8);
    return MessageDigest.isEqual(fileHash, fileInputHash);
  }

  private void saveRetrievedDescriptors(String fileKey, Set descriptors, Caching caching) {
    projectLevelSymbolTable().insertEntry(fileKey, descriptors);
    caching.copyFromPrevious(fileKey);
  }

  public void computeGlobalSymbols(List files, SensorContext context) {
    GlobalSymbolsScanner globalSymbolsStep = new GlobalSymbolsScanner(context);
    globalSymbolsStep.execute(files, context);
    if (caching.isCacheEnabled()) {
      saveGlobalSymbolsInCache(files);
      saveMainFilesListInCache(new HashSet<>(inputFileToFQN.values()));
      Set stubModules = TypeShed.stubModules();
      if (!stubModules.isEmpty()) {
        caching.writeTypeshedModules(stubModules);
      }
      caching.writeCacheVersion();
    }
  }

  private void saveGlobalSymbolsInCache(List files) {
    for (PythonInputFile inputFile : files) {
      String moduleFQN = inputFileToFQN.get(inputFile);
      Set descriptors = projectLevelSymbolTable().descriptorsForModule(moduleFQN);
      Set imports = projectLevelSymbolTable().importsByModule().get(moduleFQN);
      if (descriptors != null && imports != null) {
        // Descriptors/imports map may be null if the file failed to parse.
        // We don't try to save information in the cache in that case.
        writeContentHashToCache(inputFile);

        caching.writeProjectLevelSymbolTableEntry(inputFile.wrappedFile().key(), descriptors);
        caching.writeImportsMapEntry(inputFile.wrappedFile().key(), imports);
      }
    }
  }

  private void writeContentHashToCache(PythonInputFile inputFile) {
    var contentHash = inputFile.wrappedFile().md5Hash().getBytes(StandardCharsets.UTF_8);
    caching.writeFileContentHash(inputFile.wrappedFile().key(), contentHash);
  }

  private Set deletedModulesFQNs(Set projectModulesFQNs) {
    Set previousAnalysisModulesFQNs = caching.readFilesList();
    previousAnalysisModulesFQNs.removeAll(projectModulesFQNs);
    return previousAnalysisModulesFQNs;
  }

  private void saveMainFilesListInCache(Set modulesFQN) {
    caching.writeFilesList(new ArrayList<>(modulesFQN));
  }

  @Override
  public boolean canBePartiallyScannedWithoutParsing(PythonInputFile inputFile) {
    return partiallySkippableFiles.contains(inputFile) || fullySkippableFiles.contains(inputFile);
  }

  @Override
  public boolean canBeFullyScannedWithoutParsing(PythonInputFile inputFile) {
    return fullySkippableFiles.contains(inputFile);
  }

  @Override
  public CacheContext cacheContext() {
    return caching.cacheContext();
  }

  private static String getCacheVersion(SensorContext context) {
    String implementationVersion = getImplementationVersion(SonarQubePythonIndexer.class);
    var pythonVersions = context.config().getStringArray(PYTHON_VERSION_KEY);
    if (pythonVersions.length == 0) {
      return implementationVersion;
    }
    return implementationVersion + ";" + String.join(",", pythonVersions);
  }

  private static String getImplementationVersion(Class cls) {
    String implementationVersion = cls.getPackage().getImplementationVersion();
    if (implementationVersion == null) {
      LOG.warn("Implementation version of the Python plugin not found. Cached data may not be invalidated properly, which may lead to inaccurate analysis results.");
      return "unknownPluginVersion";
    }
    return implementationVersion;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy