All Downloads are FREE. Search and download functionalities are using the official Maven repository.

sootup.java.bytecode.inputlocation.PathBasedAnalysisInputLocation Maven / Gradle / Ivy

There is a newer version: 1.3.0
Show newest version
package sootup.java.bytecode.inputlocation;

import java.io.*;
import java.nio.file.*;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.annotation.Nonnull;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.io.FilenameUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import sootup.core.IdentifierFactory;
import sootup.core.frontend.ClassProvider;
import sootup.core.frontend.SootClassSource;
import sootup.core.inputlocation.AnalysisInputLocation;
import sootup.core.inputlocation.FileType;
import sootup.core.model.SourceType;
import sootup.core.transform.BodyInterceptor;
import sootup.core.types.ClassType;
import sootup.core.util.PathUtils;
import sootup.core.util.StreamUtils;
import sootup.core.views.View;
import sootup.java.bytecode.frontend.AsmJavaClassProvider;
import sootup.java.core.*;
import sootup.java.core.types.JavaClassType;

/*-
 * #%L
 * Soot
 * %%
 * Copyright (C) 2018-2020 Manuel Benz, Christian Brüggemann, Markus Schmidt and others
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

/**
 * Base class for {@link AnalysisInputLocation}s whose content is located by a {@link Path}
 * object.
 *
 * <p>Instances are usually obtained through the static {@code create(...)} factory methods, which
 * pick an implementation depending on whether the path is a directory, a jar or a war file.
 *
 * @author Manuel Benz created on 22.05.18
 * @author Kaustubh Kelkar updated on 30.07.2020
 */
public abstract class PathBasedAnalysisInputLocation implements AnalysisInputLocation {
  /** Root of this input location; the constructor rejects a path that does not exist. */
  @Nonnull protected Path path;
  /** Paths that are skipped by {@code walkDirectory}; the constructor stores them as absolute paths. */
  @Nonnull protected Collection ignoredPaths;
  /** Source type handed to the constructor and returned by {@link #getSourceType()}. */
  @Nonnull protected final SourceType sourceType;
  /** Interceptors handed to the constructor and returned by {@link #getBodyInterceptors()}. */
  @Nonnull protected final List bodyInterceptors;

  /**
   * Creates an input location for {@code path} that has neither body interceptors nor ignored
   * paths.
   *
   * @param path the path to analyze
   * @param srcType the source type returned by {@link #getSourceType()}
   * @throws IllegalArgumentException if {@code path} does not exist
   */
  protected PathBasedAnalysisInputLocation(@Nonnull Path path, @Nonnull SourceType srcType) {
    this(path, srcType, Collections.emptyList(), Collections.emptyList());
  }

  /**
   * Creates an input location for {@code path} without ignored paths.
   *
   * <p>Delegates to the four-argument constructor with an empty collection of ignored paths.
   *
   * @param path the path to analyze
   * @param srcType the source type returned by {@link #getSourceType()}
   * @param bodyInterceptors the interceptors returned by {@link #getBodyInterceptors()}
   */
  protected PathBasedAnalysisInputLocation(
      @Nonnull Path path,
      @Nonnull SourceType srcType,
      @Nonnull List bodyInterceptors) {
    this(path, srcType, bodyInterceptors, Collections.emptyList());
  }

  protected PathBasedAnalysisInputLocation(
      @Nonnull Path path,
      @Nonnull SourceType srcType,
      @Nonnull List bodyInterceptors,
      @Nonnull Collection ignoredPaths) {
    this.path = path;
    this.ignoredPaths =
        ignoredPaths.stream()
            .map(Path::toAbsolutePath)
            .collect(Collectors.toCollection(HashSet::new));
    this.sourceType = srcType;
    this.bodyInterceptors = bodyInterceptors;

    if (!Files.exists(path)) {
      throw new IllegalArgumentException("The provided path '" + path + "' does not exist.");
    }
  }

  /** Returns the source type that was passed to the constructor of this input location. */
  @Override
  @Nonnull
  public SourceType getSourceType() {
    return sourceType;
  }

  /**
   * Returns the body interceptors that were passed to the constructor. The stored list itself is
   * returned, not a copy.
   */
  @Override
  @Nonnull
  public List getBodyInterceptors() {
    return bodyInterceptors;
  }

  /**
   * Creates an input location for {@code path} without body interceptors and without ignored
   * paths.
   *
   * @param path root of the class container (directory, jar or war)
   * @param sourceType source type of the created input location
   * @return the input location matching the kind of container at {@code path}
   * @throws IllegalArgumentException if {@code path} is neither a directory, a jar nor a war
   */
  @Nonnull
  public static PathBasedAnalysisInputLocation create(
      @Nonnull Path path, @Nonnull SourceType sourceType) {
    return create(path, sourceType, Collections.emptyList(), Collections.emptyList());
  }

  /**
   * Creates an input location for {@code path} without ignored paths.
   *
   * <p>Delegates to the four-argument {@code create} overload with an empty collection of ignored
   * paths.
   *
   * @param path root of the class container
   * @param srcType source type of the created input location
   * @param bodyInterceptors body interceptors of the created input location
   * @return the input location created by the four-argument overload
   */
  @Nonnull
  public static PathBasedAnalysisInputLocation create(
      @Nonnull Path path,
      @Nonnull SourceType srcType,
      @Nonnull List bodyInterceptors) {
    return create(path, srcType, bodyInterceptors, Collections.emptyList());
  }

  /**
   * Creates the input location that matches the kind of class container {@code path} points to.
   *
   * <p>A directory, a jar archive and a war archive are recognized, checked in that order. Other
   * archive types are not handled here and end in the {@link IllegalArgumentException} below.
   *
   * @param path root of the class container
   * @param srcType source type of the created input location
   * @param bodyInterceptors body interceptors of the created input location
   * @param ignoredPaths paths that are excluded from the created input location
   * @return an input location for the directory, jar or war at {@code path}
   * @throws IllegalArgumentException if {@code path} equals one of the ignored paths or is located
   *     below one of them, or if it is neither a directory, a jar nor a war
   * @throws RuntimeException if creating the input location of a war fails with an {@link
   *     IOException}
   */
  @Nonnull
  public static PathBasedAnalysisInputLocation create(
      @Nonnull Path path,
      @Nonnull SourceType srcType,
      @Nonnull List bodyInterceptors,
      @Nonnull Collection ignoredPaths) {

    // Compare whole path elements, not raw string prefixes: "/libs2" must not be rejected just
    // because "/libs" is ignored. The ignored path is handed over in its string form so that it is
    // interpreted by the file system of `path`, like the former string comparison.
    final boolean isIgnored =
        ignoredPaths.stream().anyMatch(ignoPath -> path.startsWith(ignoPath.toString()));
    if (isIgnored) {
      throw new IllegalArgumentException(
          "The Path for the AnalysisInputLocation is in the ignored paths.");
    }

    if (Files.isDirectory(path)) {
      return new DirectoryBasedAnalysisInputLocation(path, srcType, bodyInterceptors, ignoredPaths);
    }

    if (PathUtils.isArchive(path)) {
      if (PathUtils.hasExtension(path, FileType.JAR)) {
        return new ArchiveBasedAnalysisInputLocation(path, srcType, bodyInterceptors, ignoredPaths);
      }
      if (PathUtils.hasExtension(path, FileType.WAR)) {
        try {
          return new WarArchiveAnalysisInputLocation(path, srcType, bodyInterceptors, ignoredPaths);
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    }

    throw new IllegalArgumentException(
        "Path '"
            + path.toAbsolutePath()
            + "' has to be pointing to the root of a class container, e.g. directory, jar, zip, apk, war etc.");
  }

  /**
   * Collects the class sources of all class files found below {@code dirPath}.
   *
   * <p>A file is considered if it has the extension handled by {@code classProvider}, is not a
   * module-info class file and is not located at or below one of the ignored paths. Files for which
   * the class provider creates no class source are skipped.
   *
   * @param dirPath the directory to traverse recursively
   * @param factory used to create the class type for the name derived from each file path
   * @param classProvider creates the class sources
   * @return the class sources that could be created
   * @throws IllegalArgumentException if starting the traversal fails with an {@link IOException}
   */
  @Nonnull
  Collection walkDirectory(
      @Nonnull Path dirPath,
      @Nonnull IdentifierFactory factory,
      @Nonnull ClassProvider classProvider) {

    final FileType handledFileType = classProvider.getHandledFileType();
    final String moduleInfoFilename = JavaModuleIdentifierFactory.MODULE_INFO_FILE + ".class";
    try (final Stream<Path> walk = Files.walk(dirPath)) {
      return walk.filter(
              filePath -> {
                if (!PathUtils.hasExtension(filePath, handledFileType)
                    || filePath.toString().endsWith(moduleInfoFilename)) {
                  return false;
                }
                // The ignored paths are stored in absolute form, so the walked path has to be
                // absolute as well. Whole path elements are compared: a file below "/a/lib2" is
                // not ignored just because "/a/lib" is.
                final Path absoluteFilePath = filePath.toAbsolutePath();
                return ignoredPaths.stream()
                    .noneMatch(ignored -> absoluteFilePath.startsWith(ignored.toString()));
              })
          .flatMap(
              p -> {
                final String fullyQualifiedName = fromPath(dirPath, p);

                return StreamUtils.optionalToStream(
                    classProvider.createClassSource(
                        this, p, factory.getClassType(fullyQualifiedName)));
              })
          .map(src -> (JavaSootClassSource) src)
          .collect(Collectors.toList());

    } catch (IOException e) {
      throw new IllegalArgumentException(e);
    }
  }

  /**
   * Derives a fully qualified class name from the location of a class file relative to a base
   * directory, e.g. base {@code /classes} and file {@code /classes/a/b/C.class} result in {@code
   * a.b.C}.
   *
   * @param baseDirPath the directory whose path elements are cut off from the front
   * @param packageNamePathAndClass path of the class file; it has to contain more elements than
   *     {@code baseDirPath}
   * @return the remaining path elements joined by dots, without the file extension
   */
  @Nonnull
  protected String fromPath(@Nonnull Path baseDirPath, Path packageNamePathAndClass) {
    final int firstPackageElement = baseDirPath.getNameCount();
    final int elementCount = packageNamePathAndClass.getNameCount();
    final String separator = packageNamePathAndClass.getFileSystem().getSeparator();

    final String relativeClassFile =
        packageNamePathAndClass.subpath(firstPackageElement, elementCount).toString();
    final String dottedName = relativeClassFile.replace(separator, ".");
    return FilenameUtils.removeExtension(dottedName);
  }

  /**
   * Looks up the class source of {@code signature} below {@code path}.
   *
   * <p>The file is expected at the location given by the fully qualified name, with every dot
   * replaced by a slash and the extension handled by {@code classProvider} appended.
   *
   * @param signature the class to look up
   * @param path the root below which the class file is resolved
   * @param classProvider creates the class source
   * @return whatever the class provider returns for the resolved file, cast to {@link
   *     JavaSootClassSource}
   */
  @Nonnull
  protected Optional getClassSourceInternal(
      @Nonnull JavaClassType signature, @Nonnull Path path, @Nonnull ClassProvider classProvider) {

    final FileSystem fileSystem = path.getFileSystem();
    final String relativeClassFile =
        signature.getFullyQualifiedName().replace('.', '/')
            + classProvider.getHandledFileType().getExtensionWithDot();
    final Path pathToClass = path.resolve(fileSystem.getPath(relativeClassFile));

    return classProvider
        .createClassSource(this, pathToClass, signature)
        .map(src -> (JavaSootClassSource) src);
  }

  /**
   * Creates the class source for the single class file at {@code path}.
   *
   * <p>NOTE(review): {@code signature} is passed on to the class provider unchecked; nothing in
   * this method verifies that the file actually contains that class.
   *
   * @param signature the type the created class source is requested for
   * @param path the class file
   * @param classProvider creates the class source
   * @return whatever the class provider returns for the file, cast to {@link JavaSootClassSource}
   */
  @Nonnull
  protected Optional getSingleClass(
      @Nonnull JavaClassType signature, @Nonnull Path path, @Nonnull ClassProvider classProvider) {

    // Use the given path as is. Rebuilding it from its string form via Paths.get(..) would bind it
    // to the default file system, even if the path belongs to another one.
    return classProvider
        .createClassSource(this, path, signature)
        .map(src -> (JavaSootClassSource) src);
  }

  /**
   * Input location that consists of exactly one class file.
   *
   * <p>The class name is derived from the file name; {@code omittedPackageName} is put in front of
   * it, as the package cannot be derived from the location of a single file.
   */
  public static class ClassFileBasedAnalysisInputLocation extends PathBasedAnalysisInputLocation {

    /** Package name that is prepended to the class name derived from the file name; may be empty. */
    @Nonnull private final String omittedPackageName;

    /**
     * Creates an input location for a single class file without body interceptors.
     *
     * @param classFilePath the class file
     * @param omittedPackageName package of the class, or an empty string
     * @param srcType the source type of this input location
     * @throws IllegalArgumentException if {@code classFilePath} is not an existing regular file
     */
    public ClassFileBasedAnalysisInputLocation(
        @Nonnull Path classFilePath,
        @Nonnull String omittedPackageName,
        @Nonnull SourceType srcType) {
      this(classFilePath, omittedPackageName, srcType, Collections.emptyList());
    }

    /**
     * Creates an input location for a single class file.
     *
     * @param classFilePath the class file
     * @param omittedPackageName package of the class, or an empty string
     * @param srcType the source type of this input location
     * @param bodyInterceptors the body interceptors of this input location
     * @throws IllegalArgumentException if {@code classFilePath} is not an existing regular file
     */
    public ClassFileBasedAnalysisInputLocation(
        @Nonnull Path classFilePath,
        @Nonnull String omittedPackageName,
        @Nonnull SourceType srcType,
        @Nonnull List bodyInterceptors) {
      super(classFilePath, srcType, bodyInterceptors);
      this.omittedPackageName = omittedPackageName;

      // A directory is never a regular file, so this single check also rejects directories.
      if (!Files.isRegularFile(classFilePath)) {
        throw new IllegalArgumentException(
            "Needs to point to a regular file - not to a directory.");
      }
    }

    /**
     * Creates the class source of the class file for the requested {@code type}.
     *
     * <p>NOTE(review): {@code type} is not compared with the name derived from the file.
     */
    @Override
    @Nonnull
    public Optional getClassSource(
        @Nonnull ClassType type, @Nonnull View view) {
      return getSingleClass((JavaClassType) type, path, new AsmJavaClassProvider(view));
    }

    /**
     * Returns the class source of the class file.
     *
     * @return a list with the single class source, or an empty list if the class provider creates
     *     no class source for the file
     */
    @Nonnull
    @Override
    public Collection getClassSources(@Nonnull View view) {
      AsmJavaClassProvider classProvider = new AsmJavaClassProvider(view);
      IdentifierFactory factory = view.getIdentifierFactory();

      // A relative path with a single element, e.g. "A.class", has no parent. The absolute form of
      // a file path always has one and yields the same file name.
      final Path absolutePath = path.toAbsolutePath();
      final String fullyQualifiedName = fromPath(absolutePath.getParent(), absolutePath);

      final Optional<JavaSootClassSource> classSource =
          classProvider
              .createClassSource(this, path, factory.getClassType(fullyQualifiedName))
              .map(src -> (JavaSootClassSource) src);
      if (!classSource.isPresent()) {
        // Same result as a directory walk that finds nothing usable, instead of failing in get().
        return Collections.emptyList();
      }
      return Collections.singletonList(classSource.get());
    }

    /**
     * Derives the class name like the overridden method does and puts {@code omittedPackageName}
     * in front of it, separated by a dot, unless that package name is empty.
     */
    @Override
    @Nonnull
    protected String fromPath(@Nonnull Path baseDirPath, Path packageNamePathAndClass) {
      final String str = super.fromPath(baseDirPath, packageNamePathAndClass);
      return omittedPackageName.isEmpty() ? str : omittedPackageName + "." + str;
    }
  }

  /** Input location whose classes are the class files found below a directory. */
  private static class DirectoryBasedAnalysisInputLocation extends PathBasedAnalysisInputLocation {

    /** Creates a directory based input location without body interceptors and ignored paths. */
    private DirectoryBasedAnalysisInputLocation(@Nonnull Path path, @Nonnull SourceType srcType) {
      this(path, srcType, Collections.emptyList(), Collections.emptyList());
    }

    /** Creates a directory based input location without ignored paths. */
    private DirectoryBasedAnalysisInputLocation(
        @Nonnull Path path,
        @Nonnull SourceType srcType,
        @Nonnull List bodyInterceptors) {
      this(path, srcType, bodyInterceptors, Collections.emptyList());
    }

    /**
     * Creates a directory based input location.
     *
     * @param path the directory to analyze
     * @param srcType the source type of this input location
     * @param bodyInterceptors the body interceptors of this input location
     * @param ignoredPaths paths that are skipped when the directory is traversed
     */
    public DirectoryBasedAnalysisInputLocation(
        @Nonnull Path path,
        @Nonnull SourceType srcType,
        @Nonnull List bodyInterceptors,
        @Nonnull Collection ignoredPaths) {
      super(path, srcType, bodyInterceptors, ignoredPaths);
    }

    /** Returns the class sources of all class files found by traversing the directory. */
    @Override
    @Nonnull
    public Collection getClassSources(@Nonnull View view) {
      // FIXME: 1) store the classprovider reference as a field; 2) and above too; and 3) move view
      // which is only used in SootNode to be just there?
      final IdentifierFactory identifierFactory = view.getIdentifierFactory();
      final AsmJavaClassProvider classProvider = new AsmJavaClassProvider(view);
      return walkDirectory(path, identifierFactory, classProvider);
    }

    /** Looks up the class source of {@code type} below the directory. */
    @Override
    @Nonnull
    public Optional getClassSource(
        @Nonnull ClassType type, @Nonnull View view) {
      final JavaClassType javaType = (JavaClassType) type;
      final AsmJavaClassProvider classProvider = new AsmJavaClassProvider(view);
      return getClassSourceInternal(javaType, path, classProvider);
    }
  }

  private static final class WarArchiveAnalysisInputLocation
      extends DirectoryBasedAnalysisInputLocation {
    /**
     * Input locations found in the extracted war: the {@code WEB-INF/classes} directory, if it
     * exists, and every jar file below {@code WEB-INF/lib}. Filled by the constructor.
     */
    public List containedInputLocations = new ArrayList<>();
    /** Upper bound for the number of bytes {@code extractWarFile} extracts from a war file. */
    public static int maxAllowedBytesToExtract =
        1024 * 1024 * 500; // limit of extracted file size to protect against archive bombs

    /**
     * Creates an input location for the war file at {@code warPath} without body interceptors and
     * without ignored paths.
     *
     * @param warPath the war file
     * @param srcType the source type of this input location
     * @throws IOException declared by the four-argument constructor this one delegates to
     */
    private WarArchiveAnalysisInputLocation(@Nonnull Path warPath, @Nonnull SourceType srcType)
        throws IOException {
      this(warPath, srcType, Collections.emptyList(), Collections.emptyList());
    }

    /**
     * Extracts the war file into a new temporary directory and registers the class containers
     * found in it: the {@code WEB-INF/classes} directory and every jar below {@code WEB-INF/lib}.
     *
     * <p>NOTE(review): {@code ignoredPaths} is only handed to the super constructor; the contained
     * input locations are created without ignored paths.
     *
     * @param warPath the war file
     * @param srcType the source type of this and of the contained input locations
     * @param bodyInterceptors the body interceptors of this and of the contained input locations
     * @param ignoredPaths the ignored paths of this input location
     * @throws IOException if the temporary directory cannot be created
     * @throws RuntimeException if extracting the war file or traversing {@code WEB-INF/lib} fails
     */
    private WarArchiveAnalysisInputLocation(
        @Nonnull Path warPath,
        @Nonnull SourceType srcType,
        @Nonnull List bodyInterceptors,
        @Nonnull Collection ignoredPaths)
        throws IOException {
      super(
          Files.createTempDirectory("sootUp-war-" + warPath.hashCode()).toAbsolutePath(),
          srcType,
          bodyInterceptors,
          ignoredPaths);

      // The temporary directory already exists, so extractWarFile does not register it for
      // deletion. Register it before its content: deletion on exit happens in reverse order of
      // registration, i.e. the directory is removed after the files extracted into it.
      path.toFile().deleteOnExit();

      extractWarFile(warPath, path);

      Path webInfPath = path.resolve("WEB-INF");
      // directory structure as specified in SRV.9.5 of
      // https://download.oracle.com/otn-pub/jcp/servlet-2.4-fr-spec-oth-JSpec/servlet-2_4-fr-spec.pdf?AuthParam=1625059899_16c705c72f7db7f85a8a7926558701fe
      Path classDir = webInfPath.resolve("classes");
      if (Files.exists(classDir)) {
        containedInputLocations.add(
            new DirectoryBasedAnalysisInputLocation(classDir, srcType, bodyInterceptors));
      }

      Path libDir = webInfPath.resolve("lib");
      if (Files.exists(libDir)) {
        // Files.walk keeps directories open until its stream is closed.
        try (Stream<Path> libFiles = Files.walk(libDir)) {
          libFiles
              .filter(f -> PathUtils.hasExtension(f, FileType.JAR))
              .forEach(
                  f ->
                      containedInputLocations.add(
                          new ArchiveBasedAnalysisInputLocation(f, srcType, bodyInterceptors)));
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    }

    /**
     * Creates an input location for the war file at {@code path} without ignored paths.
     *
     * @param path the war file
     * @param srcType the source type of this input location
     * @param bodyInterceptors the body interceptors of this input location
     * @throws IOException declared by the four-argument constructor this one delegates to
     */
    public WarArchiveAnalysisInputLocation(
        Path path, SourceType srcType, List bodyInterceptors) throws IOException {
      this(path, srcType, bodyInterceptors, Collections.emptyList());
    }

    /**
     * Returns the class sources of all contained input locations. Sources that are equal to each
     * other are returned only once.
     */
    @Override
    @Nonnull
    public Collection getClassSources(@Nonnull View view) {

      // Collect into a hash set first so that equal class sources collapse into one.
      final Set<Object> uniqueSources = new HashSet<>();
      containedInputLocations.forEach(
          inputLoc -> uniqueSources.addAll(inputLoc.getClassSources(view)));

      final List<JavaSootClassSource> classSources = new ArrayList<>(uniqueSources.size());
      for (Object source : uniqueSources) {
        classSources.add((JavaSootClassSource) source);
      }
      return classSources;
    }

    /**
     * Asks the contained input locations one after another for the class source of {@code type}.
     *
     * @return the first class source found, or an empty {@link Optional} if none of the contained
     *     input locations provides one
     */
    @Override
    @Nonnull
    public Optional getClassSource(
        @Nonnull ClassType type, @Nonnull View view) {

      // The stream is evaluated lazily: locations after the first hit are not queried.
      return containedInputLocations.stream()
          .map(inputLocation -> inputLocation.getClassSource(type, view))
          .filter(Optional::isPresent)
          .findFirst()
          .map(classSource -> (JavaSootClassSource) classSource.get());
    }

    /**
     * Extracts the war file at the temporary location to analyze underlying class and jar files
     *
     * 

[ms] hint: extracting is necessary to access nested (zip)filesystems with java8/java9 * runtime - nested (zip)filesystems would work with java11 runtime (maybe java10) * * @param warFilePath The path to war file to be extracted */ void extractWarFile(Path warFilePath, final Path destDirectory) { int extractedSize = 0; try { File dest = destDirectory.toFile(); if (!dest.exists()) { if (!dest.mkdir()) { throw new RuntimeException( "Could not create the directory to extract Warfile: " + destDirectory); } dest.deleteOnExit(); } ZipInputStream zis = new ZipInputStream(Files.newInputStream(warFilePath)); ZipEntry zipEntry; while ((zipEntry = zis.getNextEntry()) != null) { Path filepath = destDirectory.resolve(zipEntry.getName()); final File file = filepath.toFile(); file.deleteOnExit(); if (zipEntry.isDirectory()) { file.mkdir(); } else { byte[] incomingValues = new byte[4096]; int readBytesZip; if (file.exists()) { // compare contents -> does it contain the extracted war already? int readBytesExistingFile; final BufferedInputStream bis = new BufferedInputStream(Files.newInputStream(file.toPath())); byte[] bisBuf = new byte[4096]; while ((readBytesZip = zis.read(incomingValues)) != -1) { if (extractedSize > maxAllowedBytesToExtract) { throw new RuntimeException( "The extracted warfile exceeds the size of " + maxAllowedBytesToExtract + " byte. 
Either the file is a big archive (-> increase PathBasedAnalysisInputLocation.WarArchiveInputLocation.maxAllowedBytesToExtract) or maybe it contains an archive bomb."); } readBytesExistingFile = bis.read(bisBuf, 0, readBytesZip); if (readBytesExistingFile != readBytesZip) { throw new RuntimeException( "Can't extract File \"" + file + "\" as it already exists and has a different size."); } else if (!Arrays.equals(bisBuf, incomingValues)) { throw new RuntimeException( "Can't extract File \"" + file + "\" as it already exists and has a different content which we can't override."); } extractedSize += readBytesZip; } } else { BufferedOutputStream bos = new BufferedOutputStream(Files.newOutputStream(file.toPath())); while ((readBytesZip = zis.read(incomingValues)) != -1) { if (extractedSize > maxAllowedBytesToExtract) { throw new RuntimeException( "The extracted warfile exceeds the size of " + maxAllowedBytesToExtract + " byte. Either the file is a big archive or maybe it contains an archive bomb."); } bos.write(incomingValues, 0, readBytesZip); extractedSize += readBytesZip; } bos.close(); } } zis.closeEntry(); } } catch (IOException e) { throw new RuntimeException(e); } } /** * Parses the web.xml file to search for the servlet-class classes in the extracted directory * after the war file is extracted * *

[ms] helps to set entrypoints for analyses automatically (later) * * @param extractedWARPath The path where the war file is extracted Adds the classes associated * to servlet-class in a {@link ArrayList} of {@link String} */ @Nonnull public List retrieveServletClasses(String extractedWARPath) { List classesInXML = new ArrayList<>(); try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document document = builder.parse(new File(extractedWARPath + "/WEB-INF/web.xml")); document.getDocumentElement().normalize(); NodeList nList = document.getElementsByTagName("servlet"); for (int temp = 0; temp < nList.getLength(); temp++) { Node node = nList.item(temp); if (node.getNodeType() == Node.ELEMENT_NODE) { Element eElement = (Element) node; classesInXML.add( eElement.getElementsByTagName("servlet-class").item(0).getTextContent()); } } } catch (ParserConfigurationException | SAXException | IOException e) { throw new RuntimeException(e); } return classesInXML; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy