All Downloads are FREE. Search and download functionalities use the official Maven repository.

org.whitesource.agent.archive.ArchiveExtractor Maven / Gradle / Ivy

Go to download

File System Agent is a simple java command line tool which extracts descriptive information from your open source libraries

There is a newer version: 18.9.1.1
Show newest version
/**
 * Copyright (C) 2014 WhiteSource Ltd.
 * 

* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*

* http://www.apache.org/licenses/LICENSE-2.0 *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.whitesource.agent.archive; import com.github.junrar.testutil.ExtractArchive; import net.lingala.zip4j.core.ZipFile; import net.lingala.zip4j.exception.ZipException; import net.lingala.zip4j.model.FileHeader; import org.apache.commons.compress.archivers.cpio.CpioArchiveEntry; import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; import org.apache.commons.compress.compressors.lzma.LZMACompressorInputStream; import org.apache.commons.io.FileUtils; import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.codehaus.plexus.archiver.tar.TarBZip2UnArchiver; import org.codehaus.plexus.archiver.tar.TarGZipUnArchiver; import org.codehaus.plexus.archiver.tar.TarUnArchiver; import org.codehaus.plexus.archiver.xz.XZUnArchiver; import org.codehaus.plexus.logging.console.ConsoleLogger; import org.redline_rpm.ReadableChannelWrapper; import org.redline_rpm.Util; import org.redline_rpm.header.AbstractHeader; import org.redline_rpm.header.Format; import org.redline_rpm.header.Header; import org.slf4j.Logger; import org.whitesource.agent.utils.LoggerFactory; import org.whitesource.agent.Constants; import org.whitesource.agent.utils.FilesScanner; import org.whitesource.agent.utils.Pair; import org.whitesource.agent.TempFolders; import java.io.*; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.nio.file.FileSystems; import java.nio.file.PathMatcher; import java.nio.file.Paths; import java.text.SimpleDateFormat; import java.util.*; import java.util.concurrent.*; import java.util.function.Predicate; import 
java.util.stream.Collectors; /** * @author anna.rozin */ public class ArchiveExtractor { public static final String LAYER_TAR = "**/*layer.tar"; /* --- Static members --- */ private final Logger logger = LoggerFactory.getLogger(ArchiveExtractor.class); public static final int LONG_BOUND = 100000; public static final String DEPTH = "_depth_"; public static final String DEPTH_REGEX = DEPTH + "[0-9]"; public static final String GLOB_PREFIX = "glob:"; public static final String NULL_HEADER = "mainheader is null"; private final String JAVA_TEMP_DIR = System.getProperty("java.io.tmpdir"); public static final List ZIP_EXTENSIONS = Arrays.asList("jar", "war", "aar", "ear", "egg", "zip", "whl", "sca", "sda", "nupkg"); public static final List GEM_EXTENSIONS = Collections.singletonList("gem"); public static final List TAR_EXTENSIONS = Arrays.asList("tar.gz", "tar", "tgz", "tar.bz2", "tar.xz", "xz"); public static final List RPM_EXTENSIONS = Collections.singletonList("rpm"); public static final List RAR_EXTENSIONS = Collections.singletonList("rar"); public static final String ZIP_EXTENSION_PATTERN; public static final String GEM_EXTENSION_PATTERN; public static final String TAR_EXTENSION_PATTERN; public static final String RPM_EXTENSION_PATTERN; public static final String RAR_EXTENSION_PATTERN; public static final String RUBY_DATA_FILE = "data.tar.gz"; public static final String TAR_SUFFIX = ".tar"; public static final String GZ_SUFFIX = ".gz"; public static final String BZ_SUFFIX = ".bz2"; public static final String XZ_SUFFIX = ".xz"; public static final String LZMA = "lzma"; public static final String CPIO = ".cpio"; public static final String TGZ_SUFFIX = ".tgz"; public static final String TAR_GZ_SUFFIX = TAR_SUFFIX + GZ_SUFFIX; public static final String TAR_BZ2_SUFFIX = TAR_SUFFIX + BZ_SUFFIX; public static final String UN_ARCHIVER_LOGGER = "unArchiverLogger"; public static final String GLOB_PATTERN_PREFIX = Constants.PATTERN + Constants.DOT; public static final String 
PATTERN_PREFIX = ".*\\."; public static final String XZ_UN_ARCHIVER_FILE_NAME = "compressedFile.tar"; static { ZIP_EXTENSION_PATTERN = initializePattern(ZIP_EXTENSIONS); GEM_EXTENSION_PATTERN = initializePattern(GEM_EXTENSIONS); TAR_EXTENSION_PATTERN = initializePattern(TAR_EXTENSIONS); RPM_EXTENSION_PATTERN = initializePattern(RPM_EXTENSIONS); RAR_EXTENSION_PATTERN = initializePattern(RAR_EXTENSIONS); } private static String initializePattern(List archiveExtensions) { StringBuilder sb = new StringBuilder(); for (String archiveExtension : archiveExtensions) { sb.append(PATTERN_PREFIX); sb.append(archiveExtension); sb.append(Constants.PIPE); } return sb.toString().substring(0, sb.toString().lastIndexOf(Constants.PIPE)); } /* --- Private members --- */ private final String[] archiveIncludesPattern; private final String[] archiveExcludesPattern; private final String[] filesExcludes; private String randomString; private String tempFolderNoDepth; private boolean fastUnpack = false; /* --- Constructors --- */ public ArchiveExtractor(String[] archiveIncludes, String[] archiveExcludes, String[] filesExcludes, boolean fastUnpack) { this(archiveIncludes, archiveExcludes, filesExcludes); this.fastUnpack = fastUnpack; } public ArchiveExtractor(String[] archiveIncludes, String[] archiveExcludes, String[] filesExcludes) { if (archiveIncludes.length > 0 && StringUtils.isNotBlank(archiveIncludes[0])) { this.archiveIncludesPattern = archiveIncludes; } else { // create ARCHIVE_EXTENSIONS only if archiveIncludes is empty this.archiveIncludesPattern = createArchivesArray(); } this.archiveExcludesPattern = archiveExcludes; this.filesExcludes = filesExcludes; } private String getTempFolder(String scannerBaseDir) { String creationDate = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); String tempFolder = JAVA_TEMP_DIR.endsWith(File.separator) ? 
JAVA_TEMP_DIR + TempFolders.UNIQUE_WHITESOURCE_ARCHIVE_EXTRACTOR_TEMP_FOLDER + File.separator + creationDate : JAVA_TEMP_DIR + File.separator + TempFolders.UNIQUE_WHITESOURCE_ARCHIVE_EXTRACTOR_TEMP_FOLDER + File.separator + creationDate; String destDirectory = tempFolder + Constants.UNDERSCORE + this.randomString; int separatorIndex = scannerBaseDir.lastIndexOf(File.separator); if (separatorIndex != -1) { destDirectory = destDirectory + scannerBaseDir.substring(separatorIndex, scannerBaseDir.length()); try { // this solves the tilda issue in filepath in windows (mangled Windows filenames) destDirectory = new File(destDirectory).getCanonicalPath().toString(); } catch (IOException e) { logger.warn("Error getting the absolute file name ", e); } } return destDirectory; } /* --- Public methods --- */ /** * The Method extracts all the Archive files according to the archiveExtractionDepth. * archiveExtractionDepth defined by the user in the configuration file. *

* The archiveExtractionDepth default value is 0 - no archive scanning, the max value is 3. * By default the method scans jar/war/ear. * If archiveIncludes/archiveExcludes params are defined the method will act accordingly. * * @param scannerBaseDir - directory for scanning. * @param archiveExtractionDepth - drill down hierarchy level in archive files * @param archiveDirectories list of directories * @return the temp directory for the extracted files. */ public String extractArchives(String scannerBaseDir, int archiveExtractionDepth, List archiveDirectories) { this.randomString = String.valueOf(ThreadLocalRandom.current().nextLong(0, LONG_BOUND)); this.tempFolderNoDepth = getTempFolder(scannerBaseDir); logger.debug("Base directory is {}, extraction depth is set to {}", scannerBaseDir, archiveExtractionDepth); Map> allFiles = new HashMap<>(); // Extract again if needed according archiveExtractionDepth parameter for (int curLevel = 0; curLevel < archiveExtractionDepth; curLevel++) { String folderToScan; String folderToExtract; if (curLevel == 0) { folderToScan = scannerBaseDir; } else { folderToScan = getDepthFolder(curLevel - 1); } folderToExtract = getDepthFolder(curLevel); Pair retrieveFilesWithFolder = getSearchedFileNames(folderToScan); if (retrieveFilesWithFolder == null || retrieveFilesWithFolder.getKey().length <= 0) { break; } else { String[] fileNames = retrieveFilesWithFolder.getKey(); folderToScan = retrieveFilesWithFolder.getValue(); Pair> filesFound = new Pair<>(folderToScan, Arrays.stream(fileNames).collect(Collectors.toList())); Map foundFiles; if (fastUnpack) { foundFiles = handleArchiveFilesFast(folderToExtract, filesFound); } else { foundFiles = handleArchiveFiles(folderToExtract, filesFound); } allFiles.put(String.valueOf(curLevel), foundFiles); } } if (!allFiles.isEmpty()) { String parentDirectory = new File(this.tempFolderNoDepth).getParent(); archiveDirectories.add(parentDirectory); return parentDirectory; } else { // if unable to extract, 
return null return null; } } // extract image layers public void extractDockerImageLayers(File imageTarFile, File imageExtractionDir, Boolean deleteTarFiles) { FilesScanner filesScanner = new FilesScanner(); boolean success = false; // docker layers are saved as TAR file (we save it as TAR) if (imageTarFile.getName().endsWith(TAR_SUFFIX)) { success = unTar(imageTarFile.getName().toLowerCase(), imageExtractionDir.getAbsolutePath(), imageTarFile.getPath()); boolean deleted = false; if (deleteTarFiles) { deleted = imageTarFile.delete(); } if (!deleted) { logger.warn("Was not able to delete {} (docker image TAR file)", imageTarFile.getName()); } } if (success) { String[] fileNames = filesScanner.getDirectoryContent(imageExtractionDir.getAbsolutePath(), new String[]{LAYER_TAR}, new String[]{}, true, false); for (String filename : fileNames) { File layerToExtract = new File(imageExtractionDir + File.separator + filename); extractDockerImageLayers(layerToExtract, layerToExtract.getParentFile(), deleteTarFiles); } } else { logger.warn("Was not able to extract {} (docker image TAR file)", imageTarFile.getName()); } } private String getDepthFolder(int depth) { return this.tempFolderNoDepth + DEPTH + depth; } /* --- Private methods --- */ private String[] createArchivesArray() { Collection archiveExtensions = new ArrayList<>(); archiveExtensions.addAll(ZIP_EXTENSIONS); archiveExtensions.addAll(GEM_EXTENSIONS); archiveExtensions.addAll(TAR_EXTENSIONS); String[] archiveIncludesPattern = new String[archiveExtensions.size()]; int i = 0; for (String extension : archiveExtensions) { archiveIncludesPattern[i++] = GLOB_PATTERN_PREFIX + extension; } return archiveIncludesPattern; } private Pair getSearchedFileNames(String fileOrFolderToScan) { String[] foundFiles = null; File file = new File(fileOrFolderToScan); String folderToScan; if (file.exists()) { FilesScanner filesScanner = new FilesScanner(); if (file.isDirectory()) { // scan directory foundFiles = 
filesScanner.getDirectoryContent(fileOrFolderToScan, archiveIncludesPattern, archiveExcludesPattern, false, false); folderToScan = fileOrFolderToScan; return new Pair<>(foundFiles, folderToScan); } else { //// handle file passed in -d parameter //// check if file matches archive GLOB patterns boolean included = filesScanner.isIncluded(file, archiveIncludesPattern, archiveExcludesPattern, false, false); if (included) { folderToScan = file.getParent(); String relativeFilePath = new File(folderToScan).toURI().relativize(new File(file.getAbsolutePath()).toURI()).getPath(); foundFiles = new String[]{relativeFilePath}; return new Pair<>(foundFiles, folderToScan); } } filesScanner = null; } return null; } private Map handleArchiveFiles(String baseFolderToExtract, Pair> fileNames) { Map founded = new HashMap<>(); for (String fileName : fileNames.getValue()) { String archivePath = Paths.get(fileNames.getKey(), fileName).toString(); String unpackFolder = Paths.get(baseFolderToExtract, FilenameUtils.removeExtension(fileName)).toString(); Pair dataToUnpack = new Pair<>(archivePath, unpackFolder); Pair foundArchive = getUnpackedResult(dataToUnpack); if (foundArchive != null) { founded.put(foundArchive.getKey(), foundArchive.getValue()); } } return founded; } private Map handleArchiveFilesFast(String baseFolderToExtract, Pair> fileNames) { Collection dataToUnpack = fileNames.getValue().stream().map(fileName -> { String archivePath = Paths.get(fileNames.getKey(), fileName).toString(); String unpackFolder = Paths.get(baseFolderToExtract, FilenameUtils.removeExtension(fileName)).toString(); return new Pair(archivePath, unpackFolder); }).collect(Collectors.toList()); return processCollections(dataToUnpack); } public Map processCollections(Collection unitsOfWork) { int numberOfThreads = Runtime.getRuntime().availableProcessors(); ExecutorService executorService = Executors.newFixedThreadPool(numberOfThreads); List> handles = new ArrayList<>(); List> callableList = new ArrayList<>(); 
unitsOfWork.stream().forEach(unitOfWork -> callableList.add(() -> getUnpackedResult(unitOfWork))); for (Callable callable : callableList) { Future handle = executorService.submit(callable); handles.add(handle); } Map results = new HashMap<>(); for (Future h : handles) { try { Pair dataToUnpack = h.get(); results.put(dataToUnpack.getKey(), dataToUnpack.getValue()); } catch (InterruptedException e) { logger.warn("Error: {}", e.getMessage()); } catch (ExecutionException e) { logger.warn("Error: {}", e.getMessage()); } } executorService.shutdownNow(); return results; } private Pair getUnpackedResult(Pair dataToUnpack) { boolean foundArchive = false; String innerDir = dataToUnpack.getValue(); String fileKey = dataToUnpack.getKey(); String lowerCaseFileName = fileKey.toLowerCase(); if (lowerCaseFileName.matches(ZIP_EXTENSION_PATTERN)) { foundArchive = unZip(innerDir, fileKey); } else if (lowerCaseFileName.matches(GEM_EXTENSION_PATTERN)) { foundArchive = unTar(lowerCaseFileName, innerDir, fileKey); innerDir = innerDir + File.separator + RUBY_DATA_FILE; foundArchive = unTar(RUBY_DATA_FILE, innerDir + this.randomString, innerDir); innerDir = innerDir + this.randomString; } else if (lowerCaseFileName.matches(TAR_EXTENSION_PATTERN)) { foundArchive = unTar(lowerCaseFileName, innerDir, fileKey); // innerDir = innerDir.replaceAll(TAR_SUFFIX, BLANK); } else if (lowerCaseFileName.matches(RPM_EXTENSION_PATTERN)) { foundArchive = handleRpmFile(innerDir, fileKey); } else if (lowerCaseFileName.matches(RAR_EXTENSION_PATTERN)) { foundArchive = extractRarFile(innerDir, fileKey); } else { logger.warn("Error: {} is unsupported archive type", fileKey); } if (foundArchive) { Pair resultArchive = new Pair(lowerCaseFileName, innerDir); return resultArchive; } else return null; } private boolean extractRarFile(String innerDir, String fileKey) { boolean foundArchive; File destDir = new File(innerDir); if (!destDir.exists()) { destDir.mkdirs(); } try { ExtractArchive.extractArchive(fileKey, 
innerDir); foundArchive = true; } catch (Exception e) { logger.warn("Error extracting file {}: {}", fileKey, e.getMessage()); try { //if the header is missing try to extract the rar file with zip extension - WSE-450 if (e.getMessage().contains(NULL_HEADER) && new ZipFile(fileKey) instanceof ZipFile) { logger.info("Retrying extraction {}", fileKey); foundArchive = unZip(innerDir, fileKey); } } catch (ZipException e1) { logger.warn("Error extracting file {}: {}", fileKey, e.getMessage()); foundArchive = false; } } return true; } // Open and extract data from zip pattern files private boolean unZip(String innerDir, String archiveFile) { boolean success = true; ZipFile zipFile; try { zipFile = new ZipFile(archiveFile); // Get the list of file headers from the zip file before unpacking List fileHeaderList = zipFile.getFileHeaders(); List matchers = Arrays.stream(filesExcludes).map(fileExclude -> FileSystems.getDefault().getPathMatcher(GLOB_PREFIX + fileExclude)).collect(Collectors.toList()); // Loop through the file headers and extract only files that are not matched by fileExcludes patterns for (int i = 0; i < fileHeaderList.size(); i++) { FileHeader fileHeader = (FileHeader) fileHeaderList.get(i); String fileName = fileHeader.getFileName(); if (filesExcludes.length > 0) { Predicate matchesExcludes = pathMatcher -> pathMatcher.matches(Paths.get(innerDir, fileName)); if (matchers.stream().noneMatch(matchesExcludes)) { zipFile.extractFile(fileHeader, innerDir); } } else { zipFile.extractFile(fileHeader, innerDir); } } } catch (Exception e) { success = false; logger.warn("Error extracting file {}: {}", archiveFile, e.getMessage()); logger.debug("Error extracting file {}: {}", archiveFile, e.getStackTrace()); } finally { // remove reference to zip file zipFile = null; } return success; } // Open and extract data from Tar pattern files private boolean unTar(String fileName, String innerDir, String archiveFile) { boolean success = true; TarUnArchiver unArchiver = new 
TarUnArchiver(); try { File destDir = new File(innerDir); if (!destDir.exists()) { destDir.mkdirs(); } if (fileName.endsWith(TAR_GZ_SUFFIX) || fileName.endsWith(TGZ_SUFFIX)) { unArchiver = new TarGZipUnArchiver(); } else if (fileName.endsWith(TAR_BZ2_SUFFIX)) { unArchiver = new TarBZip2UnArchiver(); } else if (fileName.endsWith(XZ_SUFFIX)) { String destFileUrl = destDir.getCanonicalPath() + Constants.BACK_SLASH + XZ_UN_ARCHIVER_FILE_NAME; success = unXz(new File(archiveFile), destFileUrl); archiveFile = destFileUrl; } if (success) { unArchiver.enableLogging(new ConsoleLogger(ConsoleLogger.LEVEL_DISABLED, UN_ARCHIVER_LOGGER)); unArchiver.setSourceFile(new File(archiveFile)); unArchiver.setDestDirectory(destDir); unArchiver.extract(); } } catch (Exception e) { success = false; logger.warn("Error extracting file {}: {}", fileName, e.getMessage()); } return success; } // extract xz files public boolean unXz(File srcFileToArchive, String destFilePath) { boolean success = true; try { XZUnArchiver XZUnArchiver = new XZUnArchiver(); XZUnArchiver.enableLogging(new ConsoleLogger(ConsoleLogger.LEVEL_DISABLED, UN_ARCHIVER_LOGGER)); XZUnArchiver.setSourceFile(srcFileToArchive); XZUnArchiver.setDestFile(new File(destFilePath)); XZUnArchiver.extract(); } catch (Exception e) { success = false; logger.warn("Failed to extract Xz file : {} - {}", srcFileToArchive.getPath(), e.getMessage()); } return success; } // Open and extract data from rpm files private boolean handleRpmFile(String innerDir, String archiveFile) { boolean success = true; File rpmFile = new File(archiveFile); FileInputStream rpmFIS = null; try { rpmFIS = new FileInputStream(rpmFile.getPath()); } catch (FileNotFoundException e) { success = false; logger.warn("File not found: {}", archiveFile); } Format format = null; ReadableByteChannel channel = Channels.newChannel(rpmFIS); ReadableChannelWrapper channelWrapper = new ReadableChannelWrapper(channel); try { format = new org.redline_rpm.Scanner().run(channelWrapper); 
} catch (IOException e) { success = false; logger.warn("Error reading RPM file {}: {}", archiveFile, e.getCause()); } if (format != null) { Header header = format.getHeader(); FileOutputStream cpioOS = null; FileOutputStream cpioEntryOutputStream = null; CpioArchiveInputStream cpioIn = null; File cpioFile = null; try { // extract all .cpio file // get input stream according to payload compressor type InputStream inputStream; AbstractHeader.Entry pcEntry = header.getEntry(Header.HeaderTag.PAYLOADCOMPRESSOR); String[] pc = (String[]) pcEntry.getValues(); if (pc[0].equals(LZMA)) { try { inputStream = new LZMACompressorInputStream(rpmFIS); } catch (Exception e) { throw new IOException("Failed to load LZMA compression stream", e); } } else { inputStream = Util.openPayloadStream(header, rpmFIS); } cpioFile = new File(rpmFile.getPath() + CPIO); cpioOS = new FileOutputStream(cpioFile); IOUtils.copy(inputStream, cpioOS); // extract files from .cpio File extractDestination = new File(innerDir); extractDestination.mkdirs(); cpioIn = new CpioArchiveInputStream(new FileInputStream(cpioFile)); CpioArchiveEntry cpioEntry; while ((cpioEntry = (CpioArchiveEntry) cpioIn.getNextEntry()) != null) { String entryName = cpioEntry.getName(); String lowercaseName = entryName.toLowerCase(); File file = new File(extractDestination, getFileName(entryName)); cpioEntryOutputStream = new FileOutputStream(file); IOUtils.copy(cpioIn, cpioEntryOutputStream); String innerExtractionDir; if (lowercaseName.matches(TAR_EXTENSION_PATTERN)) { innerExtractionDir = innerDir + File.separator + entryName + this.randomString; unTar(file.getName(), innerExtractionDir, file.getPath()); } else if (lowercaseName.matches(ZIP_EXTENSION_PATTERN)) { innerExtractionDir = innerDir + File.separator + entryName + this.randomString; unZip(innerExtractionDir, file.getPath()); } // close closeResource(cpioEntryOutputStream); } } catch (IOException e) { logger.error("Error unpacking rpm file {}: {}", rpmFile.getName(), 
e.getMessage()); } finally { closeResource(cpioEntryOutputStream); closeResource(cpioIn); closeResource(cpioOS); deleteFile(cpioFile); } } return success; } private void deleteFile(File cpioFile) { try { FileUtils.forceDelete(cpioFile); } catch (IOException e) { logger.warn("Error deleting cpio file {}: {}", cpioFile.getName(), e.getMessage()); } } private void closeResource(Closeable resource) { if (resource != null) { try { resource.close(); } catch (IOException e) { logger.warn("Error closing file {}: {}", resource.toString(), e.getMessage()); } } } // parse name without directories private String getFileName(String name) { //check if the environment is linux or windows if (name.contains(Constants.FORWARD_SLASH)) { name = name.substring(name.lastIndexOf(Constants.FORWARD_SLASH) + 1, name.length()); } else if (name.contains(Constants.BACK_SLASH)) { name = name.substring(name.lastIndexOf(Constants.BACK_SLASH) + 1, name.length()); } return name; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy