// org.metaeffekt.artifact.resolver.deb.index.packages.DebPackagesIndex (Maven / Gradle / Ivy)
//
// Go to download
package org.metaeffekt.artifact.resolver.deb.index.packages;

import lombok.Getter;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
import org.apache.commons.io.IOUtils;
import org.metaeffekt.artifact.resolver.deb.index.packages.config.DebIndexConfig;
import org.metaeffekt.artifact.resolver.deb.index.packages.config.PackagesFileType;
import org.metaeffekt.artifact.resolver.deb.index.packages.parser.DebianPackagesEntry;
import org.metaeffekt.artifact.resolver.download.WebAccess;
import org.metaeffekt.artifact.resolver.generic.utils.GenericUtils;
import org.metaeffekt.artifact.resolver.generic.utils.MarkerUtils;
import org.metaeffekt.artifact.resolver.generic.utils.exception.DownloadFailedException;
import org.metaeffekt.artifact.resolver.model.DownloadLocation;

import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.StandardOpenOption;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Pattern;

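/**
 * Tries to build package indices by reading Debian {@code Packages} files (raw, gzip- or xz-compressed).
 * <p>
 * These files are acquired from the urls returned by {@link DebIndexConfig#getPackagesDownloadUrls()};
 * they are downloaded and parsed lazily on the first lookup.
 */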
@Slf4j
public class DebPackagesIndex {
    // Used to derive the local folder into which index files are downloaded.
    private final DownloadLocation downloadLocation;
    // Handed to the download helper when an index file is fetched.
    private final WebAccess webAccess;

    // Never null: the constructor substitutes a default-constructed config for a null argument.
    private final DebIndexConfig config;

    // Index filled by generateIndex: package name -> version -> architecture -> sorted set of entries.
    // It stays empty until the first call to lookupNameVersionArch; the generated getter does not
    // trigger initialization itself.
    // NOTE(review): the type arguments of this declaration look truncated in this copy of the file;
    // confirm the exact element/collection types against the original source.
    // TODO: might want some separation by ecosystem (e.g. debian vs ubuntu)
    @Getter
    private final Map>>> nameToVersionToArchitecture =
            new HashMap<>();

    // Matches a dot followed by 1 to 16 ASCII letters at the end of the input; used by urlToFilename
    // to decide whether the tail of a url looks like a file extension worth keeping.
    private static final Pattern fileExtensionPattern = Pattern.compile("\\.[a-zA-Z]{1,16}$");

    // Serves both as the "index has been built" flag and as the monitor guarding lazy initialization.
    private final AtomicBoolean initialized = new AtomicBoolean(false);

    /**
     * Creates the index object. The constructor only stores its arguments; no index file is
     * downloaded or parsed here.
     *
     * @param downloadLocation used to derive the folder into which index files are downloaded
     * @param webAccess        handed to the download helper when index files are fetched
     * @param indexConfig      index configuration; when {@code null}, a default-constructed
     *                         {@link DebIndexConfig} is used instead
     */
    public DebPackagesIndex(DownloadLocation downloadLocation, WebAccess webAccess, DebIndexConfig indexConfig) {
        this.webAccess = webAccess;
        this.downloadLocation = downloadLocation;

        // fall back to the default configuration rather than carrying a null config around
        if (indexConfig != null) {
            this.config = indexConfig;
        } else {
            this.config = new DebIndexConfig();
        }
    }

    /**
     * Guesses how a Packages file is compressed by looking at the end of its url.
     * <p>
     * Urls ending in {@code /Packages.gz} map to GZIP, {@code /Packages.xz} to XZ and
     * {@code /Packages} to RAW. Anything else is treated as RAW after logging a warning.
     * A warning is also logged when the url carries leading or trailing whitespace; the url is
     * still inspected as given.
     *
     * @param url the url of the Packages file
     * @return the file type derived from the url ending, never {@code null}
     */
    @NonNull
    private static PackagesFileType deriveFileTypeFromUrlEnding(@NonNull final String url) {
        final boolean hasSurroundingWhitespace = !url.trim().equals(url);
        if (hasSurroundingWhitespace) {
            log.warn("The url [{}] is untrimmed. This may lead to errors.", url);
        }

        final PackagesFileType detected;
        if (url.endsWith("/Packages.gz")) {
            detected = PackagesFileType.GZIP;
        } else if (url.endsWith("/Packages.xz")) {
            detected = PackagesFileType.XZ;
        } else {
            // both the plain "/Packages" ending and unknown endings are read as raw text;
            // only the unknown case is worth a warning
            if (!url.endsWith("/Packages")) {
                log.warn("Could not detect type of Packages file in [{}]. Assuming raw.", url);
            }
            detected = PackagesFileType.RAW;
        }
        return detected;
    }

    /**
     * Builds the index from a plain list of Packages file urls.
     * <p>
     * Each url is registered without an explicit file type and then handed to the map-based
     * overload, which derives the type from the url ending for such entries. Because the urls
     * are collected in a sorted map, duplicates collapse and processing happens in url order.
     *
     * @param sourceFileUrls urls of the Packages files to index
     */
    private void generateIndex(@NonNull final List<String> sourceFileUrls) {
        // a null value means "type not specified"
        final SortedMap<String, PackagesFileType> urlToDerivedType = new TreeMap<>();

        for (final String url : sourceFileUrls) {
            urlToDerivedType.put(url, null);
        }

        generateIndex(urlToDerivedType);
    }

    /**
     * Wraps the given stream so that reading from the result yields the uncompressed content.
     * <p>
     * XZ and GZIP input is wrapped in the matching commons-compress stream (the {@code false}
     * argument is presumably the "decompress concatenated streams" switch; confirm against the
     * library documentation). RAW input is only wrapped in an 8192-byte buffer.
     *
     * @param inputStream the stream delivering the (possibly compressed) file content
     * @param fileType    how the content is compressed
     * @return a stream delivering the uncompressed content
     * @throws IOException           if a decompressor stream cannot be created
     * @throws IllegalStateException if {@code fileType} is a value this method does not know
     */
    private InputStream getDecompressorStreamByType(@NonNull InputStream inputStream,
                                                    @NonNull PackagesFileType fileType)
            throws IOException {
        if (fileType == PackagesFileType.XZ) {
            return new XZCompressorInputStream(inputStream, false);
        }
        if (fileType == PackagesFileType.GZIP) {
            return new GzipCompressorInputStream(inputStream, false);
        }
        if (fileType == PackagesFileType.RAW) {
            return new BufferedInputStream(inputStream, 8192);
        }

        // only reachable if the enum gains a value that is not handled above
        log.error("Reached default case with (unknown) enum value [{}] while trying to unpack.", fileType);
        throw new IllegalStateException("Could not unpack given file.");
    }

    /**
     * Decompresses {@code toUnpack} into {@code destination}.
     * <p>
     * The destination is created if missing and truncated if it already exists.
     *
     * @param toUnpack    the (possibly compressed) file to read
     * @param destination the file receiving the uncompressed content
     * @param fileType    how {@code toUnpack} is compressed
     * @return {@code destination}
     * @throws RuntimeException wrapping any {@link IOException} raised while reading or writing
     */
    private File decompressByType(@NonNull final File toUnpack,
                                  @NonNull final File destination,
                                  @NonNull PackagesFileType fileType) {
        // resources are opened sink first, then source, then decompressor; they close in reverse order
        try (final OutputStream sink = Files.newOutputStream(destination.toPath(),
                StandardOpenOption.CREATE,
                StandardOpenOption.WRITE,
                StandardOpenOption.TRUNCATE_EXISTING);
             final InputStream compressed = Files.newInputStream(toUnpack.toPath());
             final InputStream unpacked = getDecompressorStreamByType(compressed, fileType)) {
            IOUtils.copyLarge(unpacked, sink);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        return destination;
    }

    /**
     * Makeshift way to turn a repository url into a filename.
     * <p>
     * Ideally the name would be built from each repository's metadata. That requires additional
     * effort, so this simple derivation stands in until a better one is implemented.
     * <p>
     * The result is {@code "sha256-"} followed by the hex SHA-256 of the url's UTF-8 bytes. If the
     * text from the last dot of the url onwards looks like a file extension (a dot followed by
     * 1 to 16 ASCII letters), it is appended unchanged.
     *
     * @param url the url string to be turned into a filename
     * @return the hash-based name, with the url's file extension appended if one was found
     */
    private String urlToFilename(String url) {
        // FIXME: storing hash-based seems like a hack given our current code infrastructure. is there a better way?
        String extension = "";

        final int dotIndex = url.lastIndexOf('.');
        if (dotIndex >= 0) {
            // everything from the last dot on; only kept if it really is a plain extension
            final String candidate = url.substring(dotIndex);
            if (fileExtensionPattern.matcher(candidate).find()) {
                extension = candidate;
            }
        }

        final String urlHash = DigestUtils.sha256Hex(url.getBytes(StandardCharsets.UTF_8));
        return "sha256-" + urlHash + extension;
    }

    /**
     * Downloads the index file behind {@code url} into the download location.
     * <p>
     * The target path is
     * {@code <download folder for "deb-index" and host>/[<host>-<filename>]/<filename>},
     * where {@code <filename>} is the hash-based name produced by {@code urlToFilename}.
     *
     * @param url the url of the index file
     * @return the downloaded file, never {@code null}
     * @throws DownloadFailedException  if the download helper did not return a file
     * @throws IllegalArgumentException if {@code url} is not a well-formed URL
     */
    @NonNull
    private File downloadIndex(@NonNull String url) throws DownloadFailedException {
        try {
            final URL parsedUrl = new URL(url);
            final String host = parsedUrl.getHost();
            // derive the hash-based name once; it is used for both the folder and the file
            final String fileName = urlToFilename(url);

            final File destinationFile = new File(
                    new File(downloadLocation.deriveDownloadFolder("deb-index", host),
                            "[" + host + "-" + fileName + "]"),
                    fileName
            );

            final File downloaded = GenericUtils.downloadFile(
                    webAccess,
                    url,
                    destinationFile,
                    MarkerUtils.deriveMarkerFileFromDestination(destinationFile),
                    this.getClass().getSimpleName()
            );

            if (downloaded == null) {
                log.error("Could not download url [{}] to [{}].", url, destinationFile);
                throw new DownloadFailedException();
            }

            return downloaded;
        } catch (MalformedURLException e) {
            log.error("Got malformed URL while trying to download index from [{}].", url);
            // include the offending url: the message used to end right after "url: "
            throw new IllegalArgumentException("Can't download index files from url: " + url, e);
        }
    }

    /**
     * Downloads and parses the given Packages files and adds their entries to the index.
     * <p>
     * All urls are downloaded first; urls whose download fails are logged and left out. The
     * downloaded files are then parsed in url order, and every entry is filed under
     * package name, version and architecture.
     *
     * @param sourceFileUrlsToType maps each Packages file url to its file type; a {@code null}
     *                             type means the type is derived from the url ending
     * @throws UncheckedIOException if a downloaded file cannot be read or decompressed
     */
    private void generateIndex(@NonNull final SortedMap<String, PackagesFileType> sourceFileUrlsToType) {
        // phase 1: download every index file, skipping the ones that fail
        final SortedMap<String, File> urlToFile = new TreeMap<>();
        for (final String url : sourceFileUrlsToType.keySet()) {
            try {
                urlToFile.put(url, downloadIndex(url));
            } catch (DownloadFailedException e) {
                log.warn("Failed to download [{}], will be excluded from index.", url);
            }
        }

        // phase 2: parse the downloaded files
        final List<DebianPackagesEntry> collectedEntries = new ArrayList<>();
        for (final Map.Entry<String, File> downloadedIndex : urlToFile.entrySet()) {
            final String url = downloadedIndex.getKey();

            PackagesFileType fileType = sourceFileUrlsToType.get(url);
            if (fileType == null) {
                fileType = deriveFileTypeFromUrlEnding(url);
            }

            collectedEntries.addAll(readEntries(url, downloadedIndex.getValue(), fileType));
        }

        // sort so that entries are inserted in their natural order
        Collections.sort(collectedEntries);

        for (final DebianPackagesEntry entry : collectedEntries) {
            final String name = entry.getPackageName();
            final String version = entry.getVersion();
            final String architecture = entry.getArchitecture();

            nameToVersionToArchitecture
                    .computeIfAbsent(name, (ignore) -> new HashMap<>())
                    .computeIfAbsent(version, (ignore) -> new HashMap<>())
                    .computeIfAbsent(architecture, (ignore) -> new TreeSet<>())
                    .add(entry);
        }

        // TODO: check if there are duplicate entries for any name-version-architecture

        // TODO: do this with lucene. maybe have a look at yan's code (Index class which uses lucene)
    }

    /**
     * Reads all valid entries from one downloaded Packages file, decompressing it as needed.
     *
     * @param url       the url the file was downloaded from (used for logging and error context)
     * @param indexFile the downloaded file
     * @param fileType  how the file is compressed
     * @return the entries found in the file; a warning is logged if there are none
     * @throws UncheckedIOException if the file cannot be read or decompressed
     */
    private List<DebianPackagesEntry> readEntries(@NonNull final String url,
                                                  @NonNull final File indexFile,
                                                  @NonNull final PackagesFileType fileType) {
        final List<DebianPackagesEntry> entries;

        log.debug("Reading index from file [{}] of url [{}].", indexFile, url);
        try (final InputStream inputStream = Files.newInputStream(indexFile.toPath());
             final InputStream decompressor = getDecompressorStreamByType(inputStream, fileType)) {
            entries = DebianPackagesEntry.getValidEntries(decompressor, indexFile.getPath());
        } catch (IOException e) {
            // still a RuntimeException for callers, but now says which file and url failed
            throw new UncheckedIOException(
                    "Failed to read index file [" + indexFile + "] of url [" + url + "].", e);
        }

        if (entries.isEmpty()) {
            log.warn("Pointless empty index content from [{}] at [{}]",
                    url,
                    indexFile);
        }

        return entries;
    }

    /**
     * Looks up the index entry for the given package name, version and architecture.
     * <p>
     * The index is built on the first call, from the urls returned by the configuration's
     * {@code getPackagesDownloadUrls()}. Initialization runs inside a block synchronized on the
     * {@code initialized} flag, and the flag is only set once index generation has returned
     * normally.
     *
     * @param name    the package name
     * @param version the package version
     * @param arch    the package architecture
     * @return the matching entry, or {@code null} if the index holds none; if several entries
     * match, a warning is logged and the first one is returned
     */
    public DebianPackagesEntry lookupNameVersionArch(@NonNull String name,
                                                     @NonNull String version,
                                                     @NonNull String arch) {
        synchronized (initialized) {
            if (!initialized.get()) {
                log.debug("Initializing lazy index...");
                generateIndex(this.config.getPackagesDownloadUrls());
                initialized.set(true);
            }
        }

        // shared immutable empties as defaults: a miss allocates nothing
        final Collection<DebianPackagesEntry> result = nameToVersionToArchitecture
                .getOrDefault(name, Collections.emptyMap())
                .getOrDefault(version, Collections.emptyMap())
                .getOrDefault(arch, Collections.emptySet());

        if (result.size() > 1) {
            log.warn("More than one result on lookup [{}], [{}], [{}]. Using first.", name, version, arch);
        }

        return result.stream().findFirst().orElse(null);
    }
}