
package org.metaeffekt.artifact.resolver.deb.index.packages;

import lombok.Getter;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
import org.apache.commons.io.IOUtils;
import org.metaeffekt.artifact.resolver.deb.index.packages.config.DebIndexConfig;
import org.metaeffekt.artifact.resolver.deb.index.packages.config.PackagesFileType;
import org.metaeffekt.artifact.resolver.deb.index.packages.parser.DebianPackagesEntry;
import org.metaeffekt.artifact.resolver.download.WebAccess;
import org.metaeffekt.artifact.resolver.generic.utils.GenericUtils;
import org.metaeffekt.artifact.resolver.generic.utils.MarkerUtils;
import org.metaeffekt.artifact.resolver.generic.utils.exception.DownloadFailedException;
import org.metaeffekt.artifact.resolver.model.DownloadLocation;

import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.StandardOpenOption;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Pattern;

/**
 * Tries to build package indices by reading Packages files.
 * <p>
 * These files can be acquired from the Packages index URLs configured via {@link DebIndexConfig}.
 */
@Slf4j
public class DebPackagesIndex {

    private final DownloadLocation downloadLocation;
    private final WebAccess webAccess;
    private final DebIndexConfig config;

    // TODO: might want some separation by ecosystem (e.g. debian vs ubuntu)
    @Getter
    private final Map<String, Map<String, Map<String, Set<DebianPackagesEntry>>>> nameToVersionToArchitecture =
            new HashMap<>();

    private static final Pattern fileExtensionPattern = Pattern.compile("\\.[a-zA-Z]{1,16}$");

    private final AtomicBoolean initialized = new AtomicBoolean(false);

    public DebPackagesIndex(DownloadLocation downloadLocation, WebAccess webAccess, DebIndexConfig indexConfig) {
        this.downloadLocation = downloadLocation;
        this.webAccess = webAccess;
        this.config = indexConfig == null ? new DebIndexConfig() : indexConfig;
    }

    @NonNull
    private static PackagesFileType deriveFileTypeFromUrlEnding(@NonNull final String url) {
        if (!url.equals(url.trim())) {
            log.warn("The url [{}] is untrimmed. This may lead to errors.", url);
        }

        if (url.endsWith("/Packages.gz")) {
            return PackagesFileType.GZIP;
        }
        if (url.endsWith("/Packages.xz")) {
            return PackagesFileType.XZ;
        }
        if (url.endsWith("/Packages")) {
            return PackagesFileType.RAW;
        }

        log.warn("Could not detect type of Packages file in [{}]. Assuming raw.", url);
        return PackagesFileType.RAW;
    }

    private void generateIndex(@NonNull final List<String> sourceFileUrls) {
        SortedMap<String, PackagesFileType> urlToDerivedType = new TreeMap<>();
        for (String url : sourceFileUrls) {
            urlToDerivedType.put(url, null);
        }
        generateIndex(urlToDerivedType);
    }

    private InputStream getDecompressorStreamByType(@NonNull InputStream inputStream,
                                                    @NonNull PackagesFileType fileType) throws IOException {
        switch (fileType) {
            case XZ:
                return new XZCompressorInputStream(inputStream, false);
            case GZIP:
                return new GzipCompressorInputStream(inputStream, false);
            case RAW:
                return new BufferedInputStream(inputStream, 8192);
            default:
                log.error("Reached default case with (unknown) enum value [{}] while trying to unpack.", fileType);
                throw new IllegalStateException("Could not unpack given file.");
        }
    }

    private File decompressByType(@NonNull final File toUnpack,
                                  @NonNull final File destination,
                                  @NonNull PackagesFileType fileType) {
        try (final OutputStream outputStream = Files.newOutputStream(destination.toPath(),
                StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)) {
            try (final InputStream inputStream = Files.newInputStream(toUnpack.toPath());
                 final InputStream decompressor = getDecompressorStreamByType(inputStream, fileType)) {
                IOUtils.copyLarge(decompressor, outputStream);
            }
            return destination;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Makeshift way to turn the repository url into a filename.
     * <p>
     * Ideally, we would build this from parsing each repository's metadata. That mechanism requires additional
     * effort, so this simple way is substituted until someone can be bothered to implement better derivation.
     *
     * @param url the url string to be turned into a filename by way of hashing, trying to keep the file ending
     * @return hash concatenated with the file extension if one can be found
     */
    private String urlToFilename(String url) {
        // FIXME: storing hash-based seems like a hack given our current code infrastructure. is there a better way?
        final int lastDot = url.lastIndexOf('.');
        final String uncleanEnding = lastDot != -1 ? url.substring(lastDot) : "";
        final String clean = fileExtensionPattern.matcher(uncleanEnding).find() ? uncleanEnding : "";

        return "sha256-" + DigestUtils.sha256Hex(url.getBytes(StandardCharsets.UTF_8)) + clean;
    }

    @NonNull
    private File downloadIndex(@NonNull String url) throws DownloadFailedException {
        // download index file from url
        final File downloaded;
        try {
            final URL parsedUrl = new URL(url);
            final File destinationFile = new File(
                    new File(downloadLocation.deriveDownloadFolder("deb-index", parsedUrl.getHost()),
                            "[" + parsedUrl.getHost() + "-" + urlToFilename(url) + "]"),
                    urlToFilename(url)
            );

            downloaded = GenericUtils.downloadFile(
                    webAccess,
                    url,
                    destinationFile,
                    MarkerUtils.deriveMarkerFileFromDestination(destinationFile),
                    this.getClass().getSimpleName()
            );

            if (downloaded == null) {
                log.error("Could not download url [{}] to [{}].", url, destinationFile);
                throw new DownloadFailedException();
            }

            return downloaded;
        } catch (MalformedURLException e) {
            log.error("Got malformed URL while trying to download index from [{}].", url);
            throw new IllegalArgumentException("Can't download index files from url: ", e);
        }
    }

    private void generateIndex(@NonNull final SortedMap<String, PackagesFileType> sourceFileUrlsToType) {
        final TreeMap<String, File> urlToFile = new TreeMap<>();

        // create index with the given urls
        for (final Map.Entry<String, PackagesFileType> entry : sourceFileUrlsToType.entrySet()) {
            final String url = entry.getKey();

            final File downloaded;
            try {
                downloaded = downloadIndex(url);
            } catch (DownloadFailedException e) {
                log.warn("Failed to download [{}], will be excluded from index.", url);
                continue;
            }

            urlToFile.put(url, downloaded);
        }

        List<DebianPackagesEntry> collectedEntries = urlToFile.entrySet().stream().map(entry -> {
            final String url = entry.getKey();
            final File downloaded = entry.getValue();

            PackagesFileType fileType = sourceFileUrlsToType.get(url);
            if (fileType == null) {
                fileType = deriveFileTypeFromUrlEnding(url);
            }

            // and process the files
            final List<DebianPackagesEntry> entries;
            log.debug("Reading index from file [{}] of url [{}].", downloaded, url);
            try (final InputStream inputStream = Files.newInputStream(downloaded.toPath());
                 final InputStream decompressor = getDecompressorStreamByType(inputStream, fileType)) {
                entries = DebianPackagesEntry.getValidEntries(decompressor, downloaded.getPath());
            } catch (IOException e) {
                throw new RuntimeException(e);
            }

            if (entries.isEmpty()) {
                log.warn("Pointless empty index content from [{}] at [{}]", url, downloaded);
            }

            return entries;
        }).collect(ArrayList::new, ArrayList::addAll, ArrayList::addAll);

        // sort
        Collections.sort(collectedEntries);

        for (DebianPackagesEntry entry : collectedEntries) {
            final String name = entry.getPackageName();
            final String version = entry.getVersion();
            final String architecture = entry.getArchitecture();

            nameToVersionToArchitecture
                    .computeIfAbsent(name, (ignore) -> new HashMap<>())
                    .computeIfAbsent(version, (ignore) -> new HashMap<>())
                    .computeIfAbsent(architecture, (ignore) -> new TreeSet<>())
                    .add(entry);
        }

        // TODO: check if there are duplicate entries for any name-version-architecture
        // TODO: do this with lucene. maybe have a look at yan's code (Index class which uses lucene)
    }
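    /*
     * For orientation (illustrative, abridged): each DebianPackagesEntry produced above corresponds to
     * one stanza of a decompressed Packages file. The fields this index keys on are Package, Version
     * and Architecture; a typical stanza looks like:
     *
     *   Package: bash
     *   Version: 5.1-2+deb11u1
     *   Architecture: amd64
     *   Filename: pool/main/b/bash/bash_5.1-2+deb11u1_amd64.deb
     */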
    public DebianPackagesEntry lookupNameVersionArch(@NonNull String name,
                                                     @NonNull String version,
                                                     @NonNull String arch) {
        synchronized (initialized) {
            if (!initialized.get()) {
                log.debug("Initializing lazy index...");
                generateIndex(this.config.getPackagesDownloadUrls());
                initialized.set(true);
            }
        }

        final Collection<DebianPackagesEntry> result = nameToVersionToArchitecture
                .getOrDefault(name, new HashMap<>())
                .getOrDefault(version, new HashMap<>())
                .getOrDefault(arch, Collections.emptySet());

        if (result.size() > 1) {
            log.warn("More than one result on lookup [{}], [{}], [{}]. Using first.", name, version, arch);
        }

        return result.stream().findFirst().orElse(null);
    }
}
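
A minimal usage sketch for context. How WebAccess and DownloadLocation instances are obtained depends on the surrounding artifact-resolver module and is assumed here; the setter on DebIndexConfig is likewise an assumption (only the getter appears above), and the mirror URL is merely illustrative:

    // hypothetical wiring; webAccess and downloadLocation come from the host application
    DebIndexConfig config = new DebIndexConfig();
    // assumed setter; the class above only uses getPackagesDownloadUrls()
    config.setPackagesDownloadUrls(Collections.singletonList(
            "http://deb.debian.org/debian/dists/bullseye/main/binary-amd64/Packages.gz"));

    DebPackagesIndex index = new DebPackagesIndex(downloadLocation, webAccess, config);

    // the first lookup lazily downloads and parses all configured Packages files
    DebianPackagesEntry entry = index.lookupNameVersionArch("bash", "5.1-2+deb11u1", "amd64");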



