// org.metaeffekt.artifact.resolver.deb.index.packages.DebPackagesIndex (Maven / Gradle / Ivy)
package org.metaeffekt.artifact.resolver.deb.index.packages;
import lombok.Getter;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
import org.apache.commons.io.IOUtils;
import org.metaeffekt.artifact.resolver.deb.index.packages.config.DebIndexConfig;
import org.metaeffekt.artifact.resolver.deb.index.packages.config.PackagesFileType;
import org.metaeffekt.artifact.resolver.deb.index.packages.parser.DebianPackagesEntry;
import org.metaeffekt.artifact.resolver.download.WebAccess;
import org.metaeffekt.artifact.resolver.generic.utils.GenericUtils;
import org.metaeffekt.artifact.resolver.generic.utils.MarkerUtils;
import org.metaeffekt.artifact.resolver.generic.utils.exception.DownloadFailedException;
import org.metaeffekt.artifact.resolver.model.DownloadLocation;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.StandardOpenOption;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Pattern;
/**
* Tries to build package indices by reading Packages files.
*
* These files can be acquired from the download urls listed in the {@link DebIndexConfig}
* (see {@code getPackagesDownloadUrls()}).
*/
@Slf4j
public class DebPackagesIndex {
// Used to derive the folder that downloaded index files are written to.
private final DownloadLocation downloadLocation;
// Handed to the download helper when fetching index files.
private final WebAccess webAccess;
// Never null: the constructor substitutes a default DebIndexConfig when none is given.
private final DebIndexConfig config;
// TODO: might want some separation by ecosystem (e.g. debian vs ubuntu)
// Lookup structure filled by generateIndex: package name -> version -> architecture -> entries.
// NOTE(review): "Map>>>" is not valid Java; the generic type arguments of this declaration look like
// they were lost during extraction — restore them from the original file.
@Getter
private final Map>>> nameToVersionToArchitecture =
new HashMap<>();
// Matches a trailing file extension: a dot followed by 1 to 16 ASCII letters at the end of the input.
private static final Pattern fileExtensionPattern = Pattern.compile("\\.[a-zA-Z]{1,16}$");
// Becomes true once the index has been generated; also used as the lock object for lazy initialization.
private final AtomicBoolean initialized = new AtomicBoolean(false);
/**
 * Creates an index object; no index data is downloaded or parsed here.
 *
 * @param downloadLocation location used to derive the folder that downloaded index files are written to
 * @param webAccess        web access handed to the download helper
 * @param indexConfig      index configuration; when {@code null}, a default {@link DebIndexConfig} is used
 */
public DebPackagesIndex(DownloadLocation downloadLocation, WebAccess webAccess, DebIndexConfig indexConfig) {
    if (indexConfig != null) {
        this.config = indexConfig;
    } else {
        // fall back to the default configuration when the caller supplies none
        this.config = new DebIndexConfig();
    }
    this.webAccess = webAccess;
    this.downloadLocation = downloadLocation;
}
/**
 * Guesses how a Packages file is compressed by looking at the end of its url.
 *
 * Urls ending in {@code /Packages.gz} map to GZIP, {@code /Packages.xz} to XZ and {@code /Packages} to RAW.
 * Any other ending is logged as a warning and treated as RAW. A url with leading or trailing whitespace is
 * only warned about; it is not trimmed before matching.
 *
 * @param url the url of the Packages file
 * @return the derived file type
 */
@NonNull
private static PackagesFileType deriveFileTypeFromUrlEnding(@NonNull final String url) {
    final boolean untrimmed = !url.trim().equals(url);
    if (untrimmed) {
        log.warn("The url [{}] is untrimmed. This may lead to errors.", url);
    }

    final PackagesFileType derivedType;
    if (url.endsWith("/Packages.gz")) {
        derivedType = PackagesFileType.GZIP;
    } else if (url.endsWith("/Packages.xz")) {
        derivedType = PackagesFileType.XZ;
    } else {
        // both the plain "/Packages" ending and anything unrecognized end up as RAW;
        // only the unrecognized case is worth a warning
        if (!url.endsWith("/Packages")) {
            log.warn("Could not detect type of Packages file in [{}]. Assuming raw.", url);
        }
        derivedType = PackagesFileType.RAW;
    }
    return derivedType;
}
/**
 * Builds the index from the given Packages file urls without explicit file types.
 *
 * Every url is registered with a {@code null} type, which makes the map-based overload derive the type
 * from the url ending.
 *
 * @param sourceFileUrls urls of the Packages files to download and index
 */
private void generateIndex(@NonNull final List<String> sourceFileUrls) {
    final SortedMap<String, PackagesFileType> urlToDerivedType = new TreeMap<>();
    for (final String url : sourceFileUrls) {
        // null means "type not specified"; it is resolved later from the url ending
        urlToDerivedType.put(url, null);
    }
    generateIndex(urlToDerivedType);
}
/**
 * Wraps the given stream in the stream matching the file type.
 *
 * XZ and GZIP get the corresponding commons-compress decompressor (the {@code false} argument is the
 * constructors' "decompress concatenated" flag); RAW is only buffered.
 *
 * @param inputStream the stream delivering the (possibly compressed) file content
 * @param fileType    how the content is compressed
 * @return a stream delivering the uncompressed content
 * @throws IOException           if creating the decompressing stream fails
 * @throws IllegalStateException if the file type is not one of the handled values
 */
private InputStream getDecompressorStreamByType(@NonNull InputStream inputStream,
                                                @NonNull PackagesFileType fileType)
        throws IOException {
    if (fileType == PackagesFileType.XZ) {
        return new XZCompressorInputStream(inputStream, false);
    }
    if (fileType == PackagesFileType.GZIP) {
        return new GzipCompressorInputStream(inputStream, false);
    }
    if (fileType == PackagesFileType.RAW) {
        return new BufferedInputStream(inputStream, 8192);
    }

    // only reachable if the enum gains a value that is not handled above
    log.error("Reached default case with (unknown) enum value [{}] while trying to unpack.", fileType);
    throw new IllegalStateException("Could not unpack given file.");
}
/**
 * Decompresses a file into a destination file, creating the destination or replacing its content.
 *
 * @param toUnpack    the (possibly compressed) file to read
 * @param destination the file to write the uncompressed content to
 * @param fileType    how {@code toUnpack} is compressed
 * @return {@code destination}
 * @throws UncheckedIOException if reading, decompressing or writing fails
 */
private File decompressByType(@NonNull final File toUnpack,
                              @NonNull final File destination,
                              @NonNull PackagesFileType fileType) {
    // Resources are opened in declaration order: the source and its decompressor come first, so that a
    // missing or unreadable source fails before the destination is created or truncated.
    try (final InputStream inputStream = Files.newInputStream(toUnpack.toPath());
         final InputStream decompressor = getDecompressorStreamByType(inputStream, fileType);
         final OutputStream outputStream = Files.newOutputStream(destination.toPath(),
                 StandardOpenOption.CREATE,
                 StandardOpenOption.WRITE,
                 StandardOpenOption.TRUNCATE_EXISTING)) {
        IOUtils.copyLarge(decompressor, outputStream);
        return destination;
    } catch (IOException e) {
        // UncheckedIOException is a RuntimeException, so existing callers are unaffected
        throw new UncheckedIOException(
                "Failed to decompress [" + toUnpack + "] as [" + fileType + "] into [" + destination + "].", e);
    }
}
/**
 * Derives a filename for a repository url (a stopgap solution).
 *
 * A proper name would come from parsing each repository's metadata; that takes more effort, so for now the
 * name is the SHA-256 hex digest of the url (UTF-8 encoded), prefixed with "sha256-" and followed by the
 * url's file extension when it has one.
 *
 * @param url the url string to be turned into a filename by hashing it and trying to keep its ending
 * @return the hash, concatenated with the file extension if one can be found
 */
private String urlToFilename(String url) {
    // FIXME: storing hash-based seems like a hack given our current code infrastructure. is there a better way?
    String extension = "";
    final int dotIndex = url.lastIndexOf('.');
    if (dotIndex >= 0) {
        // everything from the last dot on counts as extension only if it looks like one
        final String candidate = url.substring(dotIndex);
        if (fileExtensionPattern.matcher(candidate).find()) {
            extension = candidate;
        }
    }

    final String urlHash = DigestUtils.sha256Hex(url.getBytes(StandardCharsets.UTF_8));
    return "sha256-" + urlHash + extension;
}
/**
 * Downloads the index file behind the given url.
 *
 * The file is stored as {@code <folder derived for "deb-index" and host>/[<host>-<filename>]/<filename>},
 * where the filename is the hash-based name produced by {@code urlToFilename}.
 *
 * @param url the url of the index file
 * @return the downloaded file
 * @throws DownloadFailedException  if the download helper does not return a file
 * @throws IllegalArgumentException if the url is malformed
 */
@NonNull
private File downloadIndex(@NonNull String url) throws DownloadFailedException {
    try {
        final URL parsedUrl = new URL(url);
        final String host = parsedUrl.getHost();
        // hash the url once and reuse the name for both the containing folder and the file itself
        final String filename = urlToFilename(url);

        final File destinationFile = new File(
                new File(downloadLocation.deriveDownloadFolder(
                        "deb-index",
                        host),
                        "[" + host + "-" + filename + "]"),
                filename
        );

        final File downloaded = GenericUtils.downloadFile(
                webAccess,
                url,
                destinationFile,
                MarkerUtils.deriveMarkerFileFromDestination(destinationFile),
                this.getClass().getSimpleName()
        );

        if (downloaded == null) {
            log.error("Could not download url [{}] to [{}].", url, destinationFile);
            throw new DownloadFailedException();
        }
        return downloaded;
    } catch (MalformedURLException e) {
        log.error("Got malformed URL while trying to download index from [{}].", url);
        // name the offending url in the exception itself, not just in the log
        throw new IllegalArgumentException("Can't download index files from url: " + url, e);
    }
}
/**
 * Downloads and parses the given Packages files and adds their entries to the lookup structure.
 *
 * Urls whose download fails are logged and left out of the index. A file that cannot be read or
 * decompressed aborts index generation with an unchecked exception.
 *
 * @param sourceFileUrlsToType Packages file urls mapped to their file type; a {@code null} type is derived
 *                             from the url ending
 * @throws UncheckedIOException if a downloaded file cannot be read
 */
private void generateIndex(@NonNull final SortedMap<String, PackagesFileType> sourceFileUrlsToType) {
    // phase 1: download all index files, skipping the ones that cannot be downloaded
    final SortedMap<String, File> urlToFile = new TreeMap<>();
    for (final String url : sourceFileUrlsToType.keySet()) {
        try {
            urlToFile.put(url, downloadIndex(url));
        } catch (DownloadFailedException e) {
            log.warn("Failed to download [{}], will be excluded from index.", url);
        }
    }

    // phase 2: parse every downloaded file and gather the entries
    final List<DebianPackagesEntry> collectedEntries = new ArrayList<>();
    for (final Map.Entry<String, File> urlAndFile : urlToFile.entrySet()) {
        final String url = urlAndFile.getKey();

        PackagesFileType fileType = sourceFileUrlsToType.get(url);
        if (fileType == null) {
            fileType = deriveFileTypeFromUrlEnding(url);
        }

        collectedEntries.addAll(readIndexEntries(url, urlAndFile.getValue(), fileType));
    }

    // sort
    Collections.sort(collectedEntries);

    for (final DebianPackagesEntry entry : collectedEntries) {
        nameToVersionToArchitecture
                .computeIfAbsent(entry.getPackageName(), (ignore) -> new HashMap<>())
                .computeIfAbsent(entry.getVersion(), (ignore) -> new HashMap<>())
                .computeIfAbsent(entry.getArchitecture(), (ignore) -> new TreeSet<>())
                .add(entry);
    }

    // TODO: check if there are duplicate entries for any name-version-architecture
    // TODO: do this with lucene. maybe have a look at yan's code (Index class which uses lucene)
}

/**
 * Reads the valid entries of one downloaded Packages file, decompressing it according to its type.
 */
private List<DebianPackagesEntry> readIndexEntries(final String url,
                                                   final File downloaded,
                                                   final PackagesFileType fileType) {
    log.debug("Reading index from file [{}] of url [{}].", downloaded, url);

    final List<DebianPackagesEntry> entries;
    try (final InputStream inputStream = Files.newInputStream(downloaded.toPath());
         final InputStream decompressor = getDecompressorStreamByType(inputStream, fileType)) {
        entries = DebianPackagesEntry.getValidEntries(decompressor, downloaded.getPath());
    } catch (IOException e) {
        // still a RuntimeException for callers, but now says which file and url failed
        throw new UncheckedIOException(
                "Failed to read index file [" + downloaded + "] of url [" + url + "].", e);
    }

    if (entries.isEmpty()) {
        log.warn("Pointless empty index content from [{}] at [{}]",
                url,
                downloaded);
    }
    return entries;
}
/**
 * Looks up the package entry for an exact name, version and architecture.
 *
 * The index is generated on the first call from the configured Packages download urls; generation is
 * guarded by a lock on {@code initialized}. If several entries match, a warning is logged and the first
 * one is returned.
 *
 * @param name    the package name
 * @param version the package version
 * @param arch    the package architecture
 * @return the first matching entry, or {@code null} if there is none
 */
public DebianPackagesEntry lookupNameVersionArch(@NonNull String name,
                                                 @NonNull String version,
                                                 @NonNull String arch) {
    synchronized (initialized) {
        if (!initialized.get()) {
            log.debug("Initializing lazy index...");
            generateIndex(this.config.getPackagesDownloadUrls());
            // only reached when generation did not throw, so a failed attempt is retried on the next call
            initialized.set(true);
        }
    }

    // missing levels fall back to empty containers, so an unknown name/version/arch yields an empty result
    final Collection<DebianPackagesEntry> result = nameToVersionToArchitecture
            .getOrDefault(name, new HashMap<>())
            .getOrDefault(version, new HashMap<>())
            .getOrDefault(arch, Collections.emptySet());

    if (result.size() > 1) {
        log.warn("More than one result on lookup [{}], [{}], [{}]. Using first.", name, version, arch);
    }

    return result.stream().findFirst().orElse(null);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy