package org.metaeffekt.artifact.resolver.deb.ubuntu;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.metaeffekt.artifact.resolver.ResolverResult;
import org.metaeffekt.artifact.resolver.deb.ControlFileSourceReference;
import org.metaeffekt.artifact.resolver.deb.DebArtifactReference;
import org.metaeffekt.artifact.resolver.download.WebAccess;
import org.metaeffekt.artifact.resolver.generic.AbstractDownloadingAdapter;
import org.metaeffekt.artifact.resolver.generic.FileLocation;
import org.metaeffekt.artifact.resolver.generic.utils.GenericUtils;
import org.metaeffekt.artifact.resolver.generic.utils.MarkerQueryResult;
import org.metaeffekt.artifact.resolver.generic.utils.MarkerUtils;
import org.metaeffekt.artifact.resolver.model.DownloadLocation;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.StandardOpenOption;
import java.util.*;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import static org.metaeffekt.artifact.resolver.generic.utils.MarkerUtils.*;
/**
* Tries to use Ubuntu Launchpad to find package content.
*
* {@code https://launchpad.net/ubuntu/+source/SOURCE-PACKAGE-NAME/VERSION}.
*
* Example:
* {@code https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/adduser/3.118ubuntu2/adduser_3.118ubuntu2.tar.xz}
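*
* The dsc filename itself is derived as {@code NAME + "_" + VERSION-WITHOUT-EPOCH + ".dsc"} (see
* {@link #downloadSourceArtifact}), e.g. {@code adduser_3.118ubuntu2.dsc} for the example above.
*
* Minimal usage sketch; the constructor arguments are assumed to be supplied by the surrounding resolver setup:
* <pre>{@code
* UbuntuLaunchpadAdapter adapter = new UbuntuLaunchpadAdapter(downloadLocation, webAccess);
* ResolverResult result = adapter.downloadSourceArtifact(debSourcePackageReference);
* }</pre>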
*/
@Slf4j
public class UbuntuLaunchpadAdapter extends AbstractDownloadingAdapter {
protected final Pattern lowerHexPattern = Pattern.compile("[0-9a-f]+");
protected final Pattern sizePattern = Pattern.compile("[0-9]+");
protected final Pattern controlCharacter = Pattern.compile("\\p{Cntrl}");
public UbuntuLaunchpadAdapter(DownloadLocation downloadLocation, WebAccess webAccess) {
super(downloadLocation, webAccess);
}
/**
* Does custom parsing of dsc files (since the original parser isn't written in Java) to get to the source code.
*
* References are derived from "Files" and "Checksums-*" fields.
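*
* Illustrative excerpt of the fields this parser consumes (hash values and size are made-up placeholders, and the
* sha256 value is shortened):
* <pre>
* Files:
*  0123456789abcdef0123456789abcdef 212300 adduser_3.118ubuntu2.tar.xz
* Checksums-Sha256:
*  0123456789abcdef 212300 adduser_3.118ubuntu2.tar.xz
* </pre>
* Each continuation line carries a checksum, a size in bytes and a filename, separated by single spaces; the
* "Files" lines use md5.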
* @param dscFile dsc file to parse
* @return a list of references to source files
*/
public List<ControlFileSourceReference> getFilesFromControlFile(@NonNull File dscFile) {
final Map<String, List<String>> typesToLines = new HashMap<>();
// get relevant content from the file
try {
String currentField = null;
boolean skipAfterSignatureStart = false;
long lineCount = 0;
for (final String line : Files.readAllLines(dscFile.toPath(), StandardCharsets.UTF_8)) {
lineCount++;
if (StringUtils.isBlank(line)) {
// disregard empty lines
continue;
}
final String lineContent;
if (!line.startsWith(" ") && !line.startsWith("\t")) {
// line does not start with space or tab and is therefore either a signature marker or a new field
if (!line.contains(":")) {
if (lineCount == 1 && line.trim().equals("-----BEGIN PGP SIGNED MESSAGE-----")) {
// skip first line if it's the start of a signature
log.trace("Skipping begin PGP signed message line while parsing.");
continue;
}
if (skipAfterSignatureStart || line.trim().contains("-----BEGIN PGP SIGNATURE-----")) {
// pray that we are at the end of the file
skipAfterSignatureStart = !line.trim().contains("-----END PGP SIGNATURE-----");
continue;
}
if (currentField == null) {
log.warn(
"A line [{}] in [{}] didn't start with space or tab but didn't contain colon: [{}].",
lineCount,
dscFile,
line
);
} else {
log.warn(
"A line [{}] after [{}] in [{}] didn't start with space or tab" +
" but didn't contain colon: [{}].",
lineCount,
currentField,
dscFile,
line
);
}
}
// this is a new field.
int colonIndex = line.indexOf(':');
if (colonIndex <= 0) {
// stop parsing, we are probably into a pgp signature from here on.
break;
}
currentField = line.substring(0, colonIndex);
lineContent = line.substring(colonIndex + 1);
} else {
lineContent = line.substring(1);
}
if (currentField == null) {
log.warn("No field before value [{}].", line);
continue;
}
// read checksums and sort them by type
if (currentField.toLowerCase(Locale.ENGLISH).startsWith("checksums-")) {
// derive hashtype from field name
final String hashType = currentField
.toLowerCase(Locale.ENGLISH)
.substring(currentField.indexOf("-") + 1)
.trim();
if (StringUtils.isNotBlank(lineContent.trim())) {
typesToLines
.computeIfAbsent(hashType, (key) -> new ArrayList<>())
.add(lineContent.trim());
}
}
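// the legacy "Files" field uses the same "checksum size filename" layout, with md5 as the checksum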
if (currentField.toLowerCase(Locale.ENGLISH).equals("files")) {
if (StringUtils.isNotBlank(lineContent.trim())) {
typesToLines
.computeIfAbsent("md5", (key) -> new ArrayList<>())
.add(lineContent.trim());
}
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
// map filenames to a holder object
final Map<String, ControlFileSourceReference> sourceReferenceMap = new HashMap<>();
// build objects from read data.
for (Map.Entry<String, List<String>> entry : typesToLines.entrySet()) {
for (String line : entry.getValue()) {
final String[] splitLine = StringUtils.strip(line).split(" ");
// perform some sanity checks to detect errors if custom parser fails
if (splitLine.length != 3) {
log.error("Line [{}] split (around space) into [{}] (not three) fields.", line, splitLine.length);
// can't work with this.
continue;
}
// should be hex
final String hashValue = splitLine[0];
if (!lowerHexPattern.matcher(hashValue).matches()) {
log.warn("Hash value [{}] from dsc [{}] does not look hexadecimal.", hashValue, dscFile);
}
// should be an int
final String sizeString = splitLine[1];
if (!sizePattern.matcher(sizeString).matches()) {
log.warn(
"Size value [{}] from line [{}] of type [{}] from dsc [{}] doesn't look like an integer.",
sizeString,
line,
entry.getKey(),
dscFile
);
}
int sizeInt;
try {
sizeInt = Integer.parseInt(sizeString);
} catch (NumberFormatException e) {
sizeInt = -1;
}
// shouldn't contain weird control characters
final String filename = splitLine[2];
if (filename.contains("\n")
|| filename.contains("\r")
|| filename.contains(" ")
|| controlCharacter.matcher(filename).find()) {
log.warn(
"Filename value [{}] from line [{}] of type [{}] from dsc [{}] contains unusual content.",
filename,
line,
entry.getKey(),
dscFile
);
// treat this as a failure; can't work with suspicious filenames
return null;
}
// add hash to the corresponding control file
ControlFileSourceReference reference = sourceReferenceMap.get(filename);
if (reference == null) {
reference = new ControlFileSourceReference(filename, sizeInt);
sourceReferenceMap.put(filename, reference);
}
final String previousHashValue = reference.addChecksum(entry.getKey(), hashValue);
if (previousHashValue != null) {
log.warn("Oddity: Duplicate checksum of type [{}] with values [{}], [{}]. Might indicate error.",
entry.getKey(), hashValue, previousHashValue);
}
}
}
return new ArrayList<>(sourceReferenceMap.values());
}
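/**
* Joins the Launchpad "+sourcefiles" base URL with the package name, version and a filename.
*
* Example (hypothetical invocation): name {@code adduser}, version {@code 3.118ubuntu2} and filename
* {@code adduser_3.118ubuntu2.tar.xz} yield
* {@code https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/adduser/3.118ubuntu2/adduser_3.118ubuntu2.tar.xz}.
*/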
protected String getLaunchpadUrl(DebArtifactReference reference, String filename) {
final StringJoiner dscUrlJoiner = new StringJoiner("/");
dscUrlJoiner.add("https://launchpad.net/ubuntu/+archive/primary/+sourcefiles");
dscUrlJoiner.add(reference.getName());
dscUrlJoiner.add(reference.getVersion());
dscUrlJoiner.add(filename);
return dscUrlJoiner.toString();
}
protected File downloadDscFile(DebArtifactReference reference, String dscFilename) {
final FileLocation fileLocation = reference.deriveFileLocation(dscFilename);
final File file = deriveDownloadFile(fileLocation);
final File dscMarkerFile = deriveMarkerFile(fileLocation, "ubuntu_" + dscFilename);
final String dscUrl = getLaunchpadUrl(reference, dscFilename);
return GenericUtils.downloadFile(this.getWebAccess(), dscUrl, file, dscMarkerFile, reference.toString());
}
protected File downloadSourceFile(DebArtifactReference reference, String sourceFilename) {
final FileLocation fileLocation = reference.deriveFileLocation(sourceFilename);
final File file = deriveDownloadFile(fileLocation);
final File dscMarkerFile = deriveMarkerFile(fileLocation, "ubuntu_" + sourceFilename);
final String fileUrl = getLaunchpadUrl(reference, sourceFilename);
return GenericUtils.downloadFile(this.getWebAccess(), fileUrl, file, dscMarkerFile, reference.toString());
}
/**
* Checks whether an output that is expected to exist really does exist and is not empty.
*
* Automatically invalidates given markers if it does not.
* @param fileToCheck check whether this file exists.
* @param correspondingMarker marker that corresponds to this file; may be written to
* @param reference reference to the resolving source package, for logging
* @return true if everything was alright, false if the file didn't exist or was empty
*/
public static boolean sanityCheckOutputExists(@NonNull File fileToCheck,
File correspondingMarker,
DebArtifactReference reference) {
try {
// sanity check previous output
if (!fileToCheck.exists() ||
!Files.isRegularFile(fileToCheck.toPath()) ||
Files.size(fileToCheck.toPath()) <= 0) {
// the previous output is likely invalid: when might an empty archive appear?
log.warn(
"Previous output file [{}] of marker [{}] for ref [{}] failed sanity checks; treating it as invalid. " +
"Consider removing its marker and output.",
fileToCheck,
correspondingMarker,
reference
);
return false;
} else {
return true;
}
} catch (IOException e) {
log.trace("IOException while sanity checking existence of file [{}].", fileToCheck);
return false;
}
}
/**
* Get repackaged sources.
*
* Output won't be deterministic: no Java library provides a trivial API for deterministic archival.
*
* Deals with markers, so it (hopefully) doesn't have to repackage every single time.
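*
* Rough flow, as implemented below: if any input file is newer than the existing zip, its marker and the zip are
* deleted; the marker is then queried to skip recently failed attempts; on an actual attempt the marker is touched,
* every source file is streamed into its own zip entry, and success is recorded via the marker.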
*
* @param reference artifact reference to build the source for
* @param targetFilename target filename to write the repackaged file to
* @param srcReferences source file references
* @param sourceRefToFile maps source reference objects to their actual downloaded files
* @return a file for the written zip
*/
public File repackageSourcesToZip(@NonNull DebArtifactReference reference,
@NonNull String targetFilename,
@NonNull List<ControlFileSourceReference> srcReferences,
@NonNull HashMap<ControlFileSourceReference, File> sourceRefToFile) {
log.debug("Beginning repackage of sources for [{}] to [{}].", reference, targetFilename);
final FileLocation fileLocation = reference.deriveFileLocation(targetFilename);
final File file = deriveDownloadFile(fileLocation);
final File markerFile = MarkerUtils.deriveMarkerFileFromDestination(file);
// special check: override classic marker logic: retry if input file changed since last repackage.
for (ControlFileSourceReference sourceReference : srcReferences) {
final File sourceFile = sourceRefToFile.get(sourceReference);
if (sourceFile.lastModified() >= file.lastModified()) {
log.debug("Invalidating repackaged file [{}] since source [{}] was newer while resolving [{}].",
file.toPath(),
sourceFile.getPath(),
reference);
if (markerFile.exists() && !markerFile.delete()) {
log.warn("Could not delete marker file [{}] for repackaged sources while resolving [{}].",
markerFile.toPath(),
reference);
}
if (file.exists() && !file.delete()) {
// source file might have changed after past repackaging. rebuild conservatively
log.warn("Could not delete repackage file [{}] while resolving [{}].",
file.toPath(),
reference);
}
}
}
final MarkerQueryResult queryResult = queryMarker(markerFile, reference.toString());
if (queryResult.getFoundTarget() != null) {
return queryResult.getFoundTarget();
} else {
if (queryResult.isAttemptedRecently()) {
// if we attempted this "recently", don't try again and return what is there (either present or null).
// this behaviour means the zipping code mustn't leave behind half-done files!
log.info("Skipping repackage of sources [{}]. Recent attempt failed: [{}].",
reference,
file.getPath());
return null;
}
}
// begin our repackaging attempt
touchMarker(markerFile, reference.toString());
// NOTE: aborting during this will leave behind an empty marker signifying a recent failure.
try (final OutputStream outputStream = Files.newOutputStream(file.toPath(),
StandardOpenOption.CREATE,
StandardOpenOption.WRITE,
StandardOpenOption.TRUNCATE_EXISTING);
final BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(outputStream, 65536);
final ZipOutputStream zipOutputStream = new ZipOutputStream(bufferedOutputStream, StandardCharsets.UTF_8)
) {
zipOutputStream.setLevel(4);
for (final ControlFileSourceReference sourceReference : srcReferences) {
final File sourcePart = sourceRefToFile.get(sourceReference);
// sanity check part before repackaging
if (!sanityCheckOutputExists(sourcePart, null, reference)) {
log.error("Shouldn't happen: source part [{}] of ref [{}] doesn't exist. Code shouldn't get here.",
sourcePart,
reference);
return null;
}
// there shouldn't be any collisions due to how dsc downloads are usually managed.
zipOutputStream.putNextEntry(new ZipEntry(sourceReference.getName()));
try (final InputStream inputStream = Files.newInputStream(sourcePart.toPath())) {
if (IOUtils.copyLarge(inputStream, zipOutputStream) != Files.size(sourcePart.toPath())) {
throw new IOException("Failure to copy: input part size and output size differ.");
}
} catch (IOException e) {
log.error("Failed to write source file [{}] to zip at [{}] for package [{}].",
sourcePart,
file,
reference);
return null;
}
zipOutputStream.closeEntry();
}
markSuccess(file, markerFile, reference.toString());
return file;
} catch (IOException e) {
// zip creation failed. clean up.
log.warn("Failed to create source archive due to exception: [{}]", ExceptionUtils.getStackTrace(e));
try {
// delete partially written zip file
Files.deleteIfExists(file.toPath());
} catch (IOException e2) {
// something is massively wrong here.
log.error("Couldn't delete partially written file at [{}]: [{}]",
file,
ExceptionUtils.getStackTrace(e2));
}
return null;
}
}
/**
* Checks if checksums for the specified source file match what is expected by its source reference.
*
* Invalidates corresponding markers.
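*
* Currently handles the {@code sha256}, {@code sha1} and {@code md5} checksum types; other types are only logged
* as unhandled and do not influence the result.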
* @param sourceRef reference to a source object, containing metadata
* @param sourceFile the source file which got downloaded, corresponding to the source reference
* @param ref (may be non-source) reference of the overarching artifact being processed, used for logging
* @return whether all checksums match the expected values
*/
public boolean checksumOk(@NonNull ControlFileSourceReference sourceRef,
@NonNull File sourceFile,
@NonNull DebArtifactReference ref) {
boolean allMatch = true;
if (sourceRef.getChecksumTypes().isEmpty()) {
// inconsistency: with how control files work, if there is a source file there should be a checksum
log.warn("No checksums to check for file [{}] of sourceRef [{}] of ref [{}].", sourceFile, sourceRef, ref);
}
for (String checksumType : sourceRef.getChecksumTypes()) {
try (InputStream inputStream = Files.newInputStream(sourceFile.toPath())) {
switch (checksumType) {
case "sha256":
final boolean sha256Matches = DigestUtils
.sha256Hex(inputStream)
.toLowerCase(Locale.ENGLISH)
.equals(sourceRef.getChecksum(checksumType));
allMatch &= sha256Matches;
if (!sha256Matches) {
log.warn("Sha256 checksum of [{}] does not match expected [{}].",
sourceFile,
sourceRef.getChecksum("sha256"));
}
break;
case "sha1":
final boolean sha1Matches = DigestUtils
.sha1Hex(inputStream)
.toLowerCase(Locale.ENGLISH)
.equals(sourceRef.getChecksum(checksumType));
allMatch &= sha1Matches;
if (!sha1Matches) {
log.warn("Sha1 checksum of [{}] does not match expected [{}].",
sourceFile,
sourceRef.getChecksum("sha1"));
}
break;
case "md5":
final boolean md5Matches = DigestUtils
.md5Hex(inputStream)
.toLowerCase(Locale.ENGLISH)
.equals(sourceRef.getChecksum(checksumType));
allMatch &= md5Matches;
if (!md5Matches) {
log.warn("Md5 checksum of [{}] does not match expected [{}].",
sourceFile, sourceRef.getChecksum("md5"));
}
break;
default:
log.warn("Can't handle checksum type [{}] in [{}] yet.",
checksumType,
this.getClass().getSimpleName());
}
} catch (IOException e) {
log.warn("IOException while checking checksums for file [{}] of source ref [{}] of ref [{}] : [{}]",
sourceFile,
sourceRef,
ref,
ExceptionUtils.getStackTrace(e));
}
}
if (!allMatch) {
log.warn("Invalidating download marker for [{}] of ref [{}] due to mismatched checksums.",
sourceFile.getAbsolutePath(),
ref);
invalidateMarkerFor(sourceFile, ref.toString());
}
return allMatch;
}
/**
* Get a source artifact for the given reference.
*
* Since a canonical, downloadable source package couldn't be found on Launchpad, we download each referenced
* source file and then just bundle them in a zip.
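*
* Outline: download the dsc file, parse its file list via {@link #getFilesFromControlFile}, download and
* checksum-verify each referenced source file, then bundle everything via {@link #repackageSourcesToZip}.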
* @param sourcePackageRef reference to the source package; used to derive a launchpad url
* @return repackaged source files in a zip
*/
public ResolverResult downloadSourceArtifact(@NonNull DebArtifactReference sourcePackageRef) {
if (!sourcePackageRef.isValid()) {
return null;
}
if (sourcePackageRef.getNamespace() != DebArtifactReference.DebNamespace.UBUNTU) {
log.warn("Namespace of given ref [{}] is not UBUNTU.", sourcePackageRef);
}
final String versionWithoutEpoch;
// the version in filename doesn't seem to include epoch
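// e.g. a hypothetical "1:3.118ubuntu2" becomes "3.118ubuntu2"; the epoch is everything before the first colon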
int colonIndex = sourcePackageRef.getVersion().indexOf(':');
if (colonIndex == -1) {
versionWithoutEpoch = sourcePackageRef.getVersion();
} else {
versionWithoutEpoch = sourcePackageRef.getVersion().substring(colonIndex + 1);
}
String dscFilename = sourcePackageRef.getName() + "_" + versionWithoutEpoch + ".dsc";
File dscFile = downloadDscFile(sourcePackageRef, dscFilename);
if (dscFile == null) {
log.info("Failed to download dsc file for [{}].", sourcePackageRef);
return null;
}
// remember actual output files for each source package reference. needed for later reference
final HashMap<ControlFileSourceReference, File> sourceRefToFile = new HashMap<>();
try {
final List<ControlFileSourceReference> sourceReferences = getFilesFromControlFile(dscFile);
if (sourceReferences == null) {
log.warn("Error getting source references for [{}].", sourcePackageRef);
return null;
}
if (sourceReferences.isEmpty()) {
log.trace("List of source references empty for [{}].", sourcePackageRef);
return null;
}
// download and read dsc, derive source filenames
for (final ControlFileSourceReference sourceRef : sourceReferences) {
final File sourceFile = downloadSourceFile(sourcePackageRef, sourceRef.getName());
if (sourceFile == null) {
// return failure for the entire package
return null;
}
if (!checksumOk(sourceRef, sourceFile, sourcePackageRef)) {
// a source part's download is inconsistent with expected checksums. critical failure for package.
return null;
}
// sanity check size (compare to dsc). note unexpected divergence but don't overrule checksum success
try {
long size = Files.size(sourceFile.toPath());
if (sourceRef.getSize() != -1 && size != sourceRef.getSize()) {
log.warn("File size [{}] of [{}] does not match expected size [{}].", size, sourceFile.getAbsolutePath(), sourceRef.getSize());
}
} catch (IOException e) {
log.warn("IOException while trying to check file size of [{}].", sourceFile.getAbsolutePath());
}
sourceRefToFile.put(sourceRef, sourceFile);
}
// kind of hacky: no canonical way to build a source archive for this package.
// package sources: artifact resolver engine only references single files as of writing
final String sourceRepackageFilename = "ae-source-repackage-ubuntu-" + sourcePackageRef.getName() + "-" +
sourcePackageRef.getVersion() + "-" + sourcePackageRef.getArchitecture() + ".zip";
return resolve(repackageSourcesToZip(sourcePackageRef, sourceRepackageFilename, sourceReferences, sourceRefToFile), null);
} catch (Exception e) {
log.warn("Resolver failed for reference [{}] with: [{}].", sourcePackageRef, ExceptionUtils.getStackTrace(e));
return resolveWithError(sourcePackageRef, e);
}
}
}