// org.metaeffekt.artifact.resolver.maven.MavenArtifactResolver (Maven / Gradle / Ivy)
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.metaeffekt.artifact.resolver.maven;
import com.github.packageurl.MalformedPackageURLException;
import com.github.packageurl.PackageURL;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.document.Document;
import org.jdom2.JDOMException;
import org.metaeffekt.artifact.resolver.ArtifactResolver;
import org.metaeffekt.artifact.resolver.download.WebAccess;
import org.metaeffekt.artifact.resolver.generic.ArtifactIndex;
import org.metaeffekt.artifact.resolver.maven.index.MavenCentralIndex;
import org.metaeffekt.artifact.resolver.model.*;
import org.metaeffekt.core.inventory.processor.model.Artifact;
import org.metaeffekt.core.util.FileUtils;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.stream.Collectors;
import static org.metaeffekt.artifact.resolver.maven.index.MavenCentralIndex.sanitizeForWildcardQuery;
import static org.metaeffekt.artifact.resolver.model.ArtifactPartType.*;
import static org.metaeffekt.core.inventory.processor.model.Constants.KEY_HASH_SHA256;
@Slf4j
public class MavenArtifactResolver implements ArtifactResolver {
// Passed as the second argument to every MavenCentralIndex#queryByFilename call of this class
// (presumably the maximum number of hits per query; confirm against MavenCentralIndex).
private static final int MAX_HITS = 120;
// Used to resolve POM, binary and source parts and to download md5 checksum files.
private final MavenRepositoryAdapter repositoryAdapter;
// Used to augment an artifact with license metadata read from a resolved POM file.
private final MavenPomAdapter pomAdapter = new MavenPomAdapter();
// Consulted to map an artifact id and checksum to a Maven reference when no group id is known.
private final ArtifactIndex artifactIndex;
// Optional: only assigned in the constructor when the configuration carries a Maven Central index config;
// otherwise it stays null and filename-based lookups are skipped.
private MavenCentralIndex mavenCentralIndex;
/**
 * Creates a resolver that works on a Maven repository adapter and, if configured, a Maven Central index.
 *
 * @param downloadLocation handed on to the repository adapter and to the Maven Central index
 * @param webAccess        handed on to the repository adapter and to the Maven Central index
 * @param artifactIndex    index consulted when an artifact comes without a group id
 * @param config           resolver configuration; the Maven Central index is only created when
 *                         {@code getMavenCentralIndexConfig()} returns a non-null value
 */
public MavenArtifactResolver(DownloadLocation downloadLocation, WebAccess webAccess, ArtifactIndex artifactIndex,
        MavenArtifactResolverConfig config) {
    this.artifactIndex = artifactIndex;
    this.repositoryAdapter = new MavenRepositoryAdapter(downloadLocation, webAccess, config);

    if (config.getMavenCentralIndexConfig() == null) {
        // no index configuration: the central index field keeps its default (null)
        return;
    }

    this.mavenCentralIndex =
            new MavenCentralIndex(downloadLocation, webAccess, config.getMavenCentralIndexConfig());
}
/**
 * Converts an index document into a {@link MavenArtifactReference} so that callers can work with typed
 * accessors instead of raw document fields.
 * <p>
 * The document fields {@code g}, {@code a}, {@code v}, {@code p} and {@code c} are copied into group id,
 * artifact id, version, packaging and classifier respectively.
 *
 * @param matchedRepresentative index document carrying the artifact data; must not be null
 * @return a new reference populated from the document fields
 */
private static MavenArtifactReference getReferenceForRepresentatives(
        @NonNull final Document matchedRepresentative) {
    final String groupId = matchedRepresentative.get("g");
    final String artifactId = matchedRepresentative.get("a");
    final String version = matchedRepresentative.get("v");
    final String packaging = matchedRepresentative.get("p");
    final String classifier = matchedRepresentative.get("c");

    final MavenArtifactReference reference = new MavenArtifactReference();
    reference.setGroupId(groupId);
    reference.setArtifactId(artifactId);
    reference.setVersion(version);
    reference.setPackaging(packaging);
    reference.setClassifier(classifier);
    return reference;
}
/**
 * Collects the part resolvers that apply to the given artifact.
 * <p>
 * Resolvers are derived twice: once from the artifact's PURL and once from the artifact's own
 * group-artifact-version data. Both contribute to the same set.
 *
 * @param artifact the artifact to collect resolvers for; must not be null
 * @return the collected part resolvers, wrapped in an {@link ArtifactPartResolvers} instance
 */
@Override
public ArtifactPartResolvers collectResolvers(@NonNull Artifact artifact) {
    // typed instead of raw, so that only part resolvers can end up in the set
    final Collection<ArtifactPartResolver> parts = new HashSet<>();

    // FIXME: investigate testResolveWithIncompleteId for if Purl and GAV overwrite each other
    addArtifactResolverPartForPurl(parts, artifact);
    addArtifactResolverPartForGAV(parts, artifact);

    return new ArtifactPartResolvers(parts);
}
/**
 * Adds part resolvers based on the group-artifact-version data carried by the artifact itself.
 * <p>
 * The artifact is converted into a {@link MavenArtifactReference}, which is then passed to
 * {@code addPartResolvers}.
 *
 * @param parts    modifiable collection to add to
 * @param artifact artifact to process
 */
private void addArtifactResolverPartForGAV(Collection<ArtifactPartResolver> parts, Artifact artifact) {
    final MavenArtifactReference gavReference = new MavenArtifactReference(artifact);
    addPartResolvers(artifact, parts, gavReference);
}
/**
 * Checks whether the md5 checksum of the artifact matches the md5 checksum file that the repository holds
 * for the given representative.
 * <p>
 * The checksum file is downloaded through the repository adapter. Its content may be the bare checksum, the
 * checksum followed by a file name ({@code <hex>  file.jar}), or the {@code MD5 (file.jar) = <hex>} form;
 * in all cases only the checksum value is compared, ignoring case.
 *
 * @param artifactMd5ChecksumHex  md5 checksum of the artifact as hex string; expected to be 32 characters long
 * @param clonedReference         reference under construction; supplies packaging / type and is used for logging
 * @param representativeReference candidate found in the index whose checksum file is downloaded
 * @return {@code true} if the checksum file was downloaded and its checksum equals the given one;
 *         {@code false} if the input checksum has the wrong length, the download failed or the values differ
 * @throws IOException if the downloaded checksum file cannot be read
 */
private boolean isChecksumSame(@NonNull String artifactMd5ChecksumHex,
        @NonNull MavenArtifactReference clonedReference,
        @NonNull MavenArtifactReference representativeReference) throws IOException {
    // an md5 digest has 16 bytes, i.e. 32 hex characters; anything else cannot be compared
    if (artifactMd5ChecksumHex.length() != 32) {
        log.error("Input hash not of size 32 for: reference [{}], md5 [{}].", clonedReference, artifactMd5ChecksumHex);
        // FIXME: this should be reporting into the inventory as error
        return false;
    }

    // HACK: bend classifier to make artifact download work; NA classifiers cause invalid filenames
    final String representativeClassifier = representativeReference.getClassifier();
    final String downloadClassifier =
            (representativeClassifier == null || "NA".equals(representativeClassifier))
                    ? ""
                    : representativeClassifier;

    // packaging (or type, or "jar" as fallback) with ".md5" appended addresses the checksum file
    final String md5Suffix =
            StringUtils.firstNonBlank(clonedReference.getPackaging(), clonedReference.getType(), "jar") + ".md5";

    // reference to md5 hash of the jar
    final MavenArtifactReference hashRef =
            new MavenArtifactReference(representativeReference, downloadClassifier, md5Suffix);

    final File md5HashFile = repositoryAdapter.downloadArtifact(hashRef);
    if (md5HashFile == null) {
        log.debug("Could not download md5 file for reference [{}].", clonedReference);
        return false;
    }

    final String fileContent = FileUtils.readFileToString(md5HashFile, StandardCharsets.UTF_8);
    return StringUtils.equalsIgnoreCase(extractChecksumValue(fileContent), artifactMd5ChecksumHex);
}

/**
 * Extracts the bare checksum value from the content of a checksum file.
 *
 * @param checksumFileContent complete content of the checksum file
 * @return the checksum token of the first non-blank line; an empty string if there is no such line
 */
private static String extractChecksumValue(String checksumFileContent) {
    // only the first non-blank line carries the checksum
    String firstLine = "";
    for (final String line : checksumFileContent.split("\\R")) {
        final String trimmedLine = line.trim();
        if (!trimmedLine.isEmpty()) {
            firstLine = trimmedLine;
            break;
        }
    }

    // "MD5 (file.jar) = <hex>": the checksum is the token after the last blank
    if (firstLine.matches(".+= [0-9A-Fa-f]+")) {
        return firstLine.substring(firstLine.lastIndexOf(' ') + 1);
    }

    // "<hex>", "<hex>  file.jar" or "<hex> *file.jar": the checksum is the leading token
    return firstLine.split("\\s+", 2)[0];
}
/**
 * Completes the Maven coordinates for the given artifact as far as possible and, if the resulting reference
 * reports itself as valid, registers resolvers for the descriptor (POM), binary and source parts.
 * <p>
 * The passed reference is copied first. Missing data is then filled in two steps:
 * <ol>
 *     <li>if the group id is blank, the artifact index is asked to map the artifact id and checksum;</li>
 *     <li>if group id or version are still blank, an observed file name is known and a Maven Central index is
 *     configured, the index is queried by file name and the first representative that passes the checksum
 *     check (or the first one at all if the artifact has no checksum) supplies group id, artifact id and
 *     version.</li>
 * </ol>
 *
 * @param artifact               artifact the resolvers are created for
 * @param parts                  modifiable collection to add to
 * @param mavenArtifactReference reference to start from; it is copied and not modified
 * @throws RuntimeException wrapping an {@link IOException} raised while querying the index or reading a
 *                          checksum file
 */
private void addPartResolvers(Artifact artifact, Collection<ArtifactPartResolver> parts,
        MavenArtifactReference mavenArtifactReference) {
    final MavenArtifactReference clonedReference = new MavenArtifactReference(mavenArtifactReference, null, null);

    if (StringUtils.isBlank(clonedReference.getGroupId())) {
        final MavenArtifactReference mappedReference = artifactIndex.map(artifact.getId(), artifact.getChecksum());
        if (mappedReference != null) {
            clonedReference.setGroupId(mappedReference.getGroupId());
            clonedReference.setArtifactId(mappedReference.getArtifactId());
            // an already known version takes precedence over the mapped one
            if (StringUtils.isBlank(clonedReference.getVersion())) {
                clonedReference.setVersion(mappedReference.getVersion());
            }
        }
    }

    // try to get more data about the artifact by deriving potential artifacts from a central index
    if ((StringUtils.isBlank(clonedReference.getGroupId()) || StringUtils.isBlank(clonedReference.getVersion()))
            && StringUtils.isNotBlank(clonedReference.getObservedFileName()) && mavenCentralIndex != null) {
        final String observedFileName = clonedReference.getObservedFileName();
        try {
            mavenCentralIndex.ensureInitialized();

            // typed collection: the lambda and the loop below access Document fields
            final Collection<Document> matchedRepresentatives = getRepresentativesByFilename(observedFileName);

            if (!matchedRepresentatives.isEmpty()) {
                log.debug("[Filename], [(comma-separated) matched representatives]: [{}], [{}]", observedFileName,
                        matchedRepresentatives.stream()
                                .map(rep -> new StringJoiner(":").add(rep.get("g")).add(rep.get("a"))
                                        .add(rep.get("v")).add(rep.get("c")).add(rep.get("p")).toString())
                                .collect(Collectors.joining(", ")));
            }

            for (Document matchedRepresentative : matchedRepresentatives) {
                if (matchedRepresentative == null) {
                    continue;
                }

                final MavenArtifactReference repRef = getReferenceForRepresentatives(matchedRepresentative);

                if (artifact.getChecksum() != null &&
                        !isChecksumSame(artifact.getChecksum().trim(), clonedReference, repRef)) {
                    // if we have a checksum to work with, it should match the representative
                    continue;
                }

                // TODO: how to evaluate multiple representatives for LATER hash-/assessment-based merge?
                //  maybe write to separate fields, assess merge later (and / or if hashes match).

                // fill information from this found artifact
                clonedReference.setGroupId(repRef.getGroupId());
                clonedReference.setArtifactId(repRef.getArtifactId());
                clonedReference.setVersion(repRef.getVersion());

                // use first good
                break;
            }
        } catch (IOException e) {
            // keep the unchecked rethrow, but say which lookup failed
            throw new RuntimeException(
                    "Failed to derive Maven coordinates from index for file name [" + observedFileName + "].", e);
        }
    }

    if (clonedReference.isValid()) {
        // every part shares the same reference and the same enrichment step
        parts.add(new ArtifactPartResolver(artifact, DESCRIPTOR,
                () -> repositoryAdapter.resolvePom(clonedReference),
                rap -> enrich(clonedReference, rap))
        );
        parts.add(new ArtifactPartResolver(artifact, BINARY_ARTIFACT,
                () -> repositoryAdapter.resolveBinaryArtifact(clonedReference),
                rap -> enrich(clonedReference, rap))
        );
        parts.add(new ArtifactPartResolver(artifact, SOURCE_ARTIFACT,
                () -> repositoryAdapter.resolveSourceArtifact(clonedReference),
                rap -> enrich(clonedReference, rap))
        );
    }
}
/**
 * Constructs queries from the observed filename and returns matches.
 * <p>
 * The sanitized filename is queried first; if that yields exactly one document it is returned. Otherwise a
 * wildcard query is issued: for a name ending in {@code .jar} the extension is replaced by {@code .pom.sha}
 * followed by {@code *}, for any other name {@code .} followed by {@code *} is appended. The documents of the
 * wildcard query are returned; if there are none, an empty list is returned.
 *
 * @param observedFileName filename to form the base of queries
 * @return matched documents; never null, possibly empty
 * @throws IOException on failure to read index
 */
@NonNull
private Collection<Document> getRepresentativesByFilename(final String observedFileName) throws IOException {
    final List<Document> exactMatches =
            mavenCentralIndex.queryByFilename(sanitizeForWildcardQuery(observedFileName), MAX_HITS);
    if (exactMatches.size() == 1) {
        return exactMatches;
    }

    // zero or several exact matches: fall back to a single wildcard query
    final String wildcardQuery;
    if (observedFileName.endsWith(".jar")) {
        final String nameWithoutExtension = observedFileName.substring(0, observedFileName.length() - 4);
        wildcardQuery = sanitizeForWildcardQuery(nameWithoutExtension + ".pom.sha") + "*";
    } else {
        wildcardQuery = sanitizeForWildcardQuery(observedFileName + ".") + "*";
    }

    final List<Document> wildcardMatches = mavenCentralIndex.queryByFilename(wildcardQuery, MAX_HITS);
    if (wildcardMatches.isEmpty()) {
        return Collections.emptyList();
    }
    return wildcardMatches;
}
/**
 * Creates a copy of the original artifact and enriches it with data derived from the resolved part.
 * <p>
 * The copy receives the absolute path of the resolved file and its SHA-256 hash, each stored under an
 * attribute key derived from the part type. For descriptor parts the POM adapter is additionally asked to
 * augment license metadata; a failure there is logged and does not prevent the artifact from being returned.
 *
 * @param mavenArtifactReference reference the part was resolved for; passed on to the POM adapter
 * @param resolveArtifactPart    the resolved part, providing original artifact, resolved file and part type
 * @return the enriched copy of the original artifact
 */
private Artifact enrich(MavenArtifactReference mavenArtifactReference, ResolvedArtifactPart resolveArtifactPart) {
    // create new artifact and transfer derived values
    final Artifact enrichedArtifact = new Artifact(resolveArtifactPart.getOriginalArtifact());
    final File resolvedFile = resolveArtifactPart.getResolvedFile();
    final ArtifactPartType artifactPartType = resolveArtifactPart.getArtifactPartType();

    enrichedArtifact.set(artifactPartType.modulatePathAttribute(), resolvedFile.getAbsolutePath());

    // propagate the SHA-256 hash for all parts; use may be selective
    enrichedArtifact.set(artifactPartType.modulateAttributeInContext(KEY_HASH_SHA256), FileUtils.computeSHA256Hash(resolvedFile));

    // additionally enhance with information from pom
    if (DESCRIPTOR.equals(artifactPartType)) {
        try {
            pomAdapter.augmentLicenseMetaData(enrichedArtifact, mavenArtifactReference, resolvedFile);
        } catch (IOException | JDOMException e) {
            // best effort: the artifact is still returned, but the cause must not get lost
            log.warn("Cannot resolve descriptor of [{}].", resolvedFile.getAbsolutePath(), e);
        }
    }

    return enrichedArtifact;
}
/**
 * Adds resolver parts based on a derived purl.
 * <p>
 * Nothing is added when the artifact carries no PURL. A PURL that cannot be parsed is not propagated as an
 * exception; instead a message is appended to the artifact's {@code Errors} attribute.
 *
 * @param parts    modifiable collection to add to
 * @param artifact artifact to process
 */
private void addArtifactResolverPartForPurl(Collection<ArtifactPartResolver> parts, Artifact artifact) {
    final String purl = artifact.get(Constants.KEY_PURL);
    if (StringUtils.isEmpty(purl)) {
        // no purl available; nothing to derive
        return;
    }

    try {
        final MavenArtifactReference mavenArtifactReference = new MavenArtifactReference(new PackageURL(purl));
        addPartResolvers(artifact, parts, mavenArtifactReference);
    } catch (MalformedPackageURLException e) {
        // record the problem on the artifact rather than aborting the collection of resolvers
        artifact.append("Errors", String.format("[PURL [%s] malformed.]", purl), "\n");
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy