All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.metaeffekt.artifact.resolver.maven.MavenArtifactResolver Maven / Gradle / Ivy

There is a newer version: 0.134.0
Show newest version
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.metaeffekt.artifact.resolver.maven;

import com.github.packageurl.MalformedPackageURLException;
import com.github.packageurl.PackageURL;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.document.Document;
import org.jdom2.JDOMException;
import org.metaeffekt.artifact.resolver.ArtifactResolver;
import org.metaeffekt.artifact.resolver.download.WebAccess;
import org.metaeffekt.artifact.resolver.generic.ArtifactIndex;
import org.metaeffekt.artifact.resolver.maven.index.MavenCentralIndex;
import org.metaeffekt.artifact.resolver.model.*;
import org.metaeffekt.core.inventory.processor.model.Artifact;
import org.metaeffekt.core.util.FileUtils;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.stream.Collectors;

import static org.metaeffekt.artifact.resolver.maven.index.MavenCentralIndex.sanitizeForWildcardQuery;
import static org.metaeffekt.artifact.resolver.model.ArtifactPartType.*;
import static org.metaeffekt.core.inventory.processor.model.Constants.KEY_HASH_SHA256;

/**
 * {@link ArtifactResolver} that registers part resolvers (POM descriptor, binary artifact, source artifact) for
 * artifacts that can be expressed as Maven coordinates.
 * <p>
 * Coordinates are derived twice per artifact: once from the artifact's PURL attribute and once from the artifact
 * itself. A reference lacking a group id is completed from the supplied {@link ArtifactIndex}; a reference still
 * lacking a group id or version is completed from the {@link MavenCentralIndex} (when one is configured) using the
 * observed file name.
 */
@Slf4j
public class MavenArtifactResolver implements ArtifactResolver {

    /** Hit limit passed to every central index query. */
    private static final int MAX_HITS = 120;

    private final MavenRepositoryAdapter repositoryAdapter;

    private final MavenPomAdapter pomAdapter = new MavenPomAdapter();

    private final ArtifactIndex artifactIndex;

    /** Central index used for file name lookups; {@code null} when the configuration does not provide one. */
    private final MavenCentralIndex mavenCentralIndex;

    /**
     * Creates the resolver and its repository adapter.
     *
     * @param downloadLocation handed to the repository adapter and the central index
     * @param webAccess        handed to the repository adapter and the central index
     * @param artifactIndex    index consulted when a reference has no group id
     * @param config           resolver configuration; the central index is only created when
     *                         {@code config.getMavenCentralIndexConfig()} is not {@code null}
     */
    public MavenArtifactResolver(DownloadLocation downloadLocation, WebAccess webAccess, ArtifactIndex artifactIndex,
                                 MavenArtifactResolverConfig config) {
        this.repositoryAdapter = new MavenRepositoryAdapter(downloadLocation, webAccess, config);
        this.artifactIndex = artifactIndex;
        this.mavenCentralIndex = config.getMavenCentralIndexConfig() == null ? null :
                new MavenCentralIndex(downloadLocation, webAccess, config.getMavenCentralIndexConfig());
    }

    /**
     * Creates a reference object for a matched representative document for easier handling.
     * <p>
     * The document fields {@code g}, {@code a}, {@code v}, {@code p} and {@code c} are copied to group id,
     * artifact id, version, packaging and classifier respectively.
     *
     * @param matchedRepresentative document from index containing artifact data to be transferred
     * @return a reference filled with data from the given document
     */
    private static MavenArtifactReference getReferenceForRepresentatives(
            @NonNull final Document matchedRepresentative) {
        final MavenArtifactReference ref = new MavenArtifactReference();

        ref.setGroupId(matchedRepresentative.get("g"));
        ref.setArtifactId(matchedRepresentative.get("a"));
        ref.setVersion(matchedRepresentative.get("v"));
        ref.setPackaging(matchedRepresentative.get("p"));
        ref.setClassifier(matchedRepresentative.get("c"));

        return ref;
    }

    /**
     * Renders a representative as {@code g:a:v:c:p} for log output.
     *
     * @param representative non-null index document
     * @return colon-joined field values; absent fields are rendered as {@code null}
     */
    private static String describeRepresentative(Document representative) {
        return new StringJoiner(":")
                .add(representative.get("g"))
                .add(representative.get("a"))
                .add(representative.get("v"))
                .add(representative.get("c"))
                .add(representative.get("p"))
                .toString();
    }

    /**
     * Extracts the checksum from the content of a checksum file.
     * <p>
     * Only the first whitespace-delimited token is used, so that files in the {@code <hex>  <filename>} layout
     * compare the same way as files containing the bare checksum.
     *
     * @param checksumFileContent raw file content
     * @return the first token of the trimmed content; empty string for blank content
     */
    private static String extractChecksumToken(String checksumFileContent) {
        return checksumFileContent.trim().split("\\s+", 2)[0];
    }

    /**
     * Collects the part resolvers for the given artifact, first based on its PURL, then based on the coordinates
     * derived from the artifact itself.
     *
     * @param artifact artifact to collect resolvers for
     * @return container wrapping the collected part resolvers
     */
    @Override
    public ArtifactPartResolvers collectResolvers(@NonNull Artifact artifact) {
        final Collection<ArtifactPartResolver> parts = new HashSet<>();
        // FIXME: investigate testResolveWithIncompleteId for if Purl and GAV overwrite each other
        addArtifactResolverPartForPurl(parts, artifact);
        addArtifactResolverPartForGAV(parts, artifact);
        return new ArtifactPartResolvers(parts);
    }

    /**
     * Add parts for derived Groupid-Artifactid-Version tuples
     *
     * @param parts    modifiable collection to add to
     * @param artifact artifact to process
     */
    private void addArtifactResolverPartForGAV(Collection<ArtifactPartResolver> parts, Artifact artifact) {
        addPartResolvers(artifact, parts, new MavenArtifactReference(artifact));
    }

    /**
     * Checks whether the artifact's MD5 checksum equals the MD5 checksum published for a representative.
     *
     * @param artifactMd5ChecksumHex  MD5 checksum of the artifact as hex string; must be 32 characters long
     * @param clonedReference         reference being completed; supplies packaging / type for the checksum file name
     * @param representativeReference reference of the candidate whose checksum file is downloaded
     * @return {@code true} if the downloaded checksum equals the given one (ignoring case); {@code false} if the
     *         input has the wrong length, the checksum file cannot be downloaded, or the values differ
     * @throws IOException if the downloaded checksum file cannot be read
     */
    private boolean isChecksumSame(@NonNull String artifactMd5ChecksumHex,
                                   @NonNull MavenArtifactReference clonedReference,
                                   @NonNull MavenArtifactReference representativeReference) throws IOException {
        if (artifactMd5ChecksumHex.length() != 32) {
            log.error("Input hash not of size 32 for: reference [{}], md5 [{}].", clonedReference, artifactMd5ChecksumHex);

            // FIXME: this should be reporting into the inventory as error
            return false;
        }

        // HACK: bend classifier to make artifact download work; NA classifiers cause invalid filenames
        final String representativeClassifier = representativeReference.getClassifier();
        final String classifier = (representativeClassifier == null || "NA".equals(representativeClassifier))
                ? "" : representativeClassifier;

        // the checksum file sits next to the artifact: <packaging|type|jar>.md5
        final String md5Suffix =
                StringUtils.firstNonBlank(clonedReference.getPackaging(), clonedReference.getType(), "jar") + ".md5";

        final MavenArtifactReference hashRef =
                new MavenArtifactReference(representativeReference, classifier, md5Suffix);

        final File md5HashFile = repositoryAdapter.downloadArtifact(hashRef);
        if (md5HashFile == null) {
            log.debug("Could not download md5 file for reference [{}].", clonedReference);
            return false;
        }

        final String publishedChecksum =
                extractChecksumToken(FileUtils.readFileToString(md5HashFile, StandardCharsets.UTF_8));
        return StringUtils.equalsIgnoreCase(publishedChecksum, artifactMd5ChecksumHex);
    }

    /**
     * Completes a copy of the given reference from the available indexes and, if the result is valid, registers
     * resolvers for the descriptor, binary and source parts.
     *
     * @param artifact               artifact the resolvers are created for
     * @param parts                  modifiable collection to add to
     * @param mavenArtifactReference reference derived from the artifact; not modified, a copy is completed instead
     */
    private void addPartResolvers(Artifact artifact, Collection<ArtifactPartResolver> parts,
                                  MavenArtifactReference mavenArtifactReference) {
        final MavenArtifactReference clonedReference = new MavenArtifactReference(mavenArtifactReference, null, null);

        completeFromArtifactIndex(artifact, clonedReference);
        completeFromCentralIndex(artifact, clonedReference);

        if (!clonedReference.isValid()) {
            return;
        }

        parts.add(new ArtifactPartResolver(artifact, DESCRIPTOR,
            () -> repositoryAdapter.resolvePom(clonedReference),
            rap -> enrich(clonedReference, rap))
        );

        parts.add(new ArtifactPartResolver(artifact, BINARY_ARTIFACT,
            () -> repositoryAdapter.resolveBinaryArtifact(clonedReference),
            rap -> enrich(clonedReference, rap))
        );

        parts.add(new ArtifactPartResolver(artifact, SOURCE_ARTIFACT,
            () -> repositoryAdapter.resolveSourceArtifact(clonedReference),
            rap -> enrich(clonedReference, rap))
        );
    }

    /**
     * Fills group id and artifact id (and version, if blank) from the artifact index when the reference has no
     * group id.
     *
     * @param artifact  artifact whose id and checksum are used for the index lookup
     * @param reference reference to complete in place
     */
    private void completeFromArtifactIndex(Artifact artifact, MavenArtifactReference reference) {
        if (StringUtils.isNotBlank(reference.getGroupId())) {
            return;
        }

        final MavenArtifactReference mappedReference = artifactIndex.map(artifact.getId(), artifact.getChecksum());
        if (mappedReference == null) {
            return;
        }

        reference.setGroupId(mappedReference.getGroupId());
        reference.setArtifactId(mappedReference.getArtifactId());

        // an already known version takes precedence over the mapped one
        if (StringUtils.isBlank(reference.getVersion())) {
            reference.setVersion(mappedReference.getVersion());
        }
    }

    /**
     * Tries to get more data about the artifact by deriving potential artifacts from the central index.
     * <p>
     * Only runs when group id or version is blank, an observed file name is available and a central index is
     * configured. The first representative that passes the checksum check (if the artifact has a non-blank
     * checksum) supplies group id, artifact id and version.
     *
     * @param artifact  artifact supplying the checksum used to verify candidates
     * @param reference reference to complete in place
     * @throws RuntimeException wrapping any {@link IOException} raised while querying the index or reading a
     *                          checksum file
     */
    private void completeFromCentralIndex(Artifact artifact, MavenArtifactReference reference) {
        final boolean coordinatesIncomplete =
                StringUtils.isBlank(reference.getGroupId()) || StringUtils.isBlank(reference.getVersion());
        if (!coordinatesIncomplete || StringUtils.isBlank(reference.getObservedFileName())
                || mavenCentralIndex == null) {
            return;
        }

        try {
            mavenCentralIndex.ensureInitialized();

            final String observedFileName = reference.getObservedFileName();
            final Collection<Document> matchedRepresentatives = getRepresentativesByFilename(observedFileName);

            // building the message streams over all hits; only do so when it is actually logged
            if (log.isDebugEnabled() && !matchedRepresentatives.isEmpty()) {
                log.debug("[Filename], [(comma-separated) matched representatives]: [{}], [{}]", observedFileName,
                        matchedRepresentatives.stream()
                                .filter(Objects::nonNull)
                                .map(MavenArtifactResolver::describeRepresentative)
                                .collect(Collectors.joining(", ")));
            }

            // a blank checksum gives us nothing to verify against; treat it like an absent one
            final String rawChecksum = artifact.getChecksum();
            final String checksum = StringUtils.isBlank(rawChecksum) ? null : rawChecksum.trim();

            for (Document matchedRepresentative : matchedRepresentatives) {
                if (matchedRepresentative == null) {
                    continue;
                }

                final MavenArtifactReference repRef = getReferenceForRepresentatives(matchedRepresentative);

                if (checksum != null && !isChecksumSame(checksum, reference, repRef)) {
                    // if we have a checksum to work with, it should match the representative
                    continue;
                }

                // TODO: how to evaluate multiple representatives for LATER hash-/assessment-based merge?
                //  maybe write to separate fields, assess merge later (and / or if hashes match).

                // fill information from this found artifact
                reference.setGroupId(repRef.getGroupId());
                reference.setArtifactId(repRef.getArtifactId());
                reference.setVersion(repRef.getVersion());

                // use first good
                break;
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Constructs queries from the observed filename and returns matches.
     * <p>
     * The sanitized file name is queried first and its result is returned only if it is a single document.
     * Otherwise a wildcard query is issued: for {@code .jar} names the extension is replaced by
     * {@code .pom.sha*}, for all other names {@code .*} is appended. The result of that second query is returned.
     * <p>
     * NOTE(review): several hits on the first query are discarded when the second query finds nothing; this
     * mirrors the existing behavior - confirm it is intended.
     *
     * @param observedFileName filename to form the base of queries
     * @return matched documents; empty if nothing matched
     * @throws IOException on failure to read index
     */
    @NonNull
    private Collection<Document> getRepresentativesByFilename(final String observedFileName) throws IOException {
        final List<Document> exactMatches =
                mavenCentralIndex.queryByFilename(sanitizeForWildcardQuery(observedFileName), MAX_HITS);
        if (exactMatches.size() == 1) {
            return exactMatches;
        }

        final String wildcardQuery;
        if (observedFileName.endsWith(".jar")) {
            final String baseName = observedFileName.substring(0, observedFileName.length() - 4);
            wildcardQuery = sanitizeForWildcardQuery(baseName + ".pom.sha") + "*";
        } else {
            wildcardQuery = sanitizeForWildcardQuery(observedFileName + ".") + "*";
        }

        final List<Document> wildcardMatches = mavenCentralIndex.queryByFilename(wildcardQuery, MAX_HITS);
        return wildcardMatches.isEmpty() ? Collections.emptyList() : wildcardMatches;
    }

    /**
     * Creates a copy of the original artifact carrying the path and SHA-256 hash of the resolved file; for
     * descriptors the copy is additionally augmented with license metadata from the POM.
     *
     * @param mavenArtifactReference reference the part was resolved for
     * @param resolveArtifactPart    resolved part providing the original artifact, the file and the part type
     * @return the enriched copy; a failure to evaluate the descriptor is logged and does not prevent the return
     */
    private Artifact enrich(MavenArtifactReference mavenArtifactReference, ResolvedArtifactPart resolveArtifactPart) {
        // create new artifact and transfer derived values
        final Artifact enrichedArtifact = new Artifact(resolveArtifactPart.getOriginalArtifact());

        final File resolvedFile = resolveArtifactPart.getResolvedFile();
        final ArtifactPartType artifactPartType = resolveArtifactPart.getArtifactPartType();
        enrichedArtifact.set(artifactPartType.modulatePathAttribute(), resolvedFile.getAbsolutePath());

        // propagate the SHA-256 hash for all parts; use may be selective
        enrichedArtifact.set(artifactPartType.modulateAttributeInContext(KEY_HASH_SHA256),
                FileUtils.computeSHA256Hash(resolvedFile));

        // additionally enhance with information from pom
        if (DESCRIPTOR.equals(artifactPartType)) {
            try {
                pomAdapter.augmentLicenseMetaData(enrichedArtifact, mavenArtifactReference, resolvedFile);
            } catch (IOException | JDOMException e) {
                // best effort: keep the enriched artifact, but do not lose the cause of the failure
                log.warn("Cannot resolve descriptor of [{}].", resolvedFile.getAbsolutePath(), e);
            }
        }

        return enrichedArtifact;
    }

    /**
     * Adds resolver parts based on a derived purl.
     * <p>
     * Nothing is added when the artifact has no PURL. A malformed PURL is recorded in the artifact's
     * {@code Errors} attribute instead of being propagated as an exception.
     *
     * @param parts    modifiable collection to add to
     * @param artifact artifact to process
     */
    private void addArtifactResolverPartForPurl(Collection<ArtifactPartResolver> parts, Artifact artifact) {
        final String purl = artifact.get(Constants.KEY_PURL);
        if (StringUtils.isEmpty(purl)) {
            return;
        }

        try {
            final MavenArtifactReference mavenArtifactReference = new MavenArtifactReference(new PackageURL(purl));
            addPartResolvers(artifact, parts, mavenArtifactReference);
        } catch (MalformedPackageURLException e) {
            artifact.append("Errors", String.format("[PURL [%s] malformed.]", purl), "\n");
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy