All downloads are free. Search and download functionality uses the official Maven repository.

com.metaeffekt.artifact.analysis.metascan.MetaScanSupport Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.artifact.analysis.metascan;

import com.metaeffekt.artifact.analysis.model.PropertyProvider;
import com.metaeffekt.artifact.analysis.preprocess.filter.TextSieve;
import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.artifact.analysis.utils.InventoryUtils;
import com.metaeffekt.artifact.analysis.utils.PropertyUtils;
import com.metaeffekt.artifact.analysis.utils.StringStats;
import com.metaeffekt.artifact.analysis.utils.StringUtils;
import com.metaeffekt.artifact.terms.model.FileSegment;
import com.metaeffekt.artifact.terms.model.FileSegmentation;
import com.metaeffekt.artifact.terms.model.NormalizationMetaData;
import com.metaeffekt.artifact.terms.model.ScanResultPart;
import com.metaeffekt.artifact.terms.model.TermsMetaData;
import org.apache.tools.ant.DirectoryScanner;
import org.json.JSONArray;
import org.json.JSONObject;
import org.metaeffekt.core.inventory.processor.model.Artifact;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;

import static com.metaeffekt.artifact.analysis.metascan.Constants.KEY_IDENTIFIED_TERMS;

public class MetaScanSupport extends AbstractScanSupport {

    private static final Logger LOG = LoggerFactory.getLogger(MetaScanSupport.class);

    // Folder name suffixes for the issue categories reported by the scan. Each suffix is appended
    // to "<artifact-filename>-" to form a folder below the analysis folder. The plain folder is
    // handed to the HTML report creation; the "-files" variant receives copies of the affected
    // source files.

    /** Suffix of the folder for segments with partial matches that could not be resolved. */
    public static final String FOLDER_INCOMPLETE_MATCH = "incomplete-match";
    /** Suffix of the folder receiving copies of files with incomplete matches. */
    public static final String FOLDER_INCOMPLETE_MATCH_FILES = FOLDER_INCOMPLETE_MATCH + "-files";

    /** Suffix of the folder for segments that indicate an exception without a reference. */
    public static final String FOLDER_INDICATED_EXCEPTION = "indicated-exception";
    /** Suffix of the folder receiving copies of files with indicated exceptions. */
    public static final String FOLDER_INDICATED_EXCEPTIONS_FILES = FOLDER_INDICATED_EXCEPTION + "-files";

    /** Suffix of the folder for segments in which a licensing option marker was matched. */
    public static final String FOLDER_LICENSING_OPTION = "licensing-option";
    /** Suffix of the folder receiving copies of files with licensing options. */
    public static final String FOLDER_LICENSING_OPTION_FILES = FOLDER_LICENSING_OPTION + "-files";

    /** Suffix of the folder for segments containing unspecific licenses. */
    public static final String FOLDER_UNSPECIFIC_LICENSE = "unspecific-license";
    /** Suffix of the folder receiving copies of files with unspecific licenses. */
    public static final String FOLDER_UNSPECIFIC_LICENSES_FILES = FOLDER_UNSPECIFIC_LICENSE + "-files";

    /** Suffix of the folder for segments for which a segmentation issue was detected. */
    public static final String FOLDER_INSUFFICIENT_SEGMENTATION = "insufficient-segmentation";
    /** Suffix of the folder receiving copies of files with segmentation issues. */
    public static final String FOLDER_INSUFFICIENT_SEGMENTATION_FILES = FOLDER_INSUFFICIENT_SEGMENTATION + "-files";

    // FIXME: this is a yet undocumented issue type. We need to decide whether we keep this.
    /** Suffix of the folder into which the list of licenses with insufficient details is written. */
    public static final String FOLDER_INSUFFICIENT_LICENSE_DETAILS = "insufficient-license-details";

    /**
     * Optional text filter built from the wordlist of the normalization metadata. Remains
     * {@code null} when no non-empty wordlist is available at construction time.
     */
    private transient TextSieve textSieve;

    public MetaScanSupport(NormalizationMetaData normalizationMetaData, PropertyProvider propertyProvider) {
        super(normalizationMetaData, propertyProvider);

        // produce / retrieve wordlist if not available (works only with fully available tmd)
        try {
            if (normalizationMetaData.getWordlist() == null || normalizationMetaData.getWordlist().isEmpty()) {
                normalizationMetaData.generateAndSetWordlist();
            }
        } catch (Exception e) {
            LOG.warn("Failure while generating wordlist: [{}]", e.getMessage(), e);
        }

        if (normalizationMetaData.getWordlist() != null && !normalizationMetaData.getWordlist().isEmpty()) {
            textSieve = TextSieve.builder()
                    .wordlist(normalizationMetaData.getWordlist())
                    .build();
        }
    }

    /**
     * Runs the scan for the given artifact using the fixed log context {@code "no context"}.
     *
     * @param artifact    The artifact the scan results are applied to.
     * @param unpackedDir The directory containing the unpacked artifact content.
     *
     * @return The result of {@link #execute(Artifact, File, String)}.
     *
     * @throws IOException Propagated from the delegated scan.
     */
    public boolean execute(Artifact artifact, File unpackedDir) throws IOException {
        final String defaultContext = "no context";
        return execute(artifact, unpackedDir, defaultContext);
    }

    /**
     * Scans the files below {@code unpackedDir} for license terms and applies the result to the artifact.
     *
     * <p>Processing steps:
     * <ol>
     *   <li>Determine whether an existing result (properties file in the analysis folder) can be reused.
     *       If so, the cached properties are applied to the artifact and no scan is performed.</li>
     *   <li>Otherwise (and only if {@code analyze.scan.license.enabled} is set) previous outputs are removed
     *       and every included file is segmented and analyzed segment by segment.</li>
     *   <li>Detected issues (incomplete match, indicated exception, unspecific license, segmentation issue,
     *       licensing option) are logged, optionally reported as HTML and the affected file is copied to the
     *       respective issue folder.</li>
     *   <li>The derived licenses are written to the result properties file and applied to the artifact.</li>
     * </ol>
     *
     * @param artifact    The artifact being scanned; its id determines the names of the output files.
     * @param unpackedDir The directory containing the unpacked artifact content.
     * @param context     A prefix used in log messages to identify the scan.
     *
     * @return {@code true} if a scan was performed and its result was saved; {@code false} if a cached
     *         result was applied or scanning is disabled.
     *
     * @throws IOException If reading or writing scan inputs or outputs fails outside the per-file error handling.
     */
    public boolean execute(Artifact artifact, File unpackedDir, String context) throws IOException {

        final File targetFolder = deriveAnalysisFolder(unpackedDir);
        final File intermediateFolder = deriveIntermediateFolder(unpackedDir);
        final File scratchFolder = deriveScratchFolder(unpackedDir);

        final String filename = artifact.getId().replace("/", "_");
        final File resultPropertiesFile = new File(targetFolder, filename + "_license.properties");
        final File resultJsonFile = new File(targetFolder, filename + "_metascan.json");

        final File logFile = new File(targetFolder, filename + "_license-scan.txt");
        final File segmentFile = new File(targetFolder, filename + "_license-scan-segments.txt");
        final File segmentDebugFile = new File(targetFolder, filename + "_license-scan-segments_debug.txt");

        final boolean metaScanEnabled = getPropertyProvider().isProperty("analyze.scan.license.enabled", "true", "false");

        // collect licenses per artifact
        final Set<String> derivedLicenses = new HashSet<>();

        // an artifact can request a rescan individually via its scan configuration
        final String artifactScanConfiguration = artifact.get("Scan Configuration");
        boolean artifactScanOverwrite = false;
        if (artifactScanConfiguration != null) {
            artifactScanOverwrite = artifactScanConfiguration.contains("analyze.scan.license.overwrite=true");
        }

        final long resultFileTimestamp = resultPropertiesFile.lastModified();

        // use a reference timestamp to determine whether a new scan is required
        final long overwriteResultsOlderThan = Long.parseLong(getPropertyProvider().
                getProperty("analyze.scan.license.overwrite.timestamp", "0"));

        final boolean outdatedResult = resultFileTimestamp < overwriteResultsOlderThan;

        boolean overwrite = outdatedResult || artifactScanOverwrite || getPropertyProvider().
                isProperty("analyze.scan.license.overwrite", "true", "false");

        final boolean overwriteOnUnknown = getPropertyProvider().isProperty("analyze.scan.license.overwrite.unknown", "true", "false");
        final boolean overwriteOnIncompleteMatch = getPropertyProvider().isProperty("analyze.scan.license.overwrite.incomplete", "true", "false");

        // check existing results to determine whether scan needs to be redone
        if (!overwrite && resultPropertiesFile.exists() && (overwriteOnUnknown || overwriteOnIncompleteMatch)) {
            final Properties p = PropertyUtils.loadProperties(resultPropertiesFile);

            final String oldIncompleteMatch = p.getProperty("incomplete.match");
            if (overwriteOnIncompleteMatch && "true".equalsIgnoreCase(oldIncompleteMatch)) {
                LOG.info("{} Rescanning due to incomplete match.", context);
                overwrite = true;
            }

            // the unknown-license check (and its log message) is only relevant when it can trigger a rescan
            if (overwriteOnUnknown) {
                final String oldDerivedLicenses = p.getProperty("derived.licenses");

                // decompose into atomic licenses
                final List<String> licenses = InventoryUtils.tokenizeLicense(oldDerivedLicenses, false, false);
                for (String license : licenses) {
                    final TermsMetaData termsMetaData = InventoryUtils.getNormalizationMetaData().getTermsMetaData(license);
                    if (termsMetaData == null) {
                        // a license without metadata whose canonical name is not remapped is regarded unknown
                        final String updatedCanonicalName = InventoryUtils.getNormalizationMetaData().getUpdatedCanonicalName(license);
                        if (updatedCanonicalName.equalsIgnoreCase(license)) {
                            LOG.info("{} Rescanning due to unknown license [{}].", context, license);
                            overwrite = true;
                            break;
                        }
                    }
                }
            }
        }

        // attempt parsing the results file; if not possible manage overwrite
        if (resultJsonFile.exists()) {
            try {
                new JSONArray(FileUtils.readFileToString(resultJsonFile, StandardCharsets.UTF_8));
            } catch (Exception e) {
                // cannot parse; manage overwrite
                overwrite = true;
                LOG.info("{} Rescanning due to incomplete result file [{}].", context, resultJsonFile.getAbsolutePath());
            }
        }

        // in case overwrite is not set and a result file exists, we apply the already evaluated result and return fast.
        if (!overwrite && resultPropertiesFile.exists() && intermediateFolder.exists()) {
            final Properties p = PropertyUtils.loadProperties(resultPropertiesFile);
            applyToArtifact(artifact, p);
            return false;
        }

        // in case scan is not enabled we skip all further processing
        if (!metaScanEnabled) {
            return false;
        }

        // the properties file serves as semaphore
        FileUtils.deleteQuietly(resultPropertiesFile);
        FileUtils.deleteQuietly(resultJsonFile);

        final File incompleteMatchesFolder = new File(targetFolder, filename + "-" + FOLDER_INCOMPLETE_MATCH);
        final File incompleteMatchesFileFolder = new File(targetFolder, filename + "-" + FOLDER_INCOMPLETE_MATCH_FILES);

        final File indicatedExceptionsFolder = new File(targetFolder, filename + "-" + FOLDER_INDICATED_EXCEPTION);
        final File indicatedExceptionsFileFolder = new File(targetFolder, filename + "-" + FOLDER_INDICATED_EXCEPTIONS_FILES);

        final File licenseOptionFolder = new File(targetFolder, filename + "-" + FOLDER_LICENSING_OPTION);
        final File licenseOptionFileFolder = new File(targetFolder, filename + "-" + FOLDER_LICENSING_OPTION_FILES);

        final File unspecificLicenseFolder = new File(targetFolder, filename + "-" + FOLDER_UNSPECIFIC_LICENSE);
        final File unspecificLicenseFileFolder = new File(targetFolder, filename + "-" + FOLDER_UNSPECIFIC_LICENSES_FILES);

        final File unsufficientSegmentationFolder = new File(targetFolder, filename + "-" + FOLDER_INSUFFICIENT_SEGMENTATION);
        final File unsufficientSegmentationFileFolder = new File(targetFolder, filename + "-" + FOLDER_INSUFFICIENT_SEGMENTATION_FILES);

        final File insufficientLicenseDetailsFolder = new File(targetFolder, filename + "-" + FOLDER_INSUFFICIENT_LICENSE_DETAILS);

        final File reportFolder = new File(targetFolder, filename + "-reports");

        // remove the outputs of a previous scan
        final File[] staleOutputFolders = {
                incompleteMatchesFolder, incompleteMatchesFileFolder,
                licenseOptionFolder, licenseOptionFileFolder,
                unspecificLicenseFolder, unspecificLicenseFileFolder,
                indicatedExceptionsFolder, indicatedExceptionsFileFolder,
                unsufficientSegmentationFolder, unsufficientSegmentationFileFolder,
                insufficientLicenseDetailsFolder,
                reportFolder
        };
        for (File staleOutputFolder : staleOutputFolders) {
            if (staleOutputFolder.exists()) {
                FileUtils.deleteDir(staleOutputFolder);
            }
        }

        if (intermediateFolder.exists()) {
            FileUtils.cleanDirectory(intermediateFolder);
        }

        final String[] scanIncludes = getPropertyProvider().getProperty("analyze.metascan.license.includes", "**/*").split(",");
        final String[] scanExcludes = getPropertyProvider().getProperty("analyze.metascan.license.excludes", "**/.git/**/*").split(",");

        final boolean debugSegments = getPropertyProvider().isProperty("analyze.metascan.license.debug.enabled", "true", "false");

        final boolean enableReport = getPropertyProvider().isProperty("analyze.metascan.report.enable", "true", "false");
        final boolean forceReport = getPropertyProvider().isProperty("analyze.metascan.report.force", "true", "false");

        // evaluate sieve support; currently disabled by default
        final boolean useTextSieve = getPropertyProvider().isProperty("analyze.sieve.enabled", "true", "false");

        final NormalizationMetaData normalizationMetaData = getNormalizationMetaData();

        final DirectoryScanner scanner = new DirectoryScanner();
        scanner.setBasedir(unpackedDir);
        scanner.setIncludes(scanIncludes);
        scanner.setExcludes(scanExcludes);
        scanner.scan();

        final String[] filesToScan = scanner.getIncludedFiles();

        init(logFile, unpackedDir.getName());
        init(segmentFile, unpackedDir.getName());
        if (debugSegments) {
            init(segmentDebugFile, unpackedDir.getName());
        }

        // single-element array so that the callee can modify the flag
        final boolean[] resultJsonFileSemaphore = new boolean[1];
        resultJsonFileSemaphore[0] = true;
        FileUtils.forceMkDirQuietly(intermediateFolder);

        // Begin JSON file
        FileUtils.write(resultJsonFile, "[", StandardCharsets.UTF_8);

        final int size = filesToScan.length;
        int i = 0;
        for (String fileToScan : filesToScan) {
            i++;

            // collect licenses per file
            final Set<String> derivedLicensesForFile = new HashSet<>();

            final File file = new File(unpackedDir, fileToScan);

            // don't care for symlinks
            if (FileUtils.isSymlink(file)) {
                continue;
            }

            if (FileUtils.matches(file.getAbsolutePath(), scanExcludes)) {
                continue;
            }

            LOG.info("{} ({}/{}) Analyzing file [{}]...", context, i, size, file.getAbsolutePath());

            final String relativeFilePath = extractRelativePath(unpackedDir, file);

            try {
                // detect encoding once; it is reused for all read attempts below
                final String detectedEncoding = FileUtils.detectEncoding(file);

                // read content (applying textSieve or fallback to non-sieve loading)
                String fileContent;
                if (useTextSieve && textSieve != null) {
                    try {
                        final Charset detectedCharset = Charset.forName(detectedEncoding);
                        fileContent = textSieve.loadFiltered(file, detectedCharset, scratchFolder).toString();
                    } catch (Exception e) {
                        LOG.warn("Could not use TextSieve due to exception: [{}]", e.getMessage(), e);
                        // in case of an exception with encoding, charset or loading we return to the non-sieve mode
                        fileContent = FileUtils.readFileToString(file, detectedEncoding);
                    }
                } else {
                    // non-sieve mode
                    fileContent = FileUtils.readFileToString(file, detectedEncoding);
                }

                // FIXME: AE-690 determine when masking is applied

                // FIXME: isolate the whole segmentation aspect in a separate SegmentationSupport class.
                final FileSegmentation fileSegmentation = new FileSegmentation(fileContent, normalizationMetaData);

                // TODO log debug information for segments
                if (debugSegments) {
                    log(segmentDebugFile, fileSegmentation.getMarkedSegmentsString());
                }
                log(segmentFile, String.format("%n>>>> [%s] analysis START:", relativeFilePath));

                // name used for copies of this file in the issue folders; the checksum covers the whole
                // file and is therefore computed only once (with the first segment) instead of per segment
                String fileCopyName = null;

                // process the individual segments
                for (int j = 0; j < fileSegmentation.getSegmentCount(); j++) {
                    final StringBuilder resultSummary = new StringBuilder();

                    final FileSegment fileSegment = fileSegmentation.getFileSegment(j);
                    final String segmentContent = fileSegment.getContent();

                    final String id = relativeFilePath + "/" + j;

                    log(segmentFile, String.format("%n>>> Segment %d [%s] analysis:%n", j, relativeFilePath));
                    final StringStats licenseTextStats = fileSegment.getNormalizedContent();

                    final ScanResultPart normalizedLicensesSRP = normalizationMetaData.doAnalyze(licenseTextStats);

                    // terms as matched by the analysis (before processing)
                    final List<String> matchedLicenses = normalizedLicensesSRP.getMatchedTerms();
                    for (String license : matchedLicenses) {
                        if (org.springframework.util.StringUtils.hasText(license) && !"[]".equals(license)) {
                            final String message = String.format(">  Matched license [%s] in file [%s/%s]", license, relativeFilePath, j);
                            log(segmentFile, message);
                            resultSummary.append(message).append("\n");
                        }
                    }

                    // TODO: filter name matches that are equal to "represented as" of evidence matched results

                    // process
                    normalizedLicensesSRP.process(normalizationMetaData, true, true);
                    fileSegment.setNormalizedSRP(normalizedLicensesSRP);

                    // terms after processing
                    final List<String> matchedTerms = normalizedLicensesSRP.getMatchedTerms();

                    // extract variable-content for all licenses with variables
                    try {
                        if (segmentHasVariableLicense(matchedTerms)) {
                            fileSegment.setLicenseVariables(getVariablesPerLicenseInSegment(matchedTerms, licenseTextStats));
                        }
                    } catch (Exception e) {
                        LOG.warn("Variable extraction failed: {}. Execution continued.", e.getMessage(), e);
                    }

                    if (matchedTerms.isEmpty()) {
                        final String message = String.format("> No terms resolved in file [%s/%s]", relativeFilePath, j);
                        log(segmentFile, message);
                        resultSummary.append(message).append("\n");
                    } else {
                        // report each resolved term once, in order of first occurrence
                        for (String license : new LinkedHashSet<>(matchedTerms)) {
                            final String message = String.format("> Resolved terms [%s] in file [%s/%s]", license, relativeFilePath, j);
                            log(segmentFile, message);
                            resultSummary.append(message).append("\n");
                            derivedLicensesForFile.add(license);
                        }
                    }

                    // diff the partial matches to detect not fully matched licenses
                    // iterate through matched licenses and resolve the pre-computed partial matches
                    final Set<String> aggregatedPartialMatches = new HashSet<>();
                    final Set<String> aggregatedExcludeMatches = new HashSet<>();
                    for (String matchedLicense : matchedLicenses) {
                        final TermsMetaData lmd = normalizationMetaData.getTermsMetaData(matchedLicense);
                        final List<String> partialMatches = lmd.getPartialMatches();
                        final List<String> excludeMatches = lmd.getExcludedMatches();
                        if (partialMatches != null) {
                            aggregatedPartialMatches.addAll(partialMatches);
                        }
                        if (excludeMatches != null) {
                            aggregatedExcludeMatches.addAll(excludeMatches);
                        }
                    }

                    final List<String> retainedPartialMatches = normalizedLicensesSRP.getPartialMatchedTerms();
                    retainedPartialMatches.removeAll(aggregatedPartialMatches);
                    retainedPartialMatches.removeAll(aggregatedExcludeMatches);

                    // also remove those identified (by name)
                    retainedPartialMatches.removeAll(matchedTerms);

                    // markers do not contribute to incomplete matches
                    InventoryUtils.removeMarkers(retainedPartialMatches, normalizationMetaData);

                    if (LOG.isDebugEnabled()) {
                        LOG.debug(" Aggregates PMs: {}", aggregatedPartialMatches);
                        LOG.debug(" Excluded PMs: {}", aggregatedExcludeMatches);
                        LOG.debug(" Matched PMs: {}", normalizedLicensesSRP.getPartialMatchedTerms());
                        LOG.debug(" Retained PMs: {}", retainedPartialMatches);
                    }

                    final boolean hasIncompleteMatches = !retainedPartialMatches.isEmpty();
                    if (fileCopyName == null) {
                        fileCopyName = FileUtils.computeChecksum(file) + "-" + file.getName();
                    }
                    if (hasIncompleteMatches) {
                        final String message = String.format("> Incomplete license identification in file [%s/%s]: individual matches indicate one of %s", relativeFilePath, j, retainedPartialMatches);
                        log(logFile, message);
                        LOG.info("{} ({}/{}) {}", context, i, size, message);
                        log(segmentFile, message);
                        resultSummary.append(message).append("\n");
                        derivedLicensesForFile.add("Incomplete Match");

                        if (enableReport) {
                            // create a html report when an incomplete match was detected
                            createHtmlReport("Incomplete Match " + relativeFilePath, normalizedLicensesSRP,
                                    incompleteMatchesFolder, licenseTextStats, id, retainedPartialMatches,
                                    "incomplete-match", filename);
                        }

                        FileUtils.copyFile(file, new File(incompleteMatchesFileFolder, fileCopyName));
                    }

                    final boolean hasIndicatedExceptions = isIndicatedExceptionWithoutReference(normalizedLicensesSRP.getMatches());
                    if (hasIndicatedExceptions) {
                        final String message = String.format("> Indicated exception without reference detected in file [%s/%s].", relativeFilePath, j);
                        log(logFile, message);
                        LOG.info("{} ({}/{}) {}", context, i, size, message);
                        log(segmentFile, message);
                        resultSummary.append(message).append("\n");
                        derivedLicensesForFile.add("Indicated Exception");

                        if (enableReport) {
                            // create a html report when an indicated exception was detected
                            createHtmlReport("Indicated Exception " + relativeFilePath, normalizedLicensesSRP,
                                    indicatedExceptionsFolder, licenseTextStats, id, retainedPartialMatches,
                                    "indicated-exceptions", filename);
                        }

                        FileUtils.copyFile(file, new File(indicatedExceptionsFileFolder, fileCopyName));
                    }

                    final boolean hasUnspecificLicenses = containsUnspecificLicenses(normalizedLicensesSRP.getMatches());
                    if (hasUnspecificLicenses) {
                        final String message = String.format("> Unspecific license detected in file [%s/%s].", relativeFilePath, j);
                        log(logFile, message);
                        LOG.info("{} ({}/{}) {}", context, i, size, message);
                        log(segmentFile, message);
                        resultSummary.append(message).append("\n");

                        if (enableReport) {
                            // the report controller decides whether a report is created for this result
                            if (ReportController.getInstance().createReportFor(normalizedLicensesSRP, licenseTextStats)) {
                                createHtmlReport("Unspecific License " + relativeFilePath, normalizedLicensesSRP,
                                        unspecificLicenseFolder, licenseTextStats, id, retainedPartialMatches,
                                        "unspecific-licenses", filename);

                            }
                        }

                        FileUtils.copyFile(file, new File(unspecificLicenseFileFolder, fileCopyName));
                    }

                    final boolean hasSegmentationIssue = hasSegmentationIssue(normalizedLicensesSRP.getTextMatchedTerms(), matchedTerms);
                    if (hasSegmentationIssue) {
                        final String message = String.format("> Segmentation Issue detected in file [%s/%s].", relativeFilePath, j);
                        log(logFile, message);
                        LOG.info("{} ({}/{}) {}", context, i, size, message);
                        log(segmentFile, message);
                        resultSummary.append(message).append("\n");
                        if (enableReport) {
                            createHtmlReport("Segmentation Issue " + relativeFilePath, normalizedLicensesSRP,
                                    unsufficientSegmentationFolder, licenseTextStats, id, retainedPartialMatches,
                                    "segmentation-issue", filename);
                        }

                        FileUtils.copyFile(file, new File(unsufficientSegmentationFileFolder, fileCopyName));
                    }

                    final boolean hasLicensingOption =
                            // legacy marker name
                            matchedTerms.contains("Licensing Option") ||
                            // current marker name; FIXME: some markers are hardcoded; here we need to be careful
                            matchedTerms.contains("License Option Marker");
                    if (hasLicensingOption) {
                        final String message = String.format("> License options detected in file [%s/%s].", relativeFilePath, j);
                        log(logFile, message);
                        LOG.info("{} ({}/{}) {}", context, i, size, message);

                        log(segmentFile, message);
                        resultSummary.append(message).append("\n");

                        if (enableReport) {
                            createHtmlReport("Licensing Option " + relativeFilePath, normalizedLicensesSRP,
                                    licenseOptionFolder, licenseTextStats, id, retainedPartialMatches,
                                    "licensing-option", filename);
                        }

                        FileUtils.copyFile(file, new File(licenseOptionFileFolder, fileCopyName));
                    }

                    // with forced reporting a report is created for every segment, independent of issues
                    if (forceReport) {
                        createHtmlReport("Scan Report " + relativeFilePath, normalizedLicensesSRP,
                                reportFolder, licenseTextStats, id, retainedPartialMatches,
                                "scan-report", filename);
                    }

                    // log the content
                    log(segmentFile, String.format("%n>>> Segment %d [%s] content START: >>>%n%n%s%n%n<<< Segment %d [%s] content END <<<%n", j, relativeFilePath, segmentContent, j, relativeFilePath));

                    // log summary (below content)
                    log(segmentFile, resultSummary);

                    derivedLicenses.addAll(derivedLicensesForFile);
                }

                writeIntermediateFileStructure(unpackedDir, file, fileSegmentation, intermediateFolder, resultJsonFile, resultJsonFileSemaphore);

            } catch (Exception e) {
                // a failure in one file must not abort the scan of the remaining files
                LOG.error("EM1: {}", e.getMessage(), e);
            }

            // FIXME-2020: here would be the place to consolidate derivedLicensesPerFile
            final Set<String> removableLicenses = InventoryUtils.collectCoveredRemovableLicenses(derivedLicensesForFile);
            derivedLicensesForFile.removeAll(removableLicenses);

            // log summary to segment file
            if (!derivedLicensesForFile.isEmpty()) {
                log(segmentFile, String.format("<<<< Resolved license set for [%s]:%n %s%n", fileToScan, derivedLicensesForFile));
                log(logFile, String.format("<<<< Resolved license set for [%s]:%n %s", fileToScan, derivedLicensesForFile));
            }
            if (!removableLicenses.isEmpty()) {
                log(segmentFile, String.format("<<<< Removed license set for [%s]:%n %s%n", fileToScan, removableLicenses));
                log(logFile, String.format("<<<< Removed license set for [%s]:%n %s", fileToScan, removableLicenses));
            }

            log(segmentFile, String.format("<<<< [%s] analysis END <<<<", relativeFilePath));

            if (!derivedLicensesForFile.isEmpty()) {
                LOG.info("{} ({}/{}) Analyzing file [{}] resolved {}", context, i, size, file.getAbsolutePath(), derivedLicensesForFile);
            } else {
                LOG.info("{} ({}/{}) Analyzing file [{}].", context, i, size, file.getAbsolutePath());
            }
            if (!removableLicenses.isEmpty()) {
                LOG.info("{} ({}/{}) Analyzing file [{}] removed [{}]", context, i, size, file.getAbsolutePath(), removableLicenses);
            }
        }

        // insufficientLicenseDetails issue; evaluated once on the complete set of derived licenses. The set
        // only grows while files are scanned, so this yields the same output as re-evaluating after each file.
        recordInsufficientLicenseDetails(derivedLicenses, normalizationMetaData, insufficientLicenseDetailsFolder);

        // complete resultJsonFile
        FileUtils.write(resultJsonFile, "]", StandardCharsets.UTF_8, true);

        String deriveLicenseResult = "";
        if (!derivedLicenses.isEmpty()) {
            final ArrayList<String> orderedList = new ArrayList<>(derivedLicenses);
            Collections.sort(orderedList, String.CASE_INSENSITIVE_ORDER);
            deriveLicenseResult = StringUtils.toString(orderedList);
        }

        // store the derived licenses in a property file serving as cache
        final Properties result = new Properties();
        result.setProperty("derived.licenses", deriveLicenseResult);
        result.setProperty("incomplete.match", String.valueOf(derivedLicenses.contains("Incomplete Match")));

        applyToArtifact(artifact, result);
        PropertyUtils.saveProperties(resultPropertiesFile, result);

        return true;
    }

    /**
     * Writes the licenses lacking license text / copyright requirement details to
     * {@code insufficientLicenseDetails.txt} in the given folder. Nothing is written if no such license exists.
     *
     * <p>Exceptions, expressions, markers, unspecific licenses and licenses allowing later versions are not
     * considered; neither are licenses without terms metadata.
     *
     * @param derivedLicenses                  The licenses derived by the scan.
     * @param normalizationMetaData            The metadata used to look up the terms metadata of a license.
     * @param insufficientLicenseDetailsFolder The folder the output file is created in.
     *
     * @throws IOException If the folder or the output file cannot be written.
     */
    private void recordInsufficientLicenseDetails(Set<String> derivedLicenses,
                    NormalizationMetaData normalizationMetaData, File insufficientLicenseDetailsFolder) throws IOException {

        final Set<String> insufficientLicenseDetails = new HashSet<>();
        for (String license : derivedLicenses) {
            final TermsMetaData tmd = normalizationMetaData.getTermsMetaData(license);
            if (tmd == null) {
                continue;
            }
            if (tmd.isException() || tmd.isExpression() || tmd.isMarker()) {
                continue;
            }
            if (tmd.isUnspecific()) {
                continue;
            }
            if (tmd.allowLaterVersions()) {
                continue;
            }

            if (tmd.getRequiresLicenseText() == null || tmd.getRequiresCopyright() == null) {
                insufficientLicenseDetails.add(license);
            }
        }

        if (insufficientLicenseDetails.isEmpty()) {
            return;
        }

        FileUtils.forceMkdir(insufficientLicenseDetailsFolder);
        final File output = new File(insufficientLicenseDetailsFolder, "insufficientLicenseDetails.txt");

        // try-with-resources closes the writer even if a write fails
        try (FileWriter writer = new FileWriter(output)) {
            for (String license : insufficientLicenseDetails) {
                writer.write(license + System.lineSeparator());
            }
        }
    }

    /**
     * Derives the intermediate folder: a sibling of the given directory named
     * {@code <name>-intermediate}.
     *
     * @param analysisDir The directory to derive the folder from.
     *
     * @return The derived folder (not created by this method).
     */
    public static File deriveIntermediateFolder(File analysisDir) {
        final File parentFolder = analysisDir.getParentFile();
        final String folderName = analysisDir.getName() + "-intermediate";
        return new File(parentFolder, folderName);
    }

    /**
     * Derives the analysis folder for the given analysis directory.
     *
     * @param analysisDir The analysis directory the folder is derived from
     *
     * @return A file located in the parent folder of analysisDir that carries the name of analysisDir extended by
     *         the suffix "-analysis".
     */
    public static File deriveAnalysisFolder(File analysisDir) {
        final File parentFolder = analysisDir.getParentFile();
        final String folderName = analysisDir.getName() + "-analysis";
        return new File(parentFolder, folderName);
    }

    /**
     * Derives the scratch folder for the given analysis directory.
     *
     * @param analysisDir The analysis directory the folder is derived from
     *
     * @return A file located in the parent folder of analysisDir that carries the name of analysisDir extended by
     *         the suffix "-scratch".
     */
    public static File deriveScratchFolder(File analysisDir) {
        final File parentFolder = analysisDir.getParentFile();
        final String folderName = analysisDir.getName() + "-scratch";
        return new File(parentFolder, folderName);
    }

    /**
     * Checks the given licenses for a segmentation issue.
     * <p>
     * The passed list is modified in place: every license that is not contained in the relevant matches is removed
     * and the list is subsequently handed to {@code InventoryUtils.removeMarkers} (presumably stripping marker
     * entries; see that method for details).
     *
     * @param licenses          The licenses which need to be checked; modified by this method
     * @param relevantMatches   The relevant matches a license has to be contained in to be retained
     *
     * @return {@code true} if more than one license remains after the filtering; {@code false} otherwise.
     */
    private boolean hasSegmentationIssue(List licenses, List relevantMatches) {
        // only licenses backed by a relevant match are taken into account
        licenses.removeIf(candidate -> !relevantMatches.contains(candidate));
        InventoryUtils.removeMarkers(licenses, getNormalizationMetaData());

        final int remainingLicenses = licenses.size();
        return remainingLicenses > 1;
    }

    /**
     * Writes the segments of a scanned file into the intermediate folder and appends a JSON object summarizing the
     * per-segment results to the result JSON file.
     * <p>
     * Any exception raised while writing is logged as warning; the method itself never fails.
     *
     * @param unpackBaseDir             The base directory against which the relative path of file is computed
     * @param file                      The scanned file
     * @param fileSegmentation          The segmentation of the file; provides the segments to write
     * @param intermediateFolder        The folder below which the intermediate results of the file are saved
     * @param resultJsonFile            The JSON file to which the result object of the file is appended
     * @param resultJsonFileSemaphore   Single-element flag; {@code true} as long as no entry has been appended to
     *                                  the result JSON file. It is set to {@code false} once an entry was written
     *                                  so that subsequent entries are prefixed with a separating comma.
     */
    private void writeIntermediateFileStructure(File unpackBaseDir, File file, FileSegmentation fileSegmentation,
                    File intermediateFolder, File resultJsonFile, boolean[] resultJsonFileSemaphore) {

        final String filePath = FileUtils.asRelativePath(unpackBaseDir, file);
        final File intermediateFile = new File(intermediateFolder, filePath);

        try {
            final JSONObject jsonFileObject = new JSONObject();
            final JSONObject jsonSegmentsObject = new JSONObject();

            jsonFileObject.put("file", filePath);
            final List<FileSegment> segmentFoldersForScancode = fileSegmentation.combineSegmentsAndWriteFoldersForScancode(intermediateFile);
            jsonFileObject.put("segmentCount", segmentFoldersForScancode.size());

            for (int j = 0; j < segmentFoldersForScancode.size(); j++) {
                final FileSegment fileSegment = segmentFoldersForScancode.get(j);
                final ScanResultPart normalizedSRP = fileSegment.getNormalizedSRP();
                if (normalizedSRP == null) {
                    // segments without normalized scan result do not contribute an entry
                    continue;
                }

                final JSONObject segmentResult = new JSONObject();
                final List<?> nameMatchedLicenses = normalizedSRP.getNameMatchedTerms();
                final List<?> textMatchedLicenses = normalizedSRP.getTextMatchedTerms();
                final List<?> resolvedLicenses = normalizedSRP.getMatchedTerms();

                // empty lists are omitted to keep the result compact
                if (!nameMatchedLicenses.isEmpty()) {
                    segmentResult.put("nameMatches", nameMatchedLicenses);
                }
                if (!textMatchedLicenses.isEmpty()) {
                    segmentResult.put("textMatches", textMatchedLicenses);
                }
                if (!resolvedLicenses.isEmpty()) {
                    segmentResult.put("resolvedLicenses", resolvedLicenses);
                }
                if (fileSegment.getLicenseVariables() != null) {
                    segmentResult.put("variables", fileSegment.getLicenseVariables());
                }
                jsonSegmentsObject.put("segment-" + j, segmentResult);
            }

            // as before, the "segments" entry is only emitted for files with at least one segment; it is attached
            // once after the loop instead of once per iteration
            if (!segmentFoldersForScancode.isEmpty()) {
                jsonFileObject.put("segments", jsonSegmentsObject);
            }

            // every entry but the first is preceded by a comma. Separator and object are appended with a single
            // write so that a failure between two writes cannot leave a dangling separator in the result file.
            final String separator = resultJsonFileSemaphore[0] ? "" : ",";
            FileUtils.write(resultJsonFile, separator + jsonFileObject.toString(), StandardCharsets.UTF_8, true);
            resultJsonFileSemaphore[0] = false;
        } catch (Exception e) {
            // the exception is passed as trailing argument to preserve the stack trace in the log
            LOG.warn("Creating folder for scancode failed for {}: {}. Execution continued.", file.getName(), e.getMessage(), e);
        }
    }

    /**
     * Determines whether the given terms meta data contains at least one unspecific license.
     *
     * @param termsMetaData The license terms meta data to inspect; {@code null} entries are skipped
     *
     * @return {@code true} if any non-null entry reports {@code isUnspecific()}; {@code false} otherwise.
     */
    private boolean containsUnspecificLicenses(List<TermsMetaData> termsMetaData) {
        for (final TermsMetaData tmd : termsMetaData) {
            if (tmd != null && tmd.isUnspecific()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Transfers the scan result stored in the given properties to the artifact.
     * <p>
     * The identified terms are read from the property "derived.licenses"; if it is not set the property
     * "identified.terms" is used; if neither is set an empty string is applied. In case the identified terms contain
     * "Incomplete Match" the value of the property "incomplete.match" (default "false") is set as attribute
     * "Incomplete Match" on the artifact.
     *
     * @param artifact  The artifact to which the result is applied
     * @param p         The properties carrying the scan result
     */
    protected void applyToArtifact(Artifact artifact, Properties p) {
        // "derived.licenses" takes precedence over "identified.terms"; the empty string is the last resort
        final String identifiedTerms = p.getProperty("derived.licenses", p.getProperty("identified.terms", ""));

        if (identifiedTerms.contains("Incomplete Match")) {
            artifact.set("Incomplete Match", p.getProperty("incomplete.match", "false"));
        }

        artifact.set(KEY_IDENTIFIED_TERMS, identifiedTerms);
    }

    /**
     * Creates an HTML match report. The report file is placed in targetDir and named
     * {@code <id>_<type><segmentId>.html}, with every "/" in segmentId replaced by "_".
     * <p>
     * Any {@link Throwable} raised while generating the report is logged as error and not propagated.
     *
     * @param htmlReportTitle           Title of the report; set as canonical name on the temporary terms meta data
     *                                  that renders the report
     * @param scanResultPart            The scan result part passed on to the report generation
     * @param targetDir                 Directory for saving the report
     * @param textStats                 The text statistics passed on to the report generation
     * @param segmentId                 ID of the segment; part of the report file name
     * @param retainedPartialMatches    The retained partial matches passed on to the report generation
     * @param type                      The type of the report; part of the report file name
     * @param id                        The ID of the report; prefix of the report file name
     */
    protected void createHtmlReport(String htmlReportTitle, ScanResultPart scanResultPart, File targetDir, StringStats textStats, String segmentId, List retainedPartialMatches, String type, String id) {
        try {
            final String reportFileName = id + "_" + type + segmentId.replace("/", "_") + ".html";
            final File reportFile = new File(targetDir, reportFileName);

            // a temporary meta data object that only carries the title renders the report
            final TermsMetaData reportMetaData = new TermsMetaData();
            reportMetaData.setCanonicalName(htmlReportTitle);
            reportMetaData.createMatchReportHtml(textStats, scanResultPart, reportFile, retainedPartialMatches);
        } catch (Throwable e) {
            // Throwable is caught on purpose: binaries may trigger an out of memory error (based on string size
            // limits) which must not break the overall process
            LOG.error("Cannot generate HTML Report!", e);
        }
    }

    /**
     * Extracts the path of a given file relative to a base directory.
     * <p>
     * The path of baseDir is only stripped when it is a prefix of the file path that ends on a path boundary. A
     * base directory {@code /a/b} is therefore not applied to the file {@code /a/bc/x}. When the prefix is stripped
     * the result retains the leading separator (for example {@code /c.txt} for {@code /a/b} and
     * {@code /a/b/c.txt}).
     *
     * @param baseDir The base directory of a given file for comparison.
     * @param file    The file from which the relative path is being extracted.
     *
     * @return The remainder of the file path after the base directory path; the unmodified file path in case the
     *         file is not located below baseDir; an empty string in case both paths are equal.
     */
    public String extractRelativePath(File baseDir, File file) {
        final String filePath = file.getPath();
        final String baseDirPath = baseDir.getPath();

        if (!filePath.startsWith(baseDirPath)) {
            return filePath;
        }

        // a plain prefix match is not sufficient: "/a/b" is a prefix of "/a/bc/x" without being a parent folder.
        // The prefix is only removed if it is empty, covers the whole file path, or ends at a separator.
        final int prefixLength = baseDirPath.length();
        final boolean endsOnPathBoundary = prefixLength == 0
                || filePath.length() == prefixLength
                || baseDirPath.charAt(prefixLength - 1) == File.separatorChar
                || filePath.charAt(prefixLength) == File.separatorChar;

        return endsOnPathBoundary ? filePath.substring(prefixLength) : filePath;
    }

    // FIXME: use dedicated java class to capture the variables
    /**
     * Collects the variables of all given licenses that have variables.
     *
     * @param normalizedLicenses    The names of the licenses to evaluate
     * @param licenseTextStats      The text in which the variable values are looked up
     *
     * @return A JSON array with one JSON object per license that has variables; each object maps the license name
     *         to the key-value pairs produced by {@code getVariableKeyValuePerLicense}. Licenses without variables
     *         do not contribute an entry.
     */
    private JSONArray getVariablesPerLicenseInSegment(List<String> normalizedLicenses, StringStats licenseTextStats) {
        final JSONArray licenseVariablesArray = new JSONArray();
        for (final String license : normalizedLicenses) {
            if (!licenseHasVariable(license)) {
                continue;
            }
            // the wrapper object is only created for licenses that actually contribute variables
            final JSONObject licenseVariables = new JSONObject();
            licenseVariables.put(license, getVariableKeyValuePerLicense(license, licenseTextStats));
            licenseVariablesArray.put(licenseVariables);
        }
        return licenseVariablesArray;
    }

    /**
     * Determines if a segment has at least one license with variables.
     *
     * @param matchedLicenses   The names of the licenses to be checked
     *
     * @return {@code true} if {@code licenseHasVariable} holds for any of the given licenses; {@code false}
     *         otherwise.
     */
    private boolean segmentHasVariableLicense(List<String> matchedLicenses) {
        for (final String license : matchedLicenses) {
            if (licenseHasVariable(license)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Determines if a license has variables, i.e. whether its license template contains at least one placeholder
     * of the form {{name}}.
     *
     * @param license The name of the license to be checked
     *
     * @return {@code true} if terms meta data with a license template exists for the license and the template
     *         contains a placeholder; {@code false} otherwise.
     */
    private boolean licenseHasVariable(String license) {
        final TermsMetaData termsMetaData = getNormalizationMetaData().getTermsMetaData(license);
        if (termsMetaData == null) {
            return false;
        }

        final String licenseTemplate = termsMetaData.getLicenseTemplate();
        if (licenseTemplate == null) {
            return false;
        }

        // String.matches has to cover the complete template. Without (?s) the '.' does not match line terminators
        // and a placeholder in a template spanning several lines would never be detected.
        return licenseTemplate.matches("(?s).*\\{\\{([^\\}]+)}}.*");
    }

    /**
     * Extracts values for the placeholders (written as {{key}}) of the license template of the given license from
     * the given text.
     * <p>
     * The license template is normalized, stripped of {@code <...>} sequences and double quotes, and split into
     * words such that every placeholder forms a word of its own. For each placeholder that was not processed before,
     * the words directly before and after the placeholder are looked up in the text. The context starts with one word
     * on each side and is widened by one word per round as long as both contexts match, but not both exactly once.
     * <p>
     * The value stored for a key (the placeholder without braces) is
     * <ul>
     *     <li>the text between both contexts, wrapped in double quotes, if both contexts match exactly once and the
     *     before-context is located in front of the after-context,</li>
     *     <li>a line break if both contexts match exactly once, but the before-context is located behind the
     *     after-context,</li>
     *     <li>an empty string if one of the contexts does not match, if both contexts are empty, or if widening the
     *     context reaches the beginning of the template.</li>
     * </ul>
     * NOTE(review): the result of {@code getTermsMetaData(license)} is dereferenced without null check; callers
     * presumably check {@code licenseHasVariable} first - confirm.
     *
     * @param license           The name of the license whose template is evaluated
     * @param licenseTextStats  The text in which the placeholder contexts are searched
     *
     * @return A JSON object mapping each placeholder key to the extracted value.
     */
    protected JSONObject getVariableKeyValuePerLicense(String license, StringStats licenseTextStats) {
        final JSONObject keyValuePairs = new JSONObject();
        // placeholders (including braces) for which a value has already been stored
        final List processedKeys = new ArrayList<>();

        final String licenseTemplateOriginal = getNormalizationMetaData().getTermsMetaData(license).getLicenseTemplate();
        final StringStats normalizedLicenseTemplate = StringStats.normalize(licenseTemplateOriginal, false);
        String licenseTemplate = normalizedLicenseTemplate.getNormalizedString();
        // undo the spaces found between the placeholder braces after normalization
        licenseTemplate = licenseTemplate.replaceAll("\\{ \\{ ", "{{").replaceAll(" } }", "}}");

        // FIXME: Normalize licenseTemplate
        // remove <...> sequences and double quotes; collapse runs of spaces
        licenseTemplate = licenseTemplate.replaceAll("\\<.*?\\>", "");
        licenseTemplate = licenseTemplate.replaceAll("\"", "");
        licenseTemplate = licenseTemplate.replaceAll(" {2,}", " ");

        // enclose placeholders with the separator character '˜' so that the split below isolates each placeholder
        // even if it is directly attached to a neighbouring word
        licenseTemplate = licenseTemplate.replaceAll(" ?\\{\\{", "˜{{").replaceAll("}} ?", "}}˜");
        String[] licenseTemplateWord = licenseTemplate.split((" |˜"));
        // FIXME: DO WITH LOOKAROUNDS

        // iterating through every word of the licenseTemplate
        for (int i = 0; i < licenseTemplateWord.length; i++) {
            // if the word is a variable key and variable key is not existing already in keyValueMap
            if (licenseTemplateWord[i].matches("\\{\\{.*?}}") && !processedKeys.contains(licenseTemplateWord[i])) {
                boolean matched = false;
                boolean failed = false;
                // number of words used as context on each side of the placeholder; grows per round
                int index = 1;
                while (!matched) {
                    String before = "";
                    String after = "";

                    // find the word before and after the variableKey (licenseTemplate)
                    for (int b = index; b > 0; b--) {
                        // the requested context exceeds the beginning of the template; give up on this placeholder
                        if ((i - b) < 0) {
                            failed = true;
                            break;
                        }
                        if (licenseTemplateWord[i - b] != null) {
                            // concatenate the b words in front of the placeholder in template order
                            for (int a = b; a > 0; a--) {
                                before = (before + " " + licenseTemplateWord[i - a]).trim();
                            }
                            break;
                        }
                    }
                    // the after-context is shortened until it fits into the template; it remains empty if the
                    // placeholder is the last word
                    for (int b = index; b > 0; b--) {
                        if (i + b < licenseTemplateWord.length) {
                            for (int a = 1; a <= b; a++) {
                                after = (after + " " + licenseTemplateWord[i + a]).trim();
                            }
                            break;
                        }
                    }

                    // '&&' binds stronger than '||': no context at all, or the before-context lookup failed
                    if ((before.equals("")) && (after.equals("")) || failed) {
                        keyValuePairs.put(licenseTemplateWord[i].replace("{{", "").replace("}}", ""), "");
                        processedKeys.add(licenseTemplateWord[i]);
                        matched = true;
                        break;
                    }

                    StringStats stringStatsBefore = StringStats.normalize(before, true);
                    StringStats stringStatsAfter = StringStats.normalize(after, true);

                    // all matches in the actual SegmentText of the words before and after the variableKey
                    int[] beforeMatches = licenseTextStats.allMatchesOriginalString(stringStatsBefore);
                    int[] afterMatches = licenseTextStats.allMatchesOriginalString(stringStatsAfter);

                    // FIXME: Safe index of allMatches if its unique

                    // FIXME: BSD 4-Clause Problem

                    // TODO: Enable content extraction if matches are not all unique
                    // if matches unique then extract content
                    if (beforeMatches.length == 1 && afterMatches.length == 1 && beforeMatches[0] < afterMatches[0]) {
                        // NOTE(review): the indices stem from allMatchesOriginalString, but are applied to the
                        // normalized string - confirm that both refer to the same coordinates
                        String content = licenseTextStats.getNormalizedString().substring(beforeMatches[0] + before.length(), afterMatches[0] - 1);
                        // a leading space is only preserved if the template has no space (or '>') in front of the
                        // placeholder; otherwise the content is trimmed
                        if (!(noSpaceBeforePlaceholder(licenseTemplateOriginal, licenseTemplateWord[i]) && content.startsWith(" "))) {
                            content = content.trim();
                        }
                        // TODO: re-normalize Content
                        keyValuePairs.put(licenseTemplateWord[i].replace("{{", "").replace("}}", ""), "\"" + content + "\"");
                        matched = true;
                        processedKeys.add(licenseTemplateWord[i]);
                    } else if (beforeMatches.length == 1 && afterMatches.length == 1 && beforeMatches[0] > afterMatches[0]) {
                        // both contexts are unique, but in reverse order; a line break is stored as value
                        keyValuePairs.put(licenseTemplateWord[i].replace("{{", "").replace("}}", ""), "\n" + "");
                        matched = true;
                        processedKeys.add(licenseTemplateWord[i]);
                    } else if (beforeMatches.length == 0 | afterMatches.length == 0) {
                        // at least one context does not occur in the text; an empty value is stored
                        keyValuePairs.put(licenseTemplateWord[i].replace("{{", "").replace("}}", ""), "");
                        matched = true;
                        processedKeys.add(licenseTemplateWord[i]);
                    } else {
                        // remaining cases (e.g. several matches for a context): widen the context by one word and
                        // retry; both arrays are reassigned in the next round
                        Arrays.fill(beforeMatches, -1);
                        Arrays.fill(afterMatches, -1);
                        index++;
                    }
                }
            }
        }
        return keyValuePairs;
    }

    /**
     * Determines whether the first occurrence of a placeholder in the license template is NOT preceded by a space
     * or a '&gt;' character.
     *
     * @param licenseTemplate   The given license template
     * @param placeholder       A placeholder within the license template
     *
     * @return {@code false} if the character in front of the placeholder is a space or '&gt;', or if the
     *         placeholder is not contained in the template at all; {@code true} otherwise (including a placeholder
     *         located at the very beginning of the template).
     */
    private boolean noSpaceBeforePlaceholder(String licenseTemplate, String placeholder) {
        final int placeholderIndex = licenseTemplate.indexOf(placeholder);
        if (placeholderIndex < 0) {
            // the placeholder cannot be located (e.g. its spelling differs from the template passed in); nothing
            // can be said about the preceding character, so no missing space is reported
            return false;
        }
        if (placeholderIndex == 0) {
            // no character precedes the placeholder at all
            return true;
        }
        final char preceding = licenseTemplate.charAt(placeholderIndex - 1);
        return preceding != ' ' && preceding != '>';
    }

    /**
     * Determines whether the given terms contain at least one entry that is flagged as exception.
     *
     * @param terms The license terms meta data to inspect; {@code null} entries are skipped
     *
     * @return {@code true} if any non-null entry reports {@code isException()}; {@code false} otherwise.
     */
    private boolean isIndicatedExceptionWithoutReference(List<TermsMetaData> terms) {
        for (final TermsMetaData tmd : terms) {
            if (tmd != null && tmd.isException()) {
                return true;
            }
        }
        return false;
    }

}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy