All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metaeffekt.mirror.index.other.EpssIndex Maven / Gradle / Ivy

The newest version!
package com.metaeffekt.mirror.index.other;

import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.mirror.download.documentation.MirrorMetadata;
import com.metaeffekt.mirror.contents.epss.EpssData;
import com.metaeffekt.mirror.download.other.EpssDownload;
import com.metaeffekt.mirror.index.Index;
import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.document.Document;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * This index processes EPSS (Exploit Prediction Scoring System) data, which is provided in CSV
 * format. Each file contains entries that are mapped to an internal document format using Apache
 * Lucene for indexing and later retrieval. The index is primarily used for ranking vulnerabilities
 * by the likelihood of exploitation based on historical data, adding priority information to
 * existing vulnerabilities.
 * <p>
 * CSV structure:
 * <pre>
 * cve_id,epss_score,percentile
 * CVE-2022-12345,0.97,99.7
 * CVE-2021-54321,0.45,50.2
 * ...
 * </pre>
 * <p>
 * The EPSS data files are processed as follows:
 * <ol>
 *     <li>Each file is parsed; non-CSV files are skipped.</li>
 *     <li>The first two lines of the CSV file (metadata and header) are discarded.</li>
 *     <li>Each remaining line is split into fields and stored as a Lucene document.</li>
 * </ol>
 * <p>
 * Mapping of CSV content to {@code EpssData} fields:
 * <table>
 *     <tr><th>CSV Field</th><th>Mapped Document Field</th></tr>
 *     <tr><td>cve_id</td><td>cveId</td></tr>
 *     <tr><td>epss_score</td><td>epssScore</td></tr>
 *     <tr><td>percentile</td><td>percentile</td></tr>
 * </table>
 */
@Slf4j
@MirrorMetadata(directoryName = "epss", mavenPropertyName = "epssIndex")
public class EpssIndex extends Index {

    public EpssIndex(File baseMirrorDirectory) {
        super(baseMirrorDirectory, EpssIndex.class, Collections.singletonList(EpssDownload.class), Collections.emptyList());
    }

    /**
     * Parses all downloaded EPSS CSV files and builds one Lucene {@link Document} per CVE entry.
     *
     * @return map from CVE id to its indexed EPSS document.
     * @throws RuntimeException if a CSV file cannot be read.
     */
    @Override
    protected Map<String, Document> createIndexDocuments() {
        final Map<String, Document> documents = new ConcurrentHashMap<>();
        final Collection<File> files = super.getAllFilesRecursively(super.requiredDownloads[0]);

        for (File file : files) {
            if (!file.getName().endsWith(".csv")) {
                continue;
            }
            log.info("Processing file: {}", file.getName());

            try {
                final String contents = FileUtils.readFileToString(file, StandardCharsets.UTF_8);
                // \R matches any line terminator (LF, CRLF, ...), so CRLF files do not leave
                // stray '\r' characters in the last field.
                final String[] lines = contents.split("\\R");

                // skip the first two lines: the model-version metadata line and the CSV header
                for (int i = 2; i < lines.length; i++) {
                    final String line = lines[i].trim();
                    if (line.isEmpty()) {
                        continue; // tolerate blank/trailing lines instead of failing on fields[1]
                    }
                    final String[] fields = line.split(",");
                    if (fields.length < 3) {
                        log.warn("Skipping malformed EPSS line in [{}]: {}", file.getName(), line);
                        continue;
                    }
                    try {
                        final EpssData epssData = new EpssData(fields[0], Float.parseFloat(fields[1]), Float.parseFloat(fields[2]));
                        documents.put(fields[0], epssData.toDocument());
                    } catch (NumberFormatException e) {
                        // a single bad row must not abort the whole index build
                        log.warn("Skipping EPSS line with non-numeric score in [{}]: {}", file.getName(), line);
                    }
                }
            } catch (IOException e) {
                throw new RuntimeException("Failed to read file: " + file.getAbsolutePath(), e);
            }
        }

        return documents;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy