All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metaeffekt.mirror.index.other.EpssIndex Maven / Gradle / Ivy

package com.metaeffekt.mirror.index.other;

import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.mirror.download.documentation.MirrorMetadata;
import com.metaeffekt.mirror.contents.epss.EpssData;
import com.metaeffekt.mirror.download.other.EpssDownload;
import com.metaeffekt.mirror.index.Index;
import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.document.Document;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * <p>This index processes EPSS (Exploit Prediction Scoring System) data, which is provided in CSV format.
 * Each file contains various entries that are mapped to an internal document format using Apache Lucene
 * for indexing and later retrieval.
 * This index is primarily used for ranking vulnerabilities by the likelihood of exploitation based on
 * historical data.
 * It is used to add priority information to existing vulnerabilities.</p>
 *
 * <p>CSV Structure:</p>
 * <pre>
 * cve_id,epss_score,percentile
 * CVE-2022-12345,0.97,99.7
 * CVE-2021-54321,0.45,50.2
 * ...
 * </pre>
 *
 * <p>The EPSS data files are processed as follows:</p>
 * <ol>
 *   <li>Each file is parsed, and non-CSV files are skipped.</li>
 *   <li>The first two lines of the CSV file (header and metadata) are discarded.</li>
 *   <li>Each line is read, split into individual fields, and stored as a document object.
 *       Blank lines are ignored; a line that cannot be parsed aborts indexing with an exception
 *       that names the file and line.</li>
 * </ol>
 *
 * <table>
 *   <caption>Mapping of CSV content to EpssData fields</caption>
 *   <tr><th>CSV Field</th><th>Mapped Document Field</th></tr>
 *   <tr><td>cve_id</td><td>cveId</td></tr>
 *   <tr><td>epss_score</td><td>epssScore</td></tr>
 *   <tr><td>percentile</td><td>percentile</td></tr>
 * </table>
 */
@Slf4j
@MirrorMetadata(directoryName = "epss", mavenPropertyName = "epssIndex")
public class EpssIndex extends Index {

    /** Number of leading lines (header and metadata) that carry no EPSS entry. */
    private static final int SKIPPED_LEADING_LINES = 2;

    /** Fields required per entry: CVE id, EPSS score and percentile. */
    private static final int REQUIRED_FIELD_COUNT = 3;

    public EpssIndex(File baseMirrorDirectory) {
        super(baseMirrorDirectory, EpssIndex.class, Collections.singletonList(EpssDownload.class), Collections.emptyList());
    }

    /**
     * Builds one Lucene document per EPSS entry found in the {@code .csv} files of the first required download.
     * Documents are keyed by the first CSV field (the CVE id); if the same id occurs more than once, the entry
     * processed last replaces the earlier one.
     *
     * @return the documents to index, keyed by CVE id
     * @throws RuntimeException         if a CSV file cannot be read
     * @throws IllegalArgumentException if a non-blank data line has fewer than three fields or a score or
     *                                  percentile that is not a valid number
     */
    @Override
    protected Map<String, Document> createIndexDocuments() {
        final Map<String, Document> documents = new ConcurrentHashMap<>();
        final Collection<File> files = super.getAllFilesRecursively(super.requiredDownloads[0]);

        for (File file : files) {
            if (!file.getName().endsWith(".csv")) {
                continue;
            }

            log.info("Processing file: {}", file.getName());

            final String contents;
            try {
                contents = FileUtils.readFileToString(file, StandardCharsets.UTF_8);
            } catch (IOException e) {
                throw new RuntimeException("Failed to read file: " + file.getAbsolutePath(), e);
            }

            addDocumentsFromCsv(file, contents, documents);
        }

        return documents;
    }

    /**
     * Parses the data lines of a single CSV file and adds the resulting documents to {@code documents}.
     *
     * @param file      the file the contents were read from; only used for error messages
     * @param contents  the complete text of the CSV file
     * @param documents the target map, keyed by CVE id
     */
    private static void addDocumentsFromCsv(File file, String contents, Map<String, Document> documents) {
        // Accept both LF and CRLF line endings so no stray '\r' remains on any line.
        final String[] lines = contents.split("\r?\n");

        // Starting at a fixed index (instead of cutting at the first two '\n') also copes with files that
        // contain fewer than two lines: they simply yield no entries instead of having their header parsed.
        for (int i = SKIPPED_LEADING_LINES; i < lines.length; i++) {
            final String line = lines[i].trim();
            if (line.isEmpty()) {
                // A blank line would otherwise fail with an ArrayIndexOutOfBoundsException below.
                continue;
            }

            final String[] fields = line.split(",");
            if (fields.length < REQUIRED_FIELD_COUNT) {
                throw new IllegalArgumentException("Malformed EPSS entry in " + file.getAbsolutePath()
                        + " at line " + (i + 1) + ": expected at least " + REQUIRED_FIELD_COUNT
                        + " fields but found " + fields.length);
            }

            final float epssScore;
            final float percentile;
            try {
                epssScore = Float.parseFloat(fields[1]);
                percentile = Float.parseFloat(fields[2]);
            } catch (NumberFormatException e) {
                throw new IllegalArgumentException("Malformed EPSS entry in " + file.getAbsolutePath()
                        + " at line " + (i + 1) + ": score or percentile is not a number", e);
            }

            final EpssData epssData = new EpssData(fields[0], epssScore, percentile);
            documents.put(fields[0], epssData.toDocument());
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy