com.metaeffekt.mirror.index.other.EpssIndex Maven / Gradle / Ivy
package com.metaeffekt.mirror.index.other;
import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.mirror.download.documentation.MirrorMetadata;
import com.metaeffekt.mirror.contents.epss.EpssData;
import com.metaeffekt.mirror.download.other.EpssDownload;
import com.metaeffekt.mirror.index.Index;
import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.document.Document;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
* This index processes EPSS (Exploit Prediction Scoring System) data, which is provided in CSV format.
* Each file contains various entries that are mapped to an internal document format using Apache Lucene for indexing and later retrieval.
* This index is primarily used for ranking vulnerabilities by the likelihood of exploitation based on historical data.
* It is used to add priority information to existing vulnerabilities.
*
* CSV Structure:
* cve_id,epss_score,percentile
* CVE-2022-12345,0.97,99.7
* CVE-2021-54321,0.45,50.2
* ...
*
* The EPSS data files are processed as follows:
*
* - Each file is parsed, and non-CSV files are skipped.
* - The first two lines of the CSV file (header and metadata) are discarded.
* - Each line is read, split into individual fields, and stored as a document object.
*
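*
* <table>
* <caption>Mapping of CSV content to {@link EpssData} fields</caption>
* <thead>
* <tr>
* <th>CSV Field</th>
* <th>Mapped Document Field</th>
* </tr>
* </thead>
* <tbody>
* <tr>
* <td>cve_id</td>
* <td>cveId</td>
* </tr>
* <tr>
* <td>epss_score</td>
* <td>epssScore</td>
* </tr>
* <tr>
* <td>percentile</td>
* <td>percentile</td>
* </tr>
* </tbody>
* </table>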
*/
@Slf4j
@MirrorMetadata(directoryName = "epss", mavenPropertyName = "epssIndex")
public class EpssIndex extends Index {
public EpssIndex(File baseMirrorDirectory) {
super(baseMirrorDirectory, EpssIndex.class, Collections.singletonList(EpssDownload.class), Collections.emptyList());
}
@Override
protected Map createIndexDocuments() {
final Map documents = new ConcurrentHashMap<>();
final Collection files = super.getAllFilesRecursively(super.requiredDownloads[0]);
for (File file : files) {
if (!file.getName().endsWith(".csv")) {
continue;
}
log.info("Processing file: {}", file.getName());
try {
String contents = FileUtils.readFileToString(file, StandardCharsets.UTF_8);
contents = contents.substring(contents.indexOf("\n") + 1);
contents = contents.substring(contents.indexOf("\n") + 1);
for (String line : contents.split("\n")) {
String[] fields = line.split(",");
EpssData epssData = new EpssData(fields[0], Float.parseFloat(fields[1]), Float.parseFloat(fields[2]));
documents.put(fields[0], epssData.toDocument());
}
} catch (IOException e) {
throw new RuntimeException("Failed to read file: " + file.getAbsolutePath(), e);
}
}
return documents;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy