// com.metaeffekt.mirror.download.nvd.NvdCveApiDownload Maven / Gradle / Ivy
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metaeffekt.mirror.download.nvd;
import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.artifact.analysis.utils.TimeUtils;
import com.metaeffekt.mirror.download.documentation.MirrorMetadata;
import com.metaeffekt.mirror.Retry;
import com.metaeffekt.mirror.download.Download;
import com.metaeffekt.mirror.download.ResourceLocation;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import static com.metaeffekt.mirror.download.nvd.NvdCveApiDownload.ResourceLocationNvd.CVE_API_LIST_ALL;
import static com.metaeffekt.mirror.download.nvd.NvdCveApiDownload.ResourceLocationNvd.CVE_API_START_END_DATE;
/**
* References:
*
* - API endpoint reference: Vulnerability API
* - API functionality overview: Developers - Start here
* - API workflow: API User Workflow
* - Request an API Key: NVD - API Request
*
*
* The new NVD API provides access to CVE and CPE. This download will use the CVE API to create one JSON file per year from
* 1999 to the current year. Note that you will need to
* request an API Key to increase the rate limit:
*
*
* - without key: 5 requests in a rolling 30-second window (delay: 6400ms)
* - with key: 50 requests in a rolling 30-second window (delay: 600ms)
*
*
* The API works on a pagination principle: It will only return the first (CVE: 2000, CPE: dict=10000/match=5000) matching
* entries from a query; for the rest, further requests with a <code>startIndex</code> parameter have to be made. The
* different amounts of results per request have most likely been picked so that all data sources require roughly the
* same amount of requests, which is currently (2023-02) ~100 for all three of them.
*
* The advantages of this new API are not seen on an initial mirror (it takes quite a bit longer, actually), but rather on
* successive calls, where only few new/updated vulnerabilities are pulled using the <code>lastModStartDate</code> and
* <code>lastModEndDate</code> parameters.
*
* The implementation in this class requests the pages one after another, waiting between requests to stay within the
* rate limit, and appends each response to a list of cached JSON responses. As soon as this list reaches a size of 10
* or if the end is reached, the cached responses are taken and sorted into yearly JSON files, where they are merged
* with existing ones in case of an update or appended in case of a new vulnerability.
* This multi-file approach drastically reduces the amount of searching and loading of vulnerabilities when checking
* whether the vulnerability already exists. The threshold of 10 requests has been picked as it showed to be the most
* memory/speed efficient one.
* In the end, files from 1999 to the current year will be present in the download directory:
* .
* ├── 1999.json
* ├── 2000.json
* ├── 2001.json
* ├── 2002.json
* ...
* ├── 2022.json
* └── 2023.json
*
*/
@MirrorMetadata(directoryName = "nvd", mavenPropertyName = "nvdCveDownload")
public class NvdCveApiDownload extends Download {
/** Logger used for progress and diagnostic messages of this download. */
private static final Logger LOG = LoggerFactory.getLogger(NvdCveApiDownload.class);
/**
 * Formatter for the date parameters sent to the NVD API. The trailing {@code Z} is a quoted literal in the pattern.
 * NOTE(review): no time zone is configured on this formatter, so dates are rendered in the JVM default zone while
 * being labelled with {@code Z} - confirm that this matches what the API expects.
 */
private static final SimpleDateFormat ISO_8601_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
/**
 * Delay in milliseconds between two requests that carry an API key.
 * The NVD API limits authorized requests to 50 per rolling 30 seconds window.
 */
private static final int API_DELAY_BETWEEN_AUTHORIZED_REQUESTS = 30_000 / 50;
/**
 * Delay in milliseconds between two requests without an API key, including 400 ms of extra margin.
 * The NVD API limits unauthorized requests to 5 per rolling 30 seconds window.
 */
private static final int API_DELAY_BETWEEN_UNAUTHORIZED_REQUESTS = 30_000 / 5 + 400;
/** Optional NVD API key; {@code null} means that requests are performed unauthorized. */
private String apiKey;
/**
* Creates the NVD CVE API download.
*
* @param baseMirrorDirectory the directory that is passed on to the {@link Download} super constructor together
* with this class
*/
public NvdCveApiDownload(File baseMirrorDirectory) {
super(baseMirrorDirectory, NvdCveApiDownload.class);
}
/**
* Sets the NVD API key that is passed along with every CVE page request of this download.
* If no key is set (<code>null</code>), the longer delay for unauthorized requests is applied between requests.
*
* @param apiKey the NVD API key, or <code>null</code> to perform unauthorized requests
* @return this instance, to allow chaining of calls
*/
public NvdCveApiDownload setApiKey(String apiKey) {
this.apiKey = apiKey;
return this;
}
/**
 * Downloads the CVE data page by page from the NVD API and stores it in yearly JSON files.
 * Based on NVD API User Workflow.
 * <p>
 * If a full mirror is required, the existing download is cleared and all CVEs are requested. Otherwise, only the
 * CVEs modified between the last modification of the download directory and now are requested. Downloaded pages are
 * buffered and written to the yearly files in batches of 10 pages; the remaining pages are written at the end.
 * Between two requests the method waits for the delay that matches the presence of an API key, reduced by the time
 * already spent on processing the current page.
 *
 * @throws IllegalStateException if a request yields no response, or if the API returns an empty page although more
 *                               results are expected (which would otherwise result in an endless loop)
 * @throws RuntimeException      if the thread is interrupted while waiting between two requests
 */
@Override
protected void performDownload() {
    final boolean fullMirrorRequired = isFullMirrorRequired();
    if (fullMirrorRequired) {
        clearDownload();
        LOG.info("Downloading initial NVD data from NVD API");
    } else {
        LOG.info("Existing mirror detected. Downloading incremental NVD data from NVD API");
    }

    final int delayBetweenRequests = apiKey == null ? API_DELAY_BETWEEN_UNAUTHORIZED_REQUESTS : API_DELAY_BETWEEN_AUTHORIZED_REQUESTS;
    LOG.info("Requests {} be authorized with an API key, delay between requests [{}]",
            apiKey == null ? "will not" : "will",
            TimeUtils.formatTimeDiff(delayBetweenRequests));

    final long lastModified = getDownloadDirectoryLastModified();
    final Date lastModifiedDate = new Date(lastModified);
    final Date now = new Date(TimeUtils.utcNow());

    int currentStartIndex = 0;
    final List<JSONArray> apiResponseDataToBeProcessed = new ArrayList<>();

    while (true) {
        final long processingStartTime = System.currentTimeMillis();

        // the lambda below requires an effectively final copy of the start index
        final int finalCurrentStartIndex = currentStartIndex;
        final AtomicReference<JSONObject> json = new AtomicReference<>();
        new Retry(() -> {
            if (fullMirrorRequired) {
                json.set(downloadCvePage(finalCurrentStartIndex));
            } else {
                json.set(downloadCvePage(finalCurrentStartIndex, lastModifiedDate, now));
            }
        })
                .withDelay(API_DELAY_BETWEEN_UNAUTHORIZED_REQUESTS)
                .onException(Exception.class)
                .retryCount(8)
                .run();

        final JSONObject page = json.get();
        if (page == null) {
            throw new IllegalStateException("NVD CVE API request did not produce a response for start index [" + finalCurrentStartIndex + "]");
        }

        final int totalResults = page.getInt("totalResults");
        final int resultsPerPage = page.getInt("resultsPerPage");
        currentStartIndex += resultsPerPage;

        if (totalResults == 0) {
            LOG.info("No CVEs found for the given range.");
            break;
        }

        LOG.info("Downloaded CVEs [{}] to [{}] of [{}] [{} %]", currentStartIndex - resultsPerPage, currentStartIndex, totalResults, (currentStartIndex * 100 / totalResults));

        final JSONArray vulnerabilities = page.getJSONArray("vulnerabilities");
        apiResponseDataToBeProcessed.add(vulnerabilities);

        // write the buffered pages in batches to limit both memory usage and the amount of file rewrites
        if (apiResponseDataToBeProcessed.size() >= 10) {
            final JSONArray merged = this.mergeCveItems(apiResponseDataToBeProcessed);
            this.processApiCveItems(merged);
            apiResponseDataToBeProcessed.clear();
        }

        final long processingDuration = System.currentTimeMillis() - processingStartTime;

        if (currentStartIndex >= totalResults) {
            break;
        }

        // more results are expected, but the start index did not advance: the same page would be requested forever
        if (resultsPerPage <= 0) {
            throw new IllegalStateException("NVD CVE API returned an empty page at start index [" + finalCurrentStartIndex + "] although [" + totalResults + "] results are expected");
        }

        final long sleepDuration = delayBetweenRequests - processingDuration;
        if (sleepDuration > 0) {
            LOG.info("Sleeping for [{}] to avoid rate limit", TimeUtils.formatTimeDiff(sleepDuration));
            try {
                Thread.sleep(sleepDuration);
            } catch (InterruptedException e) {
                // restore the interrupt flag and abort instead of continuing without respecting the rate limit
                Thread.currentThread().interrupt();
                throw new RuntimeException("Interrupted while waiting between NVD CVE API requests", e);
            }
        }
    }

    if (!apiResponseDataToBeProcessed.isEmpty()) {
        final JSONArray merged = this.mergeCveItems(apiResponseDataToBeProcessed);
        this.processApiCveItems(merged);
    }

    LOG.info("Finished processing all CVEs");
}
/**
 * Sorts the given CVE entries by year and merges them into the yearly JSON files of the download directory.
 * For every affected year, the existing file content is loaded, merged with the new entries (new entries win over
 * existing ones with the same ID) and written back as <code>&lt;year&gt;.json</code>.
 *
 * @param jsonArray the CVE entries to store
 * @throws RuntimeException if a year file cannot be written
 */
private void processApiCveItems(JSONArray jsonArray) {
    final Map<Integer, JSONArray> yearCves = sortCvesIntoYears(jsonArray);

    // summary for the log, years with the most entries first
    final String yearSummary = yearCves.entrySet().stream()
            .sorted(Map.Entry.comparingByValue(Comparator.comparing(JSONArray::length).reversed()))
            .map(e -> e.getKey() + " = " + e.getValue().length())
            .collect(Collectors.joining("; "));
    LOG.info("Processing CVE data from years: [{}]", yearSummary);

    for (Map.Entry<Integer, JSONArray> entry : yearCves.entrySet()) {
        final int year = entry.getKey();
        final JSONArray cves = entry.getValue();

        final JSONArray existingJson = parseCveItemsFromDownloadedYear(year);
        // the new entries are passed first so that they are preferred over the existing ones
        final JSONArray mergedJson = mergeCveItems(Arrays.asList(cves, existingJson));

        final File cveFile = new File(super.downloadIntoDirectory, year + ".json");
        try {
            FileUtils.write(cveFile, mergedJson.toString(), StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new RuntimeException("Unable to write NVD CVE year file " + cveFile.getAbsolutePath(), e);
        }
    }
}
/**
 * Merges any number of CVE JSON arrays into a single array.
 * The merge is based on the ID of the CVE: the first occurrence of an ID wins, so entries of arrays that come
 * earlier in the collection are preferred over entries of later arrays. This also deduplicates the CVEs within a
 * single array. Entries that wrap the actual CVE in a <code>cve</code> property are unwrapped, so the result only
 * contains the plain CVE objects.
 *
 * @param cves the CVE arrays to merge, ordered by descending priority
 * @return the merged and deduplicated CVEs
 */
private JSONArray mergeCveItems(Collection<JSONArray> cves) {
    final JSONArray mergedJson = new JSONArray();
    final Set<String> knownIds = new HashSet<>();

    for (JSONArray array : cves) {
        for (int i = 0; i < array.length(); i++) {
            final JSONObject entry = array.getJSONObject(i);
            final JSONObject cve = entry.has("cve") ? entry.getJSONObject("cve") : entry;
            // Set#add returns false for already known IDs, which skips the duplicates
            if (knownIds.add(cve.getString("id"))) {
                mergedJson.put(cve);
            }
        }
    }

    return mergedJson;
}
/**
 * Groups the given CVE entries by the year that is part of their ID.
 * The year is taken from characters 4 to 8 of the ID, e.g. <code>2023</code> for <code>CVE-2023-1234</code>.
 * Entries may either be the CVE object itself or wrap it in a <code>cve</code> property; the entries are added to
 * the result as they were passed in.
 *
 * @param jsonArray the CVE entries to group
 * @return the CVE entries by year
 */
private Map<Integer, JSONArray> sortCvesIntoYears(JSONArray jsonArray) {
    final Map<Integer, JSONArray> yearCves = new HashMap<>();

    for (int i = 0; i < jsonArray.length(); i++) {
        final JSONObject jsonObject = jsonArray.getJSONObject(i);
        final JSONObject cve = jsonObject.has("cve") ? jsonObject.getJSONObject("cve") : jsonObject;

        final String cveId = cve.getString("id");
        final int year = Integer.parseInt(cveId.substring(4, 8));

        yearCves.computeIfAbsent(year, k -> new JSONArray())
                .put(jsonObject);
    }

    return yearCves;
}
/**
 * Loads the CVE entries that have already been stored for the given year.
 *
 * @param year the year to load the <code>&lt;year&gt;.json</code> file for
 * @return the parsed content of the year file, or an empty array if the file does not exist
 * @throws RuntimeException if the year file cannot be read
 */
private JSONArray parseCveItemsFromDownloadedYear(int year) {
    final File yearFile = new File(super.downloadIntoDirectory, year + ".json");

    if (yearFile.exists()) {
        final String yearFileContent;
        try {
            yearFileContent = FileUtils.readFileToString(yearFile, StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new RuntimeException("Unable to read NVD CVE year file " + yearFile.getAbsolutePath(), e);
        }
        return new JSONArray(yearFileContent);
    }

    // nothing has been stored for this year yet
    return new JSONArray();
}
/**
 * Downloads a single page of the list of all CVEs.
 *
 * @param offset 0-based CVE index
 * @return the parsed JSON response of the API
 * @throws RuntimeException if the response cannot be parsed as JSON
 */
private JSONObject downloadCvePage(int offset) {
    final URL pageUrl = super.getRemoteResourceLocationUrl(CVE_API_LIST_ALL, offset);
    return requestCvePage(pageUrl);
}

/**
 * Downloads a single page of the CVEs that have been modified within the given date range.
 * Date values are sent in the extended ISO-8601 date/time format:
 * [YYYY]["-"][MM]["-"][DD]["T"][HH][":"][MM][":"][SS][Z]
 * <p>
 * NOTE(review): the dates are formatted with the shared formatter of this class, which has no explicit time zone
 * configured - confirm that the resulting values are interpreted as intended by the API.
 *
 * @param offset           0-based CVE index
 * @param lastModStartDate start of the modification date range
 * @param lastModEndDate   end of the modification date range
 * @return the parsed JSON response of the API
 * @throws IllegalArgumentException if the difference between dates is larger than 120 days
 * @throws RuntimeException         if the response cannot be parsed as JSON
 */
private JSONObject downloadCvePage(int offset, Date lastModStartDate, Date lastModEndDate) {
    final long diff = lastModEndDate.getTime() - lastModStartDate.getTime();
    if (diff > 120L * 24 * 60 * 60 * 1000) {
        throw new IllegalArgumentException("Difference between lastModStartDate and lastModEndDate must not be greater than 120 days");
    }

    final String startDate = ISO_8601_DATE_FORMAT.format(lastModStartDate);
    final String endDate = ISO_8601_DATE_FORMAT.format(lastModEndDate);

    final URL pageUrl = super.getRemoteResourceLocationUrl(CVE_API_START_END_DATE, offset, startDate, endDate);
    return requestCvePage(pageUrl);
}

/**
 * Fetches the given CVE API URL, passing the API key along with the request, and parses the response as JSON.
 * Shared by both page download variants.
 *
 * @param pageUrl the fully resolved API URL to request
 * @return the parsed JSON response of the API
 * @throws RuntimeException if the response cannot be parsed as JSON
 */
private JSONObject requestCvePage(URL pageUrl) {
    final List<String> response = super.downloader.fetchResponseBodyFromUrlAsList(pageUrl, Collections.singletonMap("apiKey", apiKey));
    try {
        // the response is provided as a list of lines, which are joined without separator before parsing
        return new JSONObject(String.join("", response));
    } catch (JSONException e) {
        throw new RuntimeException("Unable to parse NVD CVE API response: " + response + "\nRequest URL: " + pageUrl, e);
    }
}
/**
 * Checks whether the existing download can be updated incrementally or has to be mirrored from scratch.
 * A full mirror is required if
 * <ul>
 * <li>the download directory cannot be listed or does not contain any <code>.json</code> file,</li>
 * <li>the <code>&lt;year&gt;.json</code> file of any year from 1999 up to and including the previous year is
 * missing, or</li>
 * <li>the download directory has last been modified more than 120 days ago.</li>
 * </ul>
 *
 * @return <code>true</code> if a full mirror has to be performed, <code>false</code> if an incremental update is
 * sufficient
 */
private boolean isFullMirrorRequired() {
    final File[] downloadFiles = super.downloadIntoDirectory.listFiles();
    final Set<String> fileNames = downloadFiles == null
            ? Collections.<String>emptySet()
            : Arrays.stream(downloadFiles).map(File::getName).collect(Collectors.toSet());

    if (fileNames.stream().noneMatch(fileName -> fileName.endsWith(".json"))) {
        LOG.info("No CVE JSON files found in download directory, performing full mirror");
        return true;
    }

    // the file of the current year is not checked
    final int latestCheckYear = Calendar.getInstance().get(Calendar.YEAR) - 1;
    for (int year = 1999; year <= latestCheckYear; year++) {
        if (!fileNames.contains(year + ".json")) {
            LOG.info("Missing CVE JSON file for year [{}], performing full mirror", year);
            return true;
        }
    }

    final long days120 = 120L * 24 * 60 * 60 * 1000;
    final long directoryLastModified = getDownloadDirectoryLastModified();
    if (super.isUpdatedAgeOlderThan(directoryLastModified, days120)) {
        LOG.info("Download directory last modified date is older than 120 days, performing full mirror");
        return true;
    }

    return false;
}
/**
 * Decides whether a download has to be performed. This is the case if a full mirror is required, or if the API
 * reports at least one CVE that has been modified between the last modification of the download directory and now.
 *
 * @return <code>true</code> if a download is required
 */
@Override
protected boolean additionalIsDownloadRequired() {
    if (isFullMirrorRequired()) {
        return true;
    }

    final Date since = new Date(getDownloadDirectoryLastModified());
    final Date until = new Date(TimeUtils.utcNow());

    // only the first page is requested, as it already contains the total amount of changed entries
    final JSONObject firstPage = downloadCvePage(0, since, until);

    final boolean changesReported = firstPage.has("totalResults") && firstPage.getInt("totalResults") > 0;
    if (changesReported) {
        LOG.info("NVD CVE API reports [{}] new/changed CVEs since last download", firstPage.getInt("totalResults"));
    }
    return changesReported;
}
/**
* Overrides the URL of one of the remote resource locations of this download.
*
* @param location the name of the {@link ResourceLocationNvd} constant to set the URL for
* @param url the URL to use for the location
* @throws IllegalArgumentException if <code>location</code> is not the name of a {@link ResourceLocationNvd} constant
*/
@Override
public void setRemoteResourceLocation(String location, String url) {
super.setRemoteResourceLocation(ResourceLocationNvd.valueOf(location), url);
}
/**
* The remote resource locations of the NVD CVE API together with their default URL templates.
*/
public enum ResourceLocationNvd implements ResourceLocation {
/**
* URL template for listing all CVEs. Template parameters:
*
* - startIndex: 0-based index of the first CVE to be returned in the response data
*/
CVE_API_LIST_ALL("https://services.nvd.nist.gov/rest/json/cves/2.0?startIndex=%d"),
/**
* URL template for listing the CVEs modified within a date range.
* The maximum allowable range when using any date range parameters is 120 consecutive days.
* Values must be entered in the extended ISO-8601 date/time format:
* [YYYY][“-”][MM][“-”][DD][“T”][HH][“:”][MM][“:”][SS][Z]
*
* Template parameters:
*
* - startIndex: 0-based index of the first CVE to be returned in the response data
* - lastModStartDate: the start date
* - lastModEndDate: the end date
*/
CVE_API_START_END_DATE("https://services.nvd.nist.gov/rest/json/cves/2.0/?startIndex=%d&lastModStartDate=%s&lastModEndDate=%s");
/** The URL template that is returned by {@link #getDefault()}. */
private final String defaultValue;
/**
* @param defaultValue the default URL template of the location
*/
ResourceLocationNvd(String defaultValue) {
this.defaultValue = defaultValue;
}
/**
* @return the default URL template of this location
*/
@Override
public String getDefault() {
return this.defaultValue;
}
}
}