All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metaeffekt.mirror.download.nvd.NvdCveApiDownload Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.mirror.download.nvd;

import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.artifact.analysis.utils.TimeUtils;
import com.metaeffekt.mirror.download.documentation.MirrorMetadata;
import com.metaeffekt.mirror.Retry;
import com.metaeffekt.mirror.download.Download;
import com.metaeffekt.mirror.download.ResourceLocation;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;

import static com.metaeffekt.mirror.download.nvd.NvdCveApiDownload.ResourceLocationNvd.CVE_API_LIST_ALL;
import static com.metaeffekt.mirror.download.nvd.NvdCveApiDownload.ResourceLocationNvd.CVE_API_START_END_DATE;

/**
 * 

References:

* *

* The new NVD API provides access to CVE and CPE. This download will use the CVE API to create one JSON file per year from * 1999 to the current year. Note that you will need to * request an API Key to increase the rate limit: *

*
    *
  • without key: 5 requests in a rolling 30-second window (delay: 6400ms)
  • *
  • with key: 50 requests in a rolling 30-second window (delay: 600ms)
  • *
*

* The API works on a pagination principle: It will only return the first (CVE: 2000, CPE: dict=10000/match=5000) matching * entries from a query, for the rest, further requests with a startIndex parameter have to be made. The different * amounts of results per request have most likely been picked that way, so that all data sources require roughly the same * amount of requests, which is currently (2023-02) for all three of them ~100. *

* The advantages of this new API are not seen on an initial mirror (it takes quite a bit longer, actually), but rather on * successive calls, where only few new/updated vulnerabilities are pulled using the lastModStartDate and * lastModEndDate parameters. *

*

The actual implementation of this works on a supplier-consumer pattern. It starts multiple threads that will * continuously make requests to the API (as long as there are requests to be made) and append those to a list of cached * JSON responses. As soon as this list reaches a size of 10 or if the end is reached, a consumer thread will take these * and sort them into yearly JSON files, where they are merged with existing ones in case of an update or appended in case * of a new vulnerability.

*

This multi-file approach drastically reduces the amount of searching and loading of vulnerabilities when checking * whether the vulnerability already exists. The threshold of 10 requests has been picked as it showed to be the most * memory/speed efficient one.

*

In the end, files from 1999 to the current year will be present in the download directory:

*
.
 * ├── 1999.json
 * ├── 2000.json
 * ├── 2001.json
 * ├── 2002.json
 * ...
 * ├── 2022.json
 * └── 2023.json
 * 
*/ @MirrorMetadata(directoryName = "nvd", mavenPropertyName = "nvdCveDownload") public class NvdCveApiDownload extends Download { private final static Logger LOG = LoggerFactory.getLogger(NvdCveApiDownload.class); private static final SimpleDateFormat ISO_8601_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); /** * The NVD API limits authorized requests that provide an API Key to 50 per rolling 30 seconds window. */ private static final int API_DELAY_BETWEEN_AUTHORIZED_REQUESTS = 30 * 1000 / 50; /** * The NVD API limits unauthorized requests to 5 per rolling 30 seconds window. */ private static final int API_DELAY_BETWEEN_UNAUTHORIZED_REQUESTS = 30 * 1000 / 5 + 400; private String apiKey; public NvdCveApiDownload(File baseMirrorDirectory) { super(baseMirrorDirectory, NvdCveApiDownload.class); } public NvdCveApiDownload setApiKey(String apiKey) { this.apiKey = apiKey; return this; } /** * Based on NVD API User Workflow */ @Override protected void performDownload() { final boolean fullMirrorRequired = isFullMirrorRequired(); if (fullMirrorRequired) { clearDownload(); LOG.info("Downloading initial NVD data from NVD API"); } else { LOG.info("Existing mirror detected. Downloading incremental NVD data from NVD API"); } LOG.info("Requests {} be authorized with an API key, delay between requests [{}]", apiKey == null ? "will not" : "will", TimeUtils.formatTimeDiff(apiKey == null ? API_DELAY_BETWEEN_UNAUTHORIZED_REQUESTS : API_DELAY_BETWEEN_AUTHORIZED_REQUESTS)); final long lastModified = getDownloadDirectoryLastModified(); final Date lastModifiedDate = new Date(lastModified); final Date now = new Date(TimeUtils.utcNow()); int currentStartIndex = 0; final List apiResponseDataToBeProcessed = new ArrayList<>(); while (true) { final long processingStartTime = System.currentTimeMillis(); int finalCurrentStartIndex = currentStartIndex; final AtomicReference json = new AtomicReference<>(); new Retry(() -> { if (fullMirrorRequired) { json.set(downloadCvePage(finalCurrentStartIndex)); } else { json.set(downloadCvePage(finalCurrentStartIndex, lastModifiedDate, now)); } }) .withDelay(API_DELAY_BETWEEN_UNAUTHORIZED_REQUESTS) .onException(Exception.class) .retryCount(8) .run(); final int totalResults = json.get().getInt("totalResults"); final int resultsPerPage = json.get().getInt("resultsPerPage"); currentStartIndex += resultsPerPage; if (totalResults == 0) { LOG.info("No CVEs found for the given range."); break; } LOG.info("Downloaded CVEs [{}] to [{}] of [{}] [{} %]", currentStartIndex - resultsPerPage, currentStartIndex, totalResults, (currentStartIndex * 100 / totalResults)); final JSONArray vulnerabilities = json.get().getJSONArray("vulnerabilities"); apiResponseDataToBeProcessed.add(vulnerabilities); if (apiResponseDataToBeProcessed.size() >= 10) { final JSONArray merged = this.mergeCveItems(apiResponseDataToBeProcessed); this.processApiCveItems(merged); apiResponseDataToBeProcessed.clear(); } final long processingDuration = System.currentTimeMillis() - processingStartTime; if (currentStartIndex >= totalResults) { break; } try { final long sleepDuration = (apiKey == null ? API_DELAY_BETWEEN_UNAUTHORIZED_REQUESTS : API_DELAY_BETWEEN_AUTHORIZED_REQUESTS) - processingDuration; if (sleepDuration > 0) { LOG.info("Sleeping for [{}] to avoid rate limit", TimeUtils.formatTimeDiff(sleepDuration)); Thread.sleep(sleepDuration); } } catch (InterruptedException e) { } } if (apiResponseDataToBeProcessed.size() > 0) { final JSONArray merged = this.mergeCveItems(apiResponseDataToBeProcessed); this.processApiCveItems(merged); } LOG.info("Finished processing all CVEs"); } private void processApiCveItems(JSONArray jsonArray) { final Map yearCves = sortCvesIntoYears(jsonArray); LOG.info("Processing CVE data from years: [{}]", yearCves.entrySet().stream().sorted(Map.Entry.comparingByValue(Comparator.comparing(JSONArray::length).reversed())).map(e -> e.getKey() + " = " + e.getValue().length()).collect(Collectors.joining("; "))); for (Map.Entry entry : yearCves.entrySet()) { final int year = entry.getKey(); final JSONArray cves = entry.getValue(); final JSONArray existingJson = parseCveItemsFromDownloadedYear(year); final JSONArray mergedJson = mergeCveItems(Arrays.asList(cves, existingJson)); final File cveFile = new File(super.downloadIntoDirectory, year + ".json"); try { FileUtils.write(cveFile, mergedJson.toString(), StandardCharsets.UTF_8); } catch (IOException e) { throw new RuntimeException("Unable to write NVD CVE year file " + cveFile.getAbsolutePath(), e); } } } /** * Method to merge two CVE JSON arrays into a single array.
* The merge is based on the ID of the CVE, but the new array is preferred if the ID is present in both arrays.
* This will also deduplicate the CVEs in the array. * * @param cves the new CVEs, which will be preferred if the ID is present in both arrays * @return the merged CVEs */ private JSONArray mergeCveItems(Collection cves) { final JSONArray mergedJson = new JSONArray(); final Set knownIds = new HashSet<>(); for (JSONArray array : cves) { for (int i = 0; i < array.length(); i++) { final JSONObject cve = array.getJSONObject(i).has("cve") ? array.getJSONObject(i).getJSONObject("cve") : array.getJSONObject(i); if (knownIds.add(cve.getString("id"))) { mergedJson.put(cve); } } } return mergedJson; } private Map sortCvesIntoYears(JSONArray jsonArray) { final Map yearCves = new HashMap<>(); for (int i = 0; i < jsonArray.length(); i++) { final JSONObject jsonObject = jsonArray.getJSONObject(i); final JSONObject cve = jsonObject.has("cve") ? jsonObject.getJSONObject("cve") : jsonObject; final String cveId = cve.getString("id"); final int year = Integer.parseInt(cveId.substring(4, 8)); yearCves.computeIfAbsent(year, k -> new JSONArray()) .put(jsonObject); } return yearCves; } private JSONArray parseCveItemsFromDownloadedYear(int year) { final File cveFile = new File(super.downloadIntoDirectory, year + ".json"); if (!cveFile.exists()) { return new JSONArray(); } try { final String content = FileUtils.readFileToString(cveFile, StandardCharsets.UTF_8); return new JSONArray(content); } catch (IOException e) { throw new RuntimeException("Unable to read NVD CVE year file " + cveFile.getAbsolutePath(), e); } } /** * @param offset 0-based CVE index */ private JSONObject downloadCvePage(int offset) { final URL pageUrl = super.getRemoteResourceLocationUrl(CVE_API_LIST_ALL, offset); final List response = super.downloader.fetchResponseBodyFromUrlAsList(pageUrl, Collections.singletonMap("apiKey", apiKey)); try { return new JSONObject(String.join("", response)); } catch (JSONException e) { throw new RuntimeException("Unable to parse NVD CVE API response: " + response + "\nRequest URL: " + pageUrl, e); } } /** * Date values must be entered in the extended ISO-8061 date/time format:
* [YYYY][“-”][MM][“-”][DD][“T”][HH][“:”][MM][“:”][SS][Z] * * @param offset 0-based CVE index * @param lastModStartDate ISO-8061 start date * @param lastModEndDate ISO-8061 end date * @throws IllegalArgumentException if the difference between dates is larger than 120 days */ private JSONObject downloadCvePage(int offset, Date lastModStartDate, Date lastModEndDate) { final long diff = lastModEndDate.getTime() - lastModStartDate.getTime(); if (diff > 120L * 24 * 60 * 60 * 1000) { throw new IllegalArgumentException("Difference between lastModStartDate and lastModEndDate must not be greater than 120 days"); } final String startDate = ISO_8601_DATE_FORMAT.format(lastModStartDate); final String endDate = ISO_8601_DATE_FORMAT.format(lastModEndDate); final URL pageUrl = super.getRemoteResourceLocationUrl(CVE_API_START_END_DATE, offset, startDate, endDate); final List response = super.downloader.fetchResponseBodyFromUrlAsList(pageUrl, Collections.singletonMap("apiKey", apiKey)); try { return new JSONObject(String.join("", response)); } catch (JSONException e) { throw new RuntimeException("Unable to parse NVD CVE API response: " + response + "\nRequest URL: " + pageUrl, e); } } private boolean isFullMirrorRequired() { final File[] downloadFiles = super.downloadIntoDirectory.listFiles(); if (downloadFiles == null) { LOG.info("No CVE JSON files found in download directory, performing full mirror"); return true; } final List files = Arrays.stream(downloadFiles).map(File::getName).collect(Collectors.toList()); if (files.stream().noneMatch(file -> file.endsWith(".json"))) { LOG.info("No CVE JSON files found in download directory, performing full mirror"); return true; } final int latestCheckYear = Calendar.getInstance().get(Calendar.YEAR) - 1; for (int year = 1999; year <= latestCheckYear; year++) { if (!files.contains(year + ".json")) { LOG.info("Missing CVE JSON file for year [{}], performing full mirror", year); return true; } } long days120 = 120L * 24 * 60 * 60 * 1000; final long directoryLastModified = getDownloadDirectoryLastModified(); if (super.isUpdatedAgeOlderThan(directoryLastModified, days120)) { LOG.info("Download directory last modified date is older than 120 days, performing full mirror"); return true; } return false; } @Override protected boolean additionalIsDownloadRequired() { if (isFullMirrorRequired()) { return true; } final long lastModified = getDownloadDirectoryLastModified(); final Date lastModifiedDate = new Date(lastModified); final Date now = new Date(TimeUtils.utcNow()); final JSONObject changes = downloadCvePage(0, lastModifiedDate, now); if (changes.has("totalResults") && changes.getInt("totalResults") > 0) { LOG.info("NVD CVE API reports [{}] new/changed CVEs since last download", changes.getInt("totalResults")); return true; } return false; } @Override public void setRemoteResourceLocation(String location, String url) { super.setRemoteResourceLocation(ResourceLocationNvd.valueOf(location), url); } public enum ResourceLocationNvd implements ResourceLocation { /** *
    *
  1. startIndex 0-based index of the first CVE to be returned in the response data
  2. *
*/ CVE_API_LIST_ALL("https://services.nvd.nist.gov/rest/json/cves/2.0?startIndex=%d"), /** * The maximum allowable range when using any date range parameters is 120 consecutive days.
* Values must be entered in the extended ISO-8061 date/time format:
* [YYYY][“-”][MM][“-”][DD][“T”][HH][“:”][MM][“:”][SS][Z] *
    *
  1. startIndex 0-based index of the first CVE to be returned in the response data
  2. *
  3. lastModStartDate the start date
  4. *
  5. lastModEndDate the end date
  6. *
*/ CVE_API_START_END_DATE("https://services.nvd.nist.gov/rest/json/cves/2.0/?startIndex=%d&lastModStartDate=%s&lastModEndDate=%s"); private final String defaultValue; ResourceLocationNvd(String defaultValue) { this.defaultValue = defaultValue; } @Override public String getDefault() { return this.defaultValue; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy