All downloads are free. The search and download functionality uses the official Maven repository.

com.metaeffekt.mirror.download.nvd.NvdCpeApiDownload Maven / Gradle / Ivy

There is a newer version: 0.132.0
Show newest version
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.mirror.download.nvd;

import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.artifact.analysis.utils.TimeUtils;
import com.metaeffekt.mirror.download.documentation.MirrorMetadata;
import com.metaeffekt.mirror.Retry;
import com.metaeffekt.mirror.download.Download;
import com.metaeffekt.mirror.download.ResourceLocation;
import org.apache.commons.lang3.ObjectUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;

/**
 * 

See the NVD CVE download for more details on the general data format and references of the data source. * The data feed specific to the CPE data is split into two parts: The CPE Dictionary and the CPE Matches with each their own endpoint:

*
    *
  • dictionary: https://services.nvd.nist.gov/rest/json/cpes/2.0
    * allows 10000 results per request: ~1000000 CPE entries --> ~100 requests
    * The dictionary contains a list of CPE that can be used to identify products. However, (most of) these CPEs do not contain any version information. *
  • *
  • match: https://services.nvd.nist.gov/rest/json/cpematch/2.0
    * allows 5000 results per request: ~450000 CPE entries --> ~90 requests
    * The CPE match contains the versions that are missing from the dictionary. It contains almost no new CPEs, but the version information is added to the previously found CPEs. *
  • *
*

Since our version matching algorithm does not rely solely on the versions provided by the CPE entries, * the relations between the different entries are not relevant in our context and the hierarchical structure from dict/match is flattened, * normalized and stored in a single data structure.

*

The relevant keys that are stored in the local files for each CPE are cpeName, cpeNameId, lastModified, created and * deprecated. Additionally, they can have titles and refs for several titles in different languages and references * with a title and a link.

*/ @MirrorMetadata(directoryName = "cpe-dict", mavenPropertyName = "nvdCpeDownload") public class NvdCpeApiDownload extends Download { private final static Logger LOG = LoggerFactory.getLogger(NvdCpeApiDownload.class); private static final SimpleDateFormat ISO_8601_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); /** * The NVD API limits authorized requests that provide an API Key to 50 per rolling 30 seconds window.
* 20ms are added to ensure the limit is not reached by unfortunate timings. */ private static final int API_DELAY_BETWEEN_AUTHORIZED_REQUESTS = (30 * 1000 / 50) + 20; /** * The NVD API limits unauthorized requests to 5 per rolling 30 seconds window.
* 400ms are added to ensure the limit is not reached by unfortunate timings. */ private static final int API_DELAY_BETWEEN_UNAUTHORIZED_REQUESTS = 30 * 1000 / 5 + 400; private String apiKey; private final List apiResponseDataToBeProcessed = Collections.synchronizedList(new ArrayList<>()); public NvdCpeApiDownload(File baseMirrorDirectory) { super(baseMirrorDirectory, NvdCpeApiDownload.class); } public NvdCpeApiDownload setApiKey(String apiKey) { this.apiKey = apiKey; return this; } /** * Based on NVD API User Workflow */ @Override protected void performDownload() { final boolean fullMirrorRequired = isFullMirrorRequired(); if (fullMirrorRequired) { LOG.info("Downloading initial NVD data from NVD API"); } else { LOG.info("Existing mirror detected. Downloading incremental NVD data from NVD API"); } final long baseSleepDuration = apiKey == null ? API_DELAY_BETWEEN_UNAUTHORIZED_REQUESTS : API_DELAY_BETWEEN_AUTHORIZED_REQUESTS; super.executor.setSize(4); super.executor.setDelay(baseSleepDuration); LOG.info("Requests {} be authorized with an API key, delay between requests [{}]", apiKey == null ? 
"will not" : "will", TimeUtils.formatTimeDiff(baseSleepDuration)); final long lastModified = getDownloadDirectoryLastModified(); final Date lastModifiedDate = new Date(lastModified); final Date now = new Date(TimeUtils.utcNow()); downloadCpeDictionaryApiData(fullMirrorRequired, lastModifiedDate, now); downloadCpeMatchApiData(fullMirrorRequired, lastModifiedDate, now); } private void downloadCpeDictionaryApiData(boolean fullMirrorRequired, Date lastModifiedDate, Date now) { LOG.info("Downloading NVD CPE Dictionary API data..."); downloadCpeApiDataFromSource(fullMirrorRequired, lastModifiedDate, now, ResourceLocationNvd.CPE_API_LIST_ALL, ResourceLocationNvd.CPE_API_START_END_DATE); LOG.info("Finished processing NVD CPE Dictionary API data"); } private void downloadCpeMatchApiData(boolean fullMirrorRequired, Date lastModifiedDate, Date now) { LOG.info("Downloading NVD CPE Match API data..."); downloadCpeApiDataFromSource(fullMirrorRequired, lastModifiedDate, now, ResourceLocationNvd.CPE_MATCH_API_LIST_ALL, ResourceLocationNvd.CPE_MATCH_API_START_END_DATE); LOG.info("Finished processing NVD CPE Match API data"); } private void downloadCpeApiDataFromSource(boolean fullMirrorRequired, Date lastModifiedDate, Date now, ResourceLocationNvd cpeApiListAll, ResourceLocationNvd cpeApiStartEndDate) { createDownloadThreads(fullMirrorRequired, lastModifiedDate, now, cpeApiListAll, cpeApiStartEndDate); // check at least 5 times if the executor is still running for (int i = 0; i < 5; i++) { processResponseDataUntilDone(); } } private void processResponseDataUntilDone() { super.executor.start(); do { final List copy; synchronized (apiResponseDataToBeProcessed) { if (apiResponseDataToBeProcessed.size() >= 4) { copy = new ArrayList<>(apiResponseDataToBeProcessed); apiResponseDataToBeProcessed.clear(); } else { copy = new ArrayList<>(); } } if (!copy.isEmpty()) { this.processApiCpeItems(copy); } try { Thread.sleep(1000); } catch (InterruptedException ignored) { } } while 
(super.executor.isRunning()); final List copy; synchronized (apiResponseDataToBeProcessed) { if (!apiResponseDataToBeProcessed.isEmpty()) { copy = new ArrayList<>(apiResponseDataToBeProcessed); apiResponseDataToBeProcessed.clear(); } else { copy = new ArrayList<>(); } } if (!copy.isEmpty()) { this.processApiCpeItems(copy); } } private void createDownloadThreads(boolean fullMirrorRequired, Date lastModifiedDate, Date now, ResourceLocationNvd locationAll, ResourceLocationNvd locationStartEndDate) { super.executor.submit(() -> { final JSONObject json = downloadCpeDecideWhatTimeFrame(fullMirrorRequired, lastModifiedDate, now, 0, locationAll, locationStartEndDate); final int totalResults = json.getInt("totalResults"); final int resultsPerPage = json.getInt("resultsPerPage"); int currentStartIndex = 0; if (totalResults == 0) { LOG.info("No CPEs found for the given range."); return; } LOG.info("Downloaded CPEs [{}] to [{}] of [{}]", currentStartIndex, currentStartIndex + resultsPerPage, totalResults); appendJsonToProcessResponseCache(json); while (currentStartIndex < totalResults) { currentStartIndex += resultsPerPage; final int finalCurrentStartIndex = currentStartIndex; super.executor.submit(() -> { final JSONObject subJson = downloadCpeDecideWhatTimeFrame(fullMirrorRequired, lastModifiedDate, now, finalCurrentStartIndex, locationAll, locationStartEndDate); final int subTotalResults = subJson.getInt("totalResults"); final int subResultsPerPage = subJson.getInt("resultsPerPage"); LOG.info("Downloaded CPEs [{}] to [{}] of [{}]", finalCurrentStartIndex, finalCurrentStartIndex + subResultsPerPage, subTotalResults); appendJsonToProcessResponseCache(subJson); }); } }); } private void appendJsonToProcessResponseCache(JSONObject json) { final JSONArray arr = ObjectUtils.firstNonNull(json.optJSONArray("products"), json.optJSONArray("matchStrings")); if (arr == null) { LOG.error("Unable to find 'products' or 'matchStrings' in the JSON response: {}", json.keySet()); } else { 
synchronized (apiResponseDataToBeProcessed) { apiResponseDataToBeProcessed.add(arr); } } } private JSONObject downloadCpeDecideWhatTimeFrame(boolean fullMirrorRequired, Date lastModifiedDate, Date now, int offset, ResourceLocationNvd locationAll, ResourceLocationNvd locationStartEndDate) { final AtomicReference json = new AtomicReference<>(); new Retry(() -> { if (fullMirrorRequired) { json.set(downloadCpePage(offset, locationAll)); } else { json.set(downloadCpePage(offset, lastModifiedDate, now, locationStartEndDate)); } }) .withDelay((int) (API_DELAY_BETWEEN_UNAUTHORIZED_REQUESTS * 1.5d)) // fallback to the unauthorized API access delay in case of fail .onException(Exception.class) .retryCount(8) .run(); return json.get(); } private void processApiCpeItems(Collection cpesArrays) { final List convertedProducts = new ArrayList<>(); for (JSONArray products : cpesArrays) { final JSONArray convertedProductArray = new JSONArray(); for (int i = 0; i < products.length(); i++) { final JSONObject product = products.getJSONObject(i); final JSONObject unwrapped = unwrapCpeEntry(product); final JSONObject converted = convertCpeMatchToCpeDictItem(unwrapped); convertedProductArray.put(converted); } convertedProducts.add(convertedProductArray); } final Map yearCves = new HashMap<>(); for (JSONArray cpesArray : convertedProducts) { final Map byYear = sortCpesIntoYears(cpesArray); for (Map.Entry yearEntry : byYear.entrySet()) { final JSONArray appendArray = yearCves.computeIfAbsent(yearEntry.getKey(), k -> new JSONArray()); for (int i = 0; i < yearEntry.getValue().length(); i++) { appendArray.put(yearEntry.getValue().getJSONObject(i)); } } } final int sizeBefore = cpesArrays.stream().mapToInt(JSONArray::length).sum(); final int sizeAfter = yearCves.values().stream().mapToInt(JSONArray::length).sum(); if (sizeBefore != sizeAfter) { LOG.error("Dropped at least one CPE whilst sorting CPEs into yearly files: [{}] -> [{}]", sizeBefore, sizeAfter); } processApiCpeItems(yearCves); } 
private void processApiCpeItems(Map yearCves) { if (yearCves.size() == 0) { LOG.warn("No CPEs to process from the API."); return; } LOG.info("Processing CPE data from years: [{}]", yearCves.entrySet().stream().sorted(Map.Entry.comparingByValue(Comparator.comparing(JSONArray::length).reversed())).map(e -> e.getKey() + " = " + e.getValue().length()).collect(Collectors.joining("; "))); for (Map.Entry entry : yearCves.entrySet()) { final int year = entry.getKey(); final JSONArray cpes = entry.getValue(); final JSONArray existingJson = parseCpeItemsFromDownloadedYear(year); final JSONArray mergedJson = mergeCpeItems(Arrays.asList(cpes, existingJson)); LOG.info("Year: [{}], merging existing with downloaded [{} + {} --> {}]", year, existingJson.length(), cpes.length(), mergedJson.length()); final File cveFile = new File(super.downloadIntoDirectory, year + ".json"); try { FileUtils.write(cveFile, mergedJson.toString(), StandardCharsets.UTF_8); } catch (IOException e) { throw new RuntimeException("Unable to write NVD CPE year file " + cveFile.getAbsolutePath(), e); } } } private JSONObject convertCpeMatchToCpeDictItem(JSONObject cpe) { if (!cpe.has("criteria") || !cpe.has("matchCriteriaId") || !cpe.has("lastModified") || !cpe.has("created")) { return cpe; } return new JSONObject() .put("deprecated", false) .put("cpeName", cpe.getString("criteria")) .put("cpeNameId", cpe.getString("matchCriteriaId")) .put("lastModified", cpe.getString("lastModified")) .put("created", cpe.getString("created")); // TODO: validate that the following are not needed: // this should not be necessary, as the matches IDs are guaranteed to be present in the CPE Dictionary API: //.put("matches", cpe.optJSONArray("matches")); } private static JSONObject unwrapCpeEntry(JSONObject cpeMatch) { if (cpeMatch.has("cpe")) { return cpeMatch.getJSONObject("cpe"); } else if (cpeMatch.has("matchString")) { return cpeMatch.getJSONObject("matchString"); } return cpeMatch; } private JSONArray 
parseCpeItemsFromDownloadedYear(int year) { final File cveFile = new File(super.downloadIntoDirectory, year + ".json"); if (!cveFile.exists()) { return new JSONArray(); } try { final String content = FileUtils.readFileToString(cveFile, StandardCharsets.UTF_8); return new JSONArray(content); } catch (IOException e) { throw new RuntimeException("Unable to read NVD CPE year file " + cveFile.getAbsolutePath(), e); } } private Map sortCpesIntoYears(JSONArray cpesArray) { final Map yearCves = new HashMap<>(); if (cpesArray == null) { LOG.warn("No CPEs to process from the API while sorting CPEs into years."); return yearCves; } for (int i = 0; i < cpesArray.length(); i++) { final JSONObject cpe = cpesArray.getJSONObject(i); if (!cpe.has("created")) { throw new RuntimeException("CPE entry does not provide 'created' timestamp: " + cpe); } final String createdTimestamp = cpe.getString("created"); // e.q. 2007-08-23T21:05:57.937 final int year = Integer.parseInt(createdTimestamp.substring(0, 4)); if (year < 1999 || year > 9999) { LOG.warn("CPE entry most likely has invalid year [{}]", cpe); } yearCves.computeIfAbsent(year, k -> new JSONArray()) .put(cpe); } final int countBefore = cpesArray.length(); final int countAfter = yearCves.values().stream().mapToInt(JSONArray::length).sum(); if (countBefore != countAfter) { LOG.warn("CPEs were lost during sorting into years: [{} --> {}]", countBefore, countAfter); } return yearCves; } private JSONArray mergeCpeItems(List cpes) { final JSONArray mergedJson = new JSONArray(); final Set knownIds = new HashSet<>(); for (JSONArray array : cpes) { for (int i = 0; i < array.length(); i++) { final JSONObject cve = unwrapCpeEntry(array.getJSONObject(i)); final String id = ObjectUtils.firstNonNull(cve.optString("cpeNameId", null), cve.optString("matchCriteriaId", null)); if (knownIds.add(id)) { mergedJson.put(cve); } else if (id == null) { LOG.warn("CPE entry does not provide 'cpeNameId' or 'matchCriteriaId' - skipping: [{}]", cve); } else { 
LOG.debug("CPE entry with ID [{}] already exists - skipping: [{}]", id, cve); } } } return mergedJson; } private JSONObject downloadCpePage(int offset, ResourceLocationNvd baseLocation) { final URL pageUrl = super.getRemoteResourceLocationUrl(baseLocation, offset); final List response = super.downloader.fetchResponseBodyFromUrlAsList(pageUrl, Collections.singletonMap("apiKey", apiKey)); try { return new JSONObject(String.join("", response)); } catch (JSONException e) { throw new RuntimeException("Unable to parse NVD CPE API response: " + response + "\nRequest URL: " + pageUrl, e); } } private JSONObject downloadCpePage(int offset, Date lastModStartDate, Date lastModEndDate, ResourceLocationNvd baseLocation) { final long diff = lastModEndDate.getTime() - lastModStartDate.getTime(); if (diff > 120L * 24 * 60 * 60 * 1000) { throw new IllegalArgumentException("Difference between lastModStartDate and lastModEndDate must not be greater than 120 days"); } final String startDate = ISO_8601_DATE_FORMAT.format(lastModStartDate); final String endDate = ISO_8601_DATE_FORMAT.format(lastModEndDate); final URL pageUrl = super.getRemoteResourceLocationUrl(baseLocation, offset, startDate, endDate); final List response = super.downloader.fetchResponseBodyFromUrlAsList(pageUrl, Collections.singletonMap("apiKey", apiKey)); try { return new JSONObject(String.join("", response)); } catch (JSONException e) { throw new RuntimeException("Unable to parse NVD CVE API response: " + response + "\nRequest URL: " + pageUrl, e); } } private boolean isFullMirrorRequired() { final File[] downloadFiles = super.downloadIntoDirectory.listFiles(); if (downloadFiles == null) { LOG.info("No CPE JSON files found in download directory, performing full mirror"); return true; } final List files = Arrays.stream(downloadFiles).map(File::getName).collect(Collectors.toList()); if (files.stream().noneMatch(file -> file.endsWith(".json"))) { LOG.info("No CPE JSON files found in download directory, performing full 
mirror"); return true; } final int latestCheckYear = Calendar.getInstance().get(Calendar.YEAR) - 1; for (int year = 2007; year <= latestCheckYear; year++) { if (!files.contains(year + ".json")) { LOG.info("Missing CPE JSON file for year [{}], performing full mirror", year); return true; } } final long days120 = 120L * 24 * 60 * 60 * 1000; final long directoryLastModified = getDownloadDirectoryLastModified(); if (super.isUpdatedAgeOlderThan(directoryLastModified, days120)) { LOG.info("Download directory last modified date is older than 120 days, performing full mirror"); return true; } return false; } @Override protected boolean additionalIsDownloadRequired() { if (isFullMirrorRequired()) { return true; } final long lastModified = getDownloadDirectoryLastModified(); final Date lastModifiedDate = new Date(lastModified); final Date now = new Date(TimeUtils.utcNow()); final JSONObject changesMatch = downloadCpePage(0, lastModifiedDate, now, ResourceLocationNvd.CPE_MATCH_API_START_END_DATE); if (changesMatch.has("totalResults") && changesMatch.getInt("totalResults") > 0) { LOG.info("NVD CPE Match API reports [{}] new/changed entries since last download", changesMatch.getInt("totalResults")); return true; } final JSONObject changesDict = downloadCpePage(0, lastModifiedDate, now, ResourceLocationNvd.CPE_API_START_END_DATE); if (changesDict.has("totalResults") && changesDict.getInt("totalResults") > 0) { LOG.info("NVD CPE Dictionary API reports [{}] new/changed entries since last download", changesDict.getInt("totalResults")); return true; } return false; } @Override public void setRemoteResourceLocation(String location, String url) { super.setRemoteResourceLocation(ResourceLocationNvd.valueOf(location), url); } public enum ResourceLocationNvd implements ResourceLocation { /** *
    *
  1. startIndex 0-based index of the first CPE to be returned in the response data
  2. *
*/ CPE_API_LIST_ALL("https://services.nvd.nist.gov/rest/json/cpes/2.0?startIndex=%d"), /** * The maximum allowable range when using any date range parameters is 120 consecutive days.
* Values must be entered in the extended ISO-8061 date/time format:
* [YYYY][“-”][MM][“-”][DD][“T”][HH][“:”][MM][“:”][SS][Z] *
    *
  1. startIndex 0-based index of the first CPE to be returned in the response data
  2. *
  3. lastModStartDate the start date
  4. *
  5. lastModEndDate the end date
  6. *
*/ CPE_API_START_END_DATE("https://services.nvd.nist.gov/rest/json/cpes/2.0?startIndex=%d&lastModStartDate=%s&lastModEndDate=%s"), /** *
    *
  1. startIndex 0-based index of the first CPE to be returned in the response data
  2. *
*/ CPE_MATCH_API_LIST_ALL("https://services.nvd.nist.gov/rest/json/cpematch/2.0?startIndex=%d"), /** * The maximum allowable range when using any date range parameters is 120 consecutive days.
* Values must be entered in the extended ISO-8061 date/time format:
* [YYYY][“-”][MM][“-”][DD][“T”][HH][“:”][MM][“:”][SS][Z] *
    *
  1. startIndex 0-based index of the first CPE to be returned in the response data
  2. *
  3. lastModStartDate the start date
  4. *
  5. lastModEndDate the end date
  6. *
*/ CPE_MATCH_API_START_END_DATE("https://services.nvd.nist.gov/rest/json/cpematch/2.0?startIndex=%d&lastModStartDate=%s&lastModEndDate=%s"); private final String defaultValue; ResourceLocationNvd(String defaultValue) { this.defaultValue = defaultValue; } @Override public String getDefault() { return this.defaultValue; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy