All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metaeffekt.mirror.download.advisor.CertSeiDownload Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.mirror.download.advisor;

import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.mirror.download.documentation.MirrorMetadata;
import com.metaeffekt.mirror.Retry;
import com.metaeffekt.mirror.download.Download;
import com.metaeffekt.mirror.download.ResourceLocation;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static com.metaeffekt.mirror.download.advisor.CertSeiDownload.ResourceLocationCertSei.NOTES_API_URL;
import static com.metaeffekt.mirror.download.advisor.CertSeiDownload.ResourceLocationCertSei.SUMMARY_API_URL;

/**
 * 

References:

* *

In order for the downloader to find what documents exist and need to be downloaded, the summary-API is used. * These summaries are provided by year and in a JSON format. * The starting year is 2000, which is why all summaries are downloaded from 2000 to the current year.

*

These summaries contain all notes (advisors) that were published in that year. The ID of each note is used to download * the individual notes.

*

To store the files locally, one directory per year is created, where the advisors are written to. The filename is * VU#[ID].json.

*
.
 * ├── 2000
 * │   ├── VU#111677.json
 * │   ├── VU#17566.json
 * ...
 * └── 2022
 *     ├── VU#119678.json
 * ...
 * 
*/ @MirrorMetadata(directoryName = "certsei", mavenPropertyName = "certSeiDownload") public class CertSeiDownload extends Download { private final static Logger LOG = LoggerFactory.getLogger(CertSeiDownload.class); private final List availableArchiveYears; private Map> cachedChangedYearlySummaryVUIDs; public CertSeiDownload(File baseMirrorDirectory) { super(baseMirrorDirectory, CertSeiDownload.class); availableArchiveYears = Collections.unmodifiableList(IntStream.range(2000, Calendar.getInstance().get(Calendar.YEAR) + 1) .boxed() .collect(Collectors.toList())); } @Override protected void performDownload() { if (cachedChangedYearlySummaryVUIDs == null) { cachedChangedYearlySummaryVUIDs = fetchChangedYearlySummaryVUIDs(); } for (Map.Entry> yearVUIDs : cachedChangedYearlySummaryVUIDs.entrySet()) { final int year = yearVUIDs.getKey(); final File yearDirectory = new File(super.downloadIntoDirectory, String.valueOf(yearVUIDs.getKey())); for (final String note : yearVUIDs.getValue().stream().sorted().collect(Collectors.toList())) { super.executor.submit(() -> { final String idNumber = note.replace("VU#", ""); final URL requestUrl = getRemoteResourceLocationUrl(NOTES_API_URL, idNumber); final File noteFile = new File(yearDirectory, note + ".json"); long remoteFileSize = super.downloader.fetchFileSizeFromUrl(requestUrl); if (!noteFile.exists() || noteFile.length() != remoteFileSize) { LOG.info("Fetching note [{}] for year [{}]", note, yearVUIDs.getKey()); new Retry<>(() -> super.downloader.fetchResponseBodyFromUrlToFile(requestUrl, noteFile)) .withValidator((result) -> { if (!noteFile.exists()) { LOG.warn("Note file does not exist: {}", noteFile.getAbsolutePath()); return false; } try { return FileUtils.readLines(noteFile, StandardCharsets.UTF_8).get(0).startsWith("{"); } catch (Exception e) { LOG.warn("Content of note file is not valid JSON: {}", noteFile.getAbsolutePath()); return false; } }) .onException(Exception.class) .withDelay(5000) .retryCount(10) .run(); } else { LOG.info("Note [{}] for year [{}] is already up-to-date", note, yearVUIDs.getKey()); } }); } final URL requestUrl = getRemoteResourceLocationUrl(SUMMARY_API_URL, year); long remoteFileSize = super.downloader.fetchFileSizeFromUrl(requestUrl); super.propertyFiles.set(super.downloadIntoDirectory, "info", InfoFileAttributes.CERT_SEI_PREFIX.getKey() + "summary-" + year, remoteFileSize); } super.executor.setDelay(5); super.executor.start(); try { super.executor.join(); } catch (InterruptedException e) { throw new RuntimeException("Failed to wait for executor to finish", e); } cachedChangedYearlySummaryVUIDs = null; } @Override protected boolean additionalIsDownloadRequired() { cachedChangedYearlySummaryVUIDs = fetchChangedYearlySummaryVUIDs(); if (cachedChangedYearlySummaryVUIDs.size() > 0) { LOG.info("Found [{}] changed yearly summary VUIDs, download required", cachedChangedYearlySummaryVUIDs.size()); return true; } else { return false; } } private Map> fetchChangedYearlySummaryVUIDs() { final Map> yearlyNotes = Collections.synchronizedMap(new LinkedHashMap<>()); for (Integer year : availableArchiveYears) { super.executor.submit(() -> { final long previousFileSize = super.propertyFiles.getLong(super.downloadIntoDirectory, "info", InfoFileAttributes.CERT_SEI_PREFIX.getKey() + "summary-" + year) .orElse(0L); final URL requestUrl = getRemoteResourceLocationUrl(SUMMARY_API_URL, year); long remoteFileSize = super.downloader.fetchFileSizeFromUrl(requestUrl); boolean shouldFetchYear; if (previousFileSize == 0) { shouldFetchYear = true; } else { shouldFetchYear = remoteFileSize != previousFileSize; } if (shouldFetchYear) { final List notesListJsonString = super.downloader.fetchResponseBodyFromUrlAsList(requestUrl); if (notesListJsonString.isEmpty() || notesListJsonString.get(0).charAt(0) != '{') { throw new RuntimeException("Invalid response from CERT-SEI summary API: " + notesListJsonString.get(0)); } final JSONObject notesListJson = new JSONObject(String.join("", notesListJsonString)); LOG.info("Year [{}] has changed, contains [{}] notes", year, notesListJson.optString("count", "unknown")); final List noteAppender = yearlyNotes.computeIfAbsent(year, k -> new ArrayList<>()); notesListJson.getJSONArray("notes").toList().stream() .map(String::valueOf) .forEach(noteAppender::add); } }); } super.executor.start(); try { super.executor.join(); } catch (InterruptedException e) { throw new RuntimeException("Failed to wait for executor to finish", e); } return yearlyNotes; } @Override public void setRemoteResourceLocation(String location, String url) { super.setRemoteResourceLocation(ResourceLocationCertSei.valueOf(location), url); } public enum ResourceLocationCertSei implements ResourceLocation { /** * Summary URL for a given year. *
    *
  1. %d Summary year (example: 2020)
  2. *
*/ SUMMARY_API_URL("https://kb.cert.org/vuls/api/%d/summary/"), /** * URL for a certain note. *
    *
  1. %s Note Identifier (example: 257161)
  2. *
*/ NOTES_API_URL("https://kb.cert.org/vuls/api/%s/"); private final String defaultValue; ResourceLocationCertSei(String defaultValue) { this.defaultValue = defaultValue; } @Override public String getDefault() { return this.defaultValue; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy