// com.metaeffekt.mirror.download.advisor.CertSeiDownload Maven / Gradle / Ivy
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metaeffekt.mirror.download.advisor;
import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.mirror.download.documentation.MirrorMetadata;
import com.metaeffekt.mirror.Retry;
import com.metaeffekt.mirror.download.Download;
import com.metaeffekt.mirror.download.ResourceLocation;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import static com.metaeffekt.mirror.download.advisor.CertSeiDownload.ResourceLocationCertSei.NOTES_API_URL;
import static com.metaeffekt.mirror.download.advisor.CertSeiDownload.ResourceLocationCertSei.SUMMARY_API_URL;
/**
* References:
* <ul>
* <li>Data provider: Software Engineering Institute</li>
* <li>API reference: Vulnerability Note API - VINCE - VulWiki</li>
* </ul>
*
* In order for the downloader to find what documents exist and need to be downloaded, the summary-API is used.
* These summaries are provided by year and in a JSON format.
* The starting year is 2000, which is why all summaries are downloaded from 2000 to the current year.
* These summaries contain all notes (advisors) that were published in that year. The ID of each note is used to download
* the individual notes.
* To store the files locally, one directory per year is created, where the advisors are written to. The filename is
* {@code VU#[ID].json}.
* <pre>
* .
* ├── 2000
* │   ├── VU#111677.json
* │   ├── VU#17566.json
* │   ...
* └── 2022
*     ├── VU#119678.json
*     ...
* </pre>
*
*/
@MirrorMetadata(directoryName = "certsei", mavenPropertyName = "certSeiDownload")
public class CertSeiDownload extends Download {
private final static Logger LOG = LoggerFactory.getLogger(CertSeiDownload.class);
private final List availableArchiveYears;
private Map> cachedChangedYearlySummaryVUIDs;
public CertSeiDownload(File baseMirrorDirectory) {
super(baseMirrorDirectory, CertSeiDownload.class);
availableArchiveYears = Collections.unmodifiableList(IntStream.range(2000, Calendar.getInstance().get(Calendar.YEAR) + 1)
.boxed()
.collect(Collectors.toList()));
}
@Override
protected void performDownload() {
if (cachedChangedYearlySummaryVUIDs == null) {
cachedChangedYearlySummaryVUIDs = fetchChangedYearlySummaryVUIDs();
}
for (Map.Entry> yearVUIDs : cachedChangedYearlySummaryVUIDs.entrySet()) {
final int year = yearVUIDs.getKey();
final File yearDirectory = new File(super.downloadIntoDirectory, String.valueOf(yearVUIDs.getKey()));
for (final String note : yearVUIDs.getValue().stream().sorted().collect(Collectors.toList())) {
super.executor.submit(() -> {
final String idNumber = note.replace("VU#", "");
final URL requestUrl = getRemoteResourceLocationUrl(NOTES_API_URL, idNumber);
final File noteFile = new File(yearDirectory, note + ".json");
long remoteFileSize = super.downloader.fetchFileSizeFromUrl(requestUrl);
if (!noteFile.exists() || noteFile.length() != remoteFileSize) {
LOG.info("Fetching note [{}] for year [{}]", note, yearVUIDs.getKey());
new Retry<>(() -> super.downloader.fetchResponseBodyFromUrlToFile(requestUrl, noteFile))
.withValidator((result) -> {
if (!noteFile.exists()) {
LOG.warn("Note file does not exist: {}", noteFile.getAbsolutePath());
return false;
}
try {
return FileUtils.readLines(noteFile, StandardCharsets.UTF_8).get(0).startsWith("{");
} catch (Exception e) {
LOG.warn("Content of note file is not valid JSON: {}", noteFile.getAbsolutePath());
return false;
}
})
.onException(Exception.class)
.withDelay(5000)
.retryCount(10)
.run();
} else {
LOG.info("Note [{}] for year [{}] is already up-to-date", note, yearVUIDs.getKey());
}
});
}
final URL requestUrl = getRemoteResourceLocationUrl(SUMMARY_API_URL, year);
long remoteFileSize = super.downloader.fetchFileSizeFromUrl(requestUrl);
super.propertyFiles.set(super.downloadIntoDirectory, "info", InfoFileAttributes.CERT_SEI_PREFIX.getKey() + "summary-" + year, remoteFileSize);
}
super.executor.setDelay(5);
super.executor.start();
try {
super.executor.join();
} catch (InterruptedException e) {
throw new RuntimeException("Failed to wait for executor to finish", e);
}
cachedChangedYearlySummaryVUIDs = null;
}
/**
 * Refreshes the cache of changed yearly summaries and reports whether any year has changed.
 *
 * @return {@code true} if at least one yearly summary was reported as changed, {@code false} otherwise
 */
@Override
protected boolean additionalIsDownloadRequired() {
    cachedChangedYearlySummaryVUIDs = fetchChangedYearlySummaryVUIDs();

    final int changedYearCount = cachedChangedYearlySummaryVUIDs.size();
    if (changedYearCount == 0) {
        return false;
    }

    LOG.info("Found [{}] changed yearly summary VUIDs, download required", changedYearCount);
    return true;
}
private Map> fetchChangedYearlySummaryVUIDs() {
final Map> yearlyNotes = Collections.synchronizedMap(new LinkedHashMap<>());
for (Integer year : availableArchiveYears) {
super.executor.submit(() -> {
final long previousFileSize = super.propertyFiles.getLong(super.downloadIntoDirectory, "info", InfoFileAttributes.CERT_SEI_PREFIX.getKey() + "summary-" + year)
.orElse(0L);
final URL requestUrl = getRemoteResourceLocationUrl(SUMMARY_API_URL, year);
long remoteFileSize = super.downloader.fetchFileSizeFromUrl(requestUrl);
boolean shouldFetchYear;
if (previousFileSize == 0) {
shouldFetchYear = true;
} else {
shouldFetchYear = remoteFileSize != previousFileSize;
}
if (shouldFetchYear) {
final List notesListJsonString = super.downloader.fetchResponseBodyFromUrlAsList(requestUrl);
if (notesListJsonString.isEmpty() || notesListJsonString.get(0).charAt(0) != '{') {
throw new RuntimeException("Invalid response from CERT-SEI summary API: " + notesListJsonString.get(0));
}
final JSONObject notesListJson = new JSONObject(String.join("", notesListJsonString));
LOG.info("Year [{}] has changed, contains [{}] notes", year, notesListJson.optString("count", "unknown"));
final List noteAppender = yearlyNotes.computeIfAbsent(year, k -> new ArrayList<>());
notesListJson.getJSONArray("notes").toList().stream()
.map(String::valueOf)
.forEach(noteAppender::add);
}
});
}
super.executor.start();
try {
super.executor.join();
} catch (InterruptedException e) {
throw new RuntimeException("Failed to wait for executor to finish", e);
}
return yearlyNotes;
}
/**
 * Overrides the URL of one of the remote resource locations of this downloader.
 *
 * @param location name of a {@link ResourceLocationCertSei} constant, resolved via {@code valueOf}
 * @param url      the URL to use for that location
 * @throws IllegalArgumentException if {@code location} does not name a {@link ResourceLocationCertSei} constant
 */
@Override
public void setRemoteResourceLocation(String location, String url) {
    final ResourceLocationCertSei resolvedLocation = ResourceLocationCertSei.valueOf(location);
    super.setRemoteResourceLocation(resolvedLocation, url);
}
/**
 * Remote resource locations used by this downloader, each with its default URL template.
 */
public enum ResourceLocationCertSei implements ResourceLocation {
    /**
     * Summary URL for a given year.
     * <ul>
     *     <li>{@code %d} Summary year (example: {@code 2020})</li>
     * </ul>
     */
    SUMMARY_API_URL("https://kb.cert.org/vuls/api/%d/summary/"),

    /**
     * URL for a certain note.
     * <ul>
     *     <li>{@code %s} Note Identifier (example: {@code 257161})</li>
     * </ul>
     */
    NOTES_API_URL("https://kb.cert.org/vuls/api/%s/");

    /** Default URL template of this location. */
    private final String defaultUrlTemplate;

    ResourceLocationCertSei(String defaultUrlTemplate) {
        this.defaultUrlTemplate = defaultUrlTemplate;
    }

    /**
     * @return the default URL template given to this constant
     */
    @Override
    public String getDefault() {
        return defaultUrlTemplate;
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy