// com.metaeffekt.mirror.index.nvd.CpeDictionaryIndex Maven / Gradle / Ivy
/*
* Copyright 2021-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.metaeffekt.mirror.index.nvd;
import com.metaeffekt.artifact.analysis.utils.CustomCollectors;
import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.artifact.analysis.utils.StringUtils;
import com.metaeffekt.artifact.analysis.utils.TimeUtils;
import com.metaeffekt.artifact.analysis.vulnerability.CommonEnumerationUtil;
import com.metaeffekt.mirror.download.documentation.MirrorMetadata;
import com.metaeffekt.mirror.contents.base.Reference;
import com.metaeffekt.mirror.download.nvd.CpeDictionaryDownload;
import com.metaeffekt.mirror.index.Index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import us.springett.parsers.cpe.Cpe;
import javax.xml.parsers.ParserConfigurationException;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
 * Index that converts the downloaded {@code cpe-match.json} and {@code cpe-dict.xml} files into Lucene
 * documents, keyed by the CPE 2.3 formatted string of each entry.
 * <p>
 * Entries from the CPE match file are added first; entries from the CPE dictionary are added afterwards and
 * replace a match-derived document with the same key.
 *
 * @deprecated this index is flagged as deprecated in its {@link MirrorMetadata}; the replacement is not
 * visible from this file.
 */
@Deprecated
@MirrorMetadata(directoryName = "cpe-dict", mavenPropertyName = "cpeDictionaryIndex", deprecated = true)
public class CpeDictionaryIndex extends Index {

    private static final Logger LOG = LoggerFactory.getLogger(CpeDictionaryIndex.class);

    /**
     * Creates the index and declares {@link CpeDictionaryDownload} as its only required download.
     *
     * @param baseMirrorDirectory base directory of the mirror, passed on to {@link Index}
     */
    public CpeDictionaryIndex(File baseMirrorDirectory) {
        super(baseMirrorDirectory, CpeDictionaryIndex.class, Collections.singletonList(CpeDictionaryDownload.class), Collections.emptyList());
    }

    /**
     * Reads {@code cpe-match.json} and {@code cpe-dict.xml} from the first required download directory and
     * creates one document per CPE.
     *
     * @return the created documents, keyed by CPE 2.3 formatted string
     * @throws RuntimeException if one of the two input files is missing, unreadable or cannot be parsed
     */
    @Override
    protected Map<String, Document> createIndexDocuments() {
        final Map<String, Document> documents = new HashMap<>();

        final File downloadsDirectory = super.requiredDownloads[0];
        final File cpeDictionary = requireDownloadedFile(downloadsDirectory, "cpe-dict.xml");
        final File cpeMatch = requireDownloadedFile(downloadsDirectory, "cpe-match.json");

        parseCpeMatch(cpeMatch, documents);

        // to prevent all the CPE information being stored in memory, write the existing documents into the index
        // NOTE(review): the map is not cleared in this method, so whether memory is actually released depends
        // on what writeIndexDocuments does with the map - confirm against Index.
        super.writeIndexDocuments(documents);

        parseCpeDict(cpeDictionary, documents);

        return documents;
    }

    /**
     * Resolves a file inside the downloads directory and fails if it is not a regular file.
     */
    private static File requireDownloadedFile(File downloadsDirectory, String fileName) {
        final File file = new File(downloadsDirectory, fileName);
        if (!file.isFile()) {
            throw new RuntimeException("Could not find " + fileName + " in " + downloadsDirectory.getAbsolutePath());
        }
        return file;
    }

    /**
     * Computes after how many entries a progress message is logged (roughly four messages per run).
     * Never returns less than 1, so that it is always safe to use as the right-hand side of {@code %}.
     */
    private static int loggingStepSize(int totalEntries) {
        return Math.max(1, totalEntries / 4);
    }

    /**
     * Adds a document for every CPE found in the {@code matches} array of the CPE match file, both from the
     * nested {@code cpe_name} arrays and from the {@code cpe23Uri} of the match itself.
     *
     * @param cpeMatch  the {@code cpe-match.json} file
     * @param documents target map that receives the created documents
     * @throws RuntimeException if the file cannot be read or is not valid JSON
     */
    private void parseCpeMatch(File cpeMatch, Map<String, Document> documents) {
        LOG.info("Parsing CPE match in {}", cpeMatch.getAbsolutePath());

        final String jsonContents;
        try {
            jsonContents = FileUtils.readFileToString(cpeMatch, StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new RuntimeException("Unable to read file contents during indexing: " + cpeMatch.getAbsolutePath(), e);
        }

        final JSONObject json;
        try {
            json = new JSONObject(jsonContents);
        } catch (JSONException e) {
            throw new RuntimeException("Unable to parse JSON document during indexing: " + cpeMatch.getAbsolutePath(), e);
        }

        final JSONArray matchesCpeList = json.getJSONArray("matches");
        final int totalMatches = matchesCpeList.length();
        final int loggingStepSize = loggingStepSize(totalMatches);

        for (int i = 0; i < totalMatches; i++) {
            final JSONObject match = matchesCpeList.getJSONObject(i);

            final JSONArray cpeNames = match.optJSONArray("cpe_name");
            if (cpeNames != null) {
                for (int j = 0; j < cpeNames.length(); j++) {
                    final String nestedCpe = cpeNames.getJSONObject(j).getString("cpe23Uri");
                    appendSingleCpeFromCpeMatchToIndexableDocuments(nestedCpe, documents);
                }
            }

            final String matchCpe = match.optString("cpe23Uri");
            if (StringUtils.hasText(matchCpe)) {
                appendSingleCpeFromCpeMatchToIndexableDocuments(matchCpe, documents);
            }

            if (i % loggingStepSize == 0) {
                LOG.info("Created documents for [{} / {}] CPE entries", i, totalMatches);
            }
        }
    }

    /**
     * Adds a document for a single CPE from the match file unless the CPE cannot be parsed or a document with
     * the same CPE 2.3 formatted string is already present.
     *
     * @param cpeString the CPE as found in the match file
     * @param documents target map that receives the created document
     */
    private void appendSingleCpeFromCpeMatchToIndexableDocuments(String cpeString, Map<String, Document> documents) {
        final Optional<Cpe> optionalCpe = CommonEnumerationUtil.parseCpe(cpeString);
        if (!optionalCpe.isPresent()) {
            return;
        }

        final Cpe cpe = optionalCpe.get();
        final String cpe23FS = cpe.toCpe23FS();

        // check before building the document; an existing entry is never replaced by this method
        if (documents.containsKey(cpe23FS)) {
            return;
        }

        final Document doc = createDocumentFromCpe(cpe);

        // NVD 1.0 mirror does not provide some of the data that the 2.x API has: [nvdId, deprecated, titles, references]
        // explicit charset keeps the derived id independent of the platform default encoding
        doc.add(new TextField("nvdId", UUID.nameUUIDFromBytes(cpe23FS.getBytes(StandardCharsets.UTF_8)).toString(), Field.Store.YES));
        doc.add(new TextField("deprecated", "false", Field.Store.YES));
        doc.add(new TextField("title", "", Field.Store.YES));
        doc.add(new TextField("references", "[]", Field.Store.YES));

        documents.put(cpe23FS, doc);
    }

    /**
     * Adds a document for every {@code cpe-item} element of the CPE dictionary. An existing document with the
     * same CPE 2.3 formatted string is replaced. Items whose {@code name} attribute cannot be parsed as a CPE
     * are skipped; all remaining items are still processed.
     *
     * @param cpeDict   the {@code cpe-dict.xml} file
     * @param documents target map that receives the created documents
     * @throws RuntimeException if the file cannot be read or parsed, or if an item lacks its
     *                          {@code meta:item-metadata} element or {@code nvd-id} attribute
     */
    private void parseCpeDict(File cpeDict, Map<String, Document> documents) {
        LOG.info("Parsing CPE dictionary in {}", cpeDict.getAbsolutePath());

        final org.w3c.dom.Document xmlDocument;
        try {
            final String xmlContents = FileUtils.readFileToString(cpeDict, StandardCharsets.UTF_8);
            xmlDocument = parseXmlDocument(xmlContents);
        } catch (IOException e) {
            throw new RuntimeException("Unable to read file contents during indexing: " + cpeDict.getAbsolutePath(), e);
        } catch (ParserConfigurationException | SAXException e) {
            throw new RuntimeException("Unable to parse XML document during indexing: " + cpeDict.getAbsolutePath(), e);
        }

        final NodeList cpeItems = xmlDocument.getElementsByTagName("cpe-item");
        final int totalItems = cpeItems.getLength();
        final int loggingStepSize = loggingStepSize(totalItems);

        for (int i = 0; i < totalItems; i++) {
            final org.w3c.dom.Element cpeItem = (org.w3c.dom.Element) cpeItems.item(i);
            final String cpe22 = cpeItem.getAttribute("name");

            final Optional<Cpe> optionalCpe = CommonEnumerationUtil.parseCpe(cpe22);
            if (optionalCpe.isPresent()) {
                final Cpe cpe = optionalCpe.get();
                documents.put(cpe.toCpe23FS(), createDocumentFromCpeItem(cpeItem, cpe));
            } else {
                // skip only this entry; aborting here would drop every following dictionary entry
                LOG.warn("Skipping cpe-item with unparseable CPE name [{}]", cpe22);
            }

            if (i % loggingStepSize == 0) {
                LOG.info("Created documents for [{} / {}] CPE entries", i, totalItems);
            }
        }
    }

    /**
     * Creates the document for a single {@code cpe-item} element: the CPE fields plus deprecation info,
     * title, references, NVD id and, if parseable, the modification date.
     *
     * @param cpeItem the {@code cpe-item} element
     * @param cpe     the CPE parsed from the element's {@code name} attribute
     * @return the populated document
     * @throws RuntimeException if the {@code meta:item-metadata} element or its {@code nvd-id} attribute is missing
     */
    private static Document createDocumentFromCpeItem(org.w3c.dom.Element cpeItem, Cpe cpe) {
        final Document doc = createDocumentFromCpe(cpe);

        // NodeList.item returns null when there is no such element; a missing title is treated as "no title"
        final org.w3c.dom.Node titleNode = cpeItem.getElementsByTagName("title").item(0);
        final String title = titleNode == null ? null : titleNode.getTextContent();

        final List<Reference> references = new ArrayList<>();
        final NodeList referencesNode = cpeItem.getElementsByTagName("references");
        if (referencesNode.getLength() > 0) {
            final NodeList referenceNodes = ((org.w3c.dom.Element) referencesNode.item(0)).getElementsByTagName("reference");
            for (int j = 0; j < referenceNodes.getLength(); j++) {
                final org.w3c.dom.Element referenceNode = (org.w3c.dom.Element) referenceNodes.item(j);
                final String href = referenceNode.getAttribute("href");
                final String text = referenceNode.getTextContent();
                references.add(Reference.fromTitleAndUrl(text, href));
            }
        }

        // resolve the metadata element once instead of repeating the lookup for every attribute
        final org.w3c.dom.Node metadata = cpeItem.getElementsByTagName("meta:item-metadata").item(0);
        if (metadata == null) {
            throw new RuntimeException("Missing meta:item-metadata element for CPE dictionary entry: " + cpe.toCpe23FS());
        }
        final org.w3c.dom.NamedNodeMap metadataAttributes = metadata.getAttributes();

        final String nvdId = attributeText(metadataAttributes, "nvd-id");
        if (nvdId == null) {
            throw new RuntimeException("Missing nvd-id attribute for CPE dictionary entry: " + cpe.toCpe23FS());
        }
        final String modificationDate = attributeText(metadataAttributes, "modification-date");
        final String deprecatedByNvdId = attributeText(metadataAttributes, "deprecated-by-nvd-id");

        if (deprecatedByNvdId != null) {
            doc.add(new TextField("deprecatedByNvdId", deprecatedByNvdId, Field.Store.YES));
            doc.add(new TextField("deprecated", "true", Field.Store.YES));
        } else {
            doc.add(new TextField("deprecated", "false", Field.Store.YES));
        }

        if (title != null && StringUtils.hasText(title)) {
            doc.add(new TextField("title", new JSONArray().put(new JSONObject().put("title", title).put("lang", "en")).toString(), Field.Store.YES));
        }
        doc.add(new TextField("references", references.stream().map(Reference::toJson).collect(CustomCollectors.toJsonArray()).toString(), Field.Store.YES));
        doc.add(new TextField("nvdId", nvdId, Field.Store.YES));

        // the update date is only stored when the attribute exists and can be parsed
        final Date parsedUpdateDate = modificationDate == null ? null : TimeUtils.tryParse(modificationDate);
        if (parsedUpdateDate != null) {
            doc.add(new TextField("updateDate", String.valueOf(parsedUpdateDate.getTime()), Field.Store.YES));
        }

        return doc;
    }

    /**
     * Returns the text of the named attribute, or {@code null} if the attribute is not present.
     */
    private static String attributeText(org.w3c.dom.NamedNodeMap attributes, String name) {
        final org.w3c.dom.Node attribute = attributes.getNamedItem(name);
        return attribute == null ? null : attribute.getTextContent();
    }

    /**
     * Creates a document that stores each component of the given CPE in its own field, plus the complete
     * CPE 2.3 formatted string in the {@code cpe23Uri} field.
     *
     * @param cpe the CPE to convert
     * @return a new document containing the CPE fields
     */
    private static Document createDocumentFromCpe(Cpe cpe) {
        final Document doc = new Document();

        doc.add(new TextField("part", cpe.getPart().getAbbreviation(), Field.Store.YES));
        doc.add(new TextField("vendor", cpe.getVendor(), Field.Store.YES));
        doc.add(new TextField("product", cpe.getProduct(), Field.Store.YES));
        doc.add(new TextField("version", cpe.getVersion(), Field.Store.YES));
        doc.add(new TextField("update", cpe.getUpdate(), Field.Store.YES));
        doc.add(new TextField("edition", cpe.getEdition(), Field.Store.YES));
        doc.add(new TextField("language", cpe.getLanguage(), Field.Store.YES));
        doc.add(new TextField("sw_edition", cpe.getSwEdition(), Field.Store.YES));
        doc.add(new TextField("target_sw", cpe.getTargetSw(), Field.Store.YES));
        doc.add(new TextField("target_hw", cpe.getTargetHw(), Field.Store.YES));
        doc.add(new TextField("other", cpe.getOther(), Field.Store.YES));
        doc.add(new TextField("cpe23Uri", cpe.toCpe23FS(), Field.Store.YES));

        return doc;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy