All downloads are FREE. Search and download functionality uses the official Maven repository.

com.metaeffekt.mirror.index.nvd.CpeDictionaryIndex Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.mirror.index.nvd;

import com.metaeffekt.artifact.analysis.utils.CustomCollectors;
import com.metaeffekt.artifact.analysis.utils.FileUtils;
import com.metaeffekt.artifact.analysis.utils.StringUtils;
import com.metaeffekt.artifact.analysis.utils.TimeUtils;
import com.metaeffekt.artifact.analysis.vulnerability.CommonEnumerationUtil;
import com.metaeffekt.mirror.download.documentation.MirrorMetadata;
import com.metaeffekt.mirror.contents.base.Reference;
import com.metaeffekt.mirror.download.nvd.CpeDictionaryDownload;
import com.metaeffekt.mirror.index.Index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import us.springett.parsers.cpe.Cpe;

import javax.xml.parsers.ParserConfigurationException;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.*;

/**
 * Deprecated index that turns the files fetched by {@link CpeDictionaryDownload} into Lucene documents,
 * one per CPE, keyed by the CPE 2.3 formatted string.
 * <p>
 * Two files are read from the download directory:
 * <ul>
 *     <li>{@code cpe-match.json}: every {@code cpe23Uri} found becomes a document with placeholder metadata.</li>
 *     <li>{@code cpe-dict.xml}: every {@code cpe-item} becomes a document carrying title, references, NVD id,
 *     deprecation information and modification date; it replaces a document with the same key.</li>
 * </ul>
 */
@Deprecated
@MirrorMetadata(directoryName = "cpe-dict", mavenPropertyName = "cpeDictionaryIndex", deprecated = true)
public class CpeDictionaryIndex extends Index {

    private static final Logger LOG = LoggerFactory.getLogger(CpeDictionaryIndex.class);

    /** Number of progress messages that are logged (at most) while iterating over one input file. */
    private static final int PROGRESS_LOG_STEPS = 4;

    public CpeDictionaryIndex(File baseMirrorDirectory) {
        super(baseMirrorDirectory, CpeDictionaryIndex.class, Collections.singletonList(CpeDictionaryDownload.class), Collections.emptyList());
    }

    /**
     * Creates the index documents from {@code cpe-match.json} and {@code cpe-dict.xml}.
     *
     * @return the documents keyed by their CPE 2.3 formatted string
     * @throws RuntimeException if one of the two input files is missing, unreadable or cannot be parsed
     */
    @Override
    protected Map<String, Document> createIndexDocuments() {
        final Map<String, Document> documents = new HashMap<>();

        final File downloadsDirectory = super.requiredDownloads[0];

        final File cpeDictionary = new File(downloadsDirectory, "cpe-dict.xml");
        if (!cpeDictionary.isFile()) {
            throw new RuntimeException("Could not find cpe-dict.xml in " + downloadsDirectory.getAbsolutePath());
        }

        final File cpeMatch = new File(downloadsDirectory, "cpe-match.json");
        if (!cpeMatch.isFile()) {
            throw new RuntimeException("Could not find cpe-match.json in " + downloadsDirectory.getAbsolutePath());
        }

        parseCpeMatch(cpeMatch, documents);
        // to prevent all the CPE information being stored in memory, write the existing documents into the index
        // NOTE(review): the map is not cleared in this method; presumably writeIndexDocuments takes care of
        // releasing the entries - confirm against the Index base class.
        super.writeIndexDocuments(documents);

        parseCpeDict(cpeDictionary, documents);

        return documents;
    }

    /**
     * Reads the CPE match JSON file and registers a document for every {@code cpe23Uri} it contains, both on the
     * match entries themselves and inside their optional {@code cpe_name} arrays.
     *
     * @param cpeMatch  the {@code cpe-match.json} file
     * @param documents the map the created documents are added to
     * @throws RuntimeException if the file cannot be read or is not valid JSON
     */
    private void parseCpeMatch(File cpeMatch, Map<String, Document> documents) {
        LOG.info("Parsing CPE match in {}", cpeMatch.getAbsolutePath());

        final JSONObject json;

        final String jsonContents;
        try {
            jsonContents = FileUtils.readFileToString(cpeMatch, StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new RuntimeException("Unable to read file contents during indexing: " + cpeMatch.getAbsolutePath(), e);
        }
        try {
            json = new JSONObject(jsonContents);
        } catch (JSONException e) {
            throw new RuntimeException("Unable to parse JSON document during indexing: " + cpeMatch.getAbsolutePath(), e);
        }

        final JSONArray matchesCpeList = json.getJSONArray("matches");
        final int loggingStepSize = progressStepSize(matchesCpeList.length());

        for (int i = 0; i < matchesCpeList.length(); i++) {
            final JSONObject match = matchesCpeList.getJSONObject(i);

            final JSONArray cpeNames = match.optJSONArray("cpe_name");
            if (cpeNames != null) {
                for (int j = 0; j < cpeNames.length(); j++) {
                    final JSONObject cpeNameObject = cpeNames.getJSONObject(j);
                    appendSingleCpeFromCpeMatchToIndexableDocuments(cpeNameObject.getString("cpe23Uri"), documents);
                }
            }

            final String cpe = match.optString("cpe23Uri");
            if (StringUtils.hasText(cpe)) {
                appendSingleCpeFromCpeMatchToIndexableDocuments(cpe, documents);
            }

            if (i % loggingStepSize == 0) {
                LOG.info("Created documents for [{} / {}] CPE entries", i, matchesCpeList.length());
            }
        }
    }

    /**
     * Adds a document for a single CPE from the match file unless a document with the same CPE 2.3 formatted
     * string is already present. CPE strings that cannot be parsed are ignored.
     *
     * @param cpeString the CPE as found in the match file
     * @param documents the map the document is added to
     */
    private void appendSingleCpeFromCpeMatchToIndexableDocuments(String cpeString, Map<String, Document> documents) {
        final Optional<Cpe> optionalCpe = CommonEnumerationUtil.parseCpe(cpeString);
        if (!optionalCpe.isPresent()) {
            return;
        }

        final Cpe cpe = optionalCpe.get();
        final String cpe23FS = cpe.toCpe23FS();

        // check first, so that no document is built just to be thrown away again
        if (documents.containsKey(cpe23FS)) {
            return;
        }

        final Document doc = createDocumentFromCpe(cpe);

        // NVD 1.0 mirror does not provide some of the data that the 2.x API has: [nvdId, deprecated, titles, references]
        // the id is derived from the CPE string; an explicit charset keeps it independent of the platform default
        doc.add(new TextField("nvdId", UUID.nameUUIDFromBytes(cpe23FS.getBytes(StandardCharsets.UTF_8)).toString(), Field.Store.YES));

        doc.add(new TextField("deprecated", "false", Field.Store.YES));
        doc.add(new TextField("title", "", Field.Store.YES));
        doc.add(new TextField("references", "[]", Field.Store.YES));

        documents.put(cpe23FS, doc);
    }

    /**
     * Reads the CPE dictionary XML file and registers one document per {@code cpe-item}. Items whose name cannot
     * be parsed as CPE, or that lack the {@code meta:item-metadata} element or its {@code nvd-id} attribute, are
     * skipped; all other items replace a document already stored under the same CPE 2.3 formatted string.
     *
     * @param cpeDict   the {@code cpe-dict.xml} file
     * @param documents the map the created documents are put into
     * @throws RuntimeException if the file cannot be read or is not well-formed XML
     */
    private void parseCpeDict(File cpeDict, Map<String, Document> documents) {
        LOG.info("Parsing CPE dictionary in {}", cpeDict.getAbsolutePath());

        final org.w3c.dom.Document xmlDocument;

        try {
            final String xmlContents = FileUtils.readFileToString(cpeDict, StandardCharsets.UTF_8);
            xmlDocument = parseXmlDocument(xmlContents);
        } catch (IOException e) {
            throw new RuntimeException("Unable to read file contents during indexing: " + cpeDict.getAbsolutePath(), e);
        } catch (ParserConfigurationException | SAXException e) {
            throw new RuntimeException("Unable to parse XML document during indexing: " + cpeDict.getAbsolutePath(), e);
        }

        final NodeList cpeItems = xmlDocument.getElementsByTagName("cpe-item");

        final int loggingStepSize = progressStepSize(cpeItems.getLength());

        for (int i = 0; i < cpeItems.getLength(); i++) {
            final org.w3c.dom.Element cpeItem = (org.w3c.dom.Element) cpeItems.item(i);

            final String cpe22 = cpeItem.getAttribute("name");

            final Optional<Cpe> optionalCpe = CommonEnumerationUtil.parseCpe(cpe22);
            if (!optionalCpe.isPresent()) {
                // a single unparseable entry must not end the processing of all remaining entries
                continue;
            }

            // look the metadata element up once; NodeList.item returns null if the element is missing
            final org.w3c.dom.Node itemMetadata = cpeItem.getElementsByTagName("meta:item-metadata").item(0);
            final String nvdId = itemMetadata == null ? null : attributeTextOrNull(itemMetadata, "nvd-id");
            if (nvdId == null) {
                LOG.warn("Skipping CPE dictionary entry [{}] without meta:item-metadata nvd-id", cpe22);
                continue;
            }
            final String modificationDate = attributeTextOrNull(itemMetadata, "modification-date");
            final String deprecatedByNvdId = attributeTextOrNull(itemMetadata, "deprecated-by-nvd-id");

            final Cpe cpe = optionalCpe.get();
            final Document doc = createDocumentFromCpe(cpe);

            final NodeList titleNodes = cpeItem.getElementsByTagName("title");
            final String title = titleNodes.getLength() > 0 ? titleNodes.item(0).getTextContent() : "";

            final List<Reference> references = new ArrayList<>();
            final NodeList referencesNode = cpeItem.getElementsByTagName("references");

            if (referencesNode.getLength() > 0) {
                final NodeList referenceNodes = ((org.w3c.dom.Element) referencesNode.item(0)).getElementsByTagName("reference");

                for (int j = 0; j < referenceNodes.getLength(); j++) {
                    final org.w3c.dom.Element referenceNode = (org.w3c.dom.Element) referenceNodes.item(j);
                    final String href = referenceNode.getAttribute("href");
                    final String text = referenceNode.getTextContent();
                    references.add(Reference.fromTitleAndUrl(text, href));
                }
            }

            if (deprecatedByNvdId != null) {
                doc.add(new TextField("deprecatedByNvdId", deprecatedByNvdId, Field.Store.YES));
                doc.add(new TextField("deprecated", "true", Field.Store.YES));
            } else {
                doc.add(new TextField("deprecated", "false", Field.Store.YES));
            }

            if (StringUtils.hasText(title)) {
                doc.add(new TextField("title", new JSONArray().put(new JSONObject().put("title", title).put("lang", "en")).toString(), Field.Store.YES));
            }
            doc.add(new TextField("references", references.stream().map(Reference::toJson).collect(CustomCollectors.toJsonArray()).toString(), Field.Store.YES));
            doc.add(new TextField("nvdId", nvdId, Field.Store.YES));

            if (modificationDate != null) {
                final Date parsedUpdateDate = TimeUtils.tryParse(modificationDate);
                if (parsedUpdateDate != null) {
                    doc.add(new TextField("updateDate", String.valueOf(parsedUpdateDate.getTime()), Field.Store.YES));
                }
            }

            documents.put(cpe.toCpe23FS(), doc);

            if (i % loggingStepSize == 0) {
                LOG.info("Created documents for [{} / {}] CPE entries", i, cpeItems.getLength());
            }
        }
    }

    /**
     * Computes the interval, in entries, between two progress log messages.
     *
     * @param totalEntries the number of entries that will be iterated
     * @return a quarter of {@code totalEntries}, but never less than 1 so it is always a valid modulo divisor
     */
    private static int progressStepSize(int totalEntries) {
        return Math.max(1, totalEntries / PROGRESS_LOG_STEPS);
    }

    /**
     * Reads the text content of a named attribute.
     *
     * @param node          the node to read the attribute from
     * @param attributeName the name of the attribute
     * @return the attribute text, or {@code null} if the node has no attributes or no attribute of that name
     */
    private static String attributeTextOrNull(org.w3c.dom.Node node, String attributeName) {
        final org.w3c.dom.NamedNodeMap attributes = node.getAttributes();
        if (attributes == null) {
            return null;
        }
        final org.w3c.dom.Node attribute = attributes.getNamedItem(attributeName);
        return attribute == null ? null : attribute.getTextContent();
    }

    /**
     * Creates a document that stores every component of the CPE in its own field, plus the complete
     * CPE 2.3 formatted string in the {@code cpe23Uri} field.
     *
     * @param cpe the CPE to convert
     * @return a new document containing only the CPE component fields
     */
    private static Document createDocumentFromCpe(Cpe cpe) {
        final Document doc = new Document();

        doc.add(new TextField("part", cpe.getPart().getAbbreviation(), Field.Store.YES));
        doc.add(new TextField("vendor", cpe.getVendor(), Field.Store.YES));
        doc.add(new TextField("product", cpe.getProduct(), Field.Store.YES));
        doc.add(new TextField("version", cpe.getVersion(), Field.Store.YES));
        doc.add(new TextField("update", cpe.getUpdate(), Field.Store.YES));
        doc.add(new TextField("edition", cpe.getEdition(), Field.Store.YES));
        doc.add(new TextField("language", cpe.getLanguage(), Field.Store.YES));
        doc.add(new TextField("sw_edition", cpe.getSwEdition(), Field.Store.YES));
        doc.add(new TextField("target_sw", cpe.getTargetSw(), Field.Store.YES));
        doc.add(new TextField("target_hw", cpe.getTargetHw(), Field.Store.YES));
        doc.add(new TextField("other", cpe.getOther(), Field.Store.YES));

        doc.add(new TextField("cpe23Uri", cpe.toCpe23FS(), Field.Store.YES));

        return doc;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy