
/*
 * This file is part of dependency-check-core.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
 */
package org.owasp.dependencycheck.analyzer;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import javax.annotation.concurrent.ThreadSafe;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.builder.CompareToBuilder;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.jetbrains.annotations.NotNull;
import org.owasp.dependencycheck.Engine;
import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
import org.owasp.dependencycheck.data.cpe.CpeMemoryIndex;
import org.owasp.dependencycheck.data.cpe.Fields;
import org.owasp.dependencycheck.data.cpe.IndexEntry;
import org.owasp.dependencycheck.data.cpe.IndexException;
import org.owasp.dependencycheck.data.lucene.LuceneUtils;
import org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer;
import org.owasp.dependencycheck.data.nvdcve.CveDB;
import org.owasp.dependencycheck.data.nvdcve.DatabaseException;
import org.owasp.dependencycheck.data.update.cpe.CpePlus;
import org.owasp.dependencycheck.dependency.Confidence;
import org.owasp.dependencycheck.dependency.Dependency;
import org.owasp.dependencycheck.dependency.Evidence;
import org.owasp.dependencycheck.dependency.EvidenceType;
import org.owasp.dependencycheck.dependency.naming.CpeIdentifier;
import org.owasp.dependencycheck.exception.InitializationException;
import org.owasp.dependencycheck.utils.DependencyVersion;
import org.owasp.dependencycheck.utils.DependencyVersionUtil;
import org.owasp.dependencycheck.utils.Settings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.springett.parsers.cpe.Cpe;
import us.springett.parsers.cpe.CpeBuilder;
import us.springett.parsers.cpe.exceptions.CpeValidationException;
import us.springett.parsers.cpe.values.Part;

/**
 * CPEAnalyzer is a utility class that takes a project dependency and attempts
 * to discern if there is an associated CPE. It uses the evidence contained
 * within the dependency to search the Lucene index.
 *
 * @author Jeremy Long
 */
@ThreadSafe
public class CPEAnalyzer extends AbstractAnalyzer {

    /**
     * The Logger.
     */
    private static final Logger LOGGER = LoggerFactory.getLogger(CPEAnalyzer.class);
    /**
     * The maximum number of query results to return.
     */
    private static final int MAX_QUERY_RESULTS = 25;
    /**
     * The weighting boost to give terms when constructing the Lucene query.
     */
    private static final int WEIGHTING_BOOST = 1;
    /**
     * A string representation of a regular expression defining characters
     * utilized within the CPE Names. Note, the :/ are included so URLs are
     * passed into the Lucene query so that the specialized tokenizer can parse
     * them.
     */
    private static final String CLEANSE_CHARACTER_RX = "[^A-Za-z0-9 ._:/-]";
    /**
     * A string representation of a regular expression used to remove all but
     * alpha characters.
     */
    private static final String CLEANSE_NONALPHA_RX = "[^A-Za-z]*";
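    // Illustrative note (not part of the original source): a sketch of how the two
    // cleansing expressions behave when applied via String.replaceAll, as done in
    // cleanseText() and equalsIgnoreCaseAndNonAlpha() below:
    //
    //   "Spring Framework (v5.2)".replaceAll(CLEANSE_CHARACTER_RX, " ")
    //       -> "Spring Framework  v5.2 "   // parentheses are not in the allowed set
    //   "spring-framework".replaceAll(CLEANSE_NONALPHA_RX, "")
    //       -> "springframework"           // everything but alpha characters removed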
    /**
     * UTF-8 character set name.
     */
    private static final String UTF8 = StandardCharsets.UTF_8.name();
    /**
     * The URL to search the NVD CVE data at NIST. This is used by calling:
     * <pre>String.format(NVD_SEARCH_URL, vendor, product, version);</pre>
     */
    public static final String NVD_SEARCH_URL = "https://nvd.nist.gov/vuln/search/results?form_type=Advanced&"
            + "results_type=overview&search_type=all&cpe_vendor=cpe%%3A%%2F%%3A%1$s&cpe_product=cpe%%3A%%2F%%3A%1$s%%3A%2$s&"
            + "cpe_version=cpe%%3A%%2F%%3A%1$s%%3A%2$s%%3A%3$s";
    /**
     * The URL to search the NVD CVE data at NIST. This is used by calling:
     * <pre>String.format(NVD_SEARCH_BROAD_URL, vendor, product);</pre>
     */
    public static final String NVD_SEARCH_BROAD_URL = "https://nvd.nist.gov/vuln/search/results?form_type=Advanced&"
            + "results_type=overview&search_type=all&cpe_vendor=cpe%%3A%%2F%%3A%1$s&cpe_product=cpe%%3A%%2F%%3A%1$s%%3A%2$s";
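    // Illustrative note (not part of the original source): the doubled %% in the
    // constants above are format-string escapes for a literal '%', so a call such
    // as the following (vendor/product/version values are hypothetical):
    //
    //   String.format(NVD_SEARCH_URL, "apache", "struts", "2.5.10")
    //
    // yields a search URL containing:
    //
    //   ...cpe_vendor=cpe%3A%2F%3Aapache&cpe_product=cpe%3A%2F%3Aapache%3Astruts
    //   &cpe_version=cpe%3A%2F%3Aapache%3Astruts%3A2.5.10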
    /**
     * The CPE in memory index.
     */
    private CpeMemoryIndex cpe;
    /**
     * The CVE Database.
     */
    private CveDB cve;
    /**
     * The list of ecosystems to skip during analysis. These are skipped
     * because there is generally a more accurate vulnerability analyzer in the
     * pipeline.
     */
    private List<String> skipEcosystems;
    /**
     * A reference to the suppression analyzer; for timing reasons we need to
     * test for suppressions immediately after identifying the match because a
     * higher confidence match on a FP can mask a lower confidence, yet valid
     * match.
     */
    private CpeSuppressionAnalyzer suppression;

    /**
     * Returns the name of this analyzer.
     *
     * @return the name of this analyzer.
     */
    @Override
    public String getName() {
        return "CPE Analyzer";
    }

    /**
     * Returns the analysis phase that this analyzer should run in.
     *
     * @return the analysis phase that this analyzer should run in.
     */
    @Override
    public AnalysisPhase getAnalysisPhase() {
        return AnalysisPhase.IDENTIFIER_ANALYSIS;
    }

    /**
     * Creates the CPE Lucene Index.
     *
     * @param engine a reference to the dependency-check engine
     * @throws InitializationException is thrown if there is an issue opening
     * the index.
     */
    @Override
    public void prepareAnalyzer(Engine engine) throws InitializationException {
        super.prepareAnalyzer(engine);
        try {
            this.open(engine.getDatabase());
        } catch (IOException ex) {
            LOGGER.debug("Exception initializing the Lucene Index", ex);
            throw new InitializationException("An exception occurred initializing the Lucene Index", ex);
        } catch (DatabaseException ex) {
            LOGGER.debug("Exception accessing the database", ex);
            throw new InitializationException("An exception occurred accessing the database", ex);
        }
        final String[] tmp = engine.getSettings().getArray(Settings.KEYS.ECOSYSTEM_SKIP_CPEANALYZER);
        if (tmp == null) {
            skipEcosystems = new ArrayList<>();
        } else {
            LOGGER.debug("Skipping CPE Analysis for {}", StringUtils.join(tmp, ","));
            skipEcosystems = Arrays.asList(tmp);
        }
        suppression = new CpeSuppressionAnalyzer();
        suppression.initialize(engine.getSettings());
        suppression.prepareAnalyzer(engine);
    }

    /**
     * Opens the data source.
     *
     * @param cve a reference to the NVD CVE database
     * @throws IOException when the Lucene directory to be queried does not
     * exist or is corrupt.
     * @throws DatabaseException when the database throws an exception. This
     * usually occurs when the database is in use by another process.
     */
    public void open(CveDB cve) throws IOException, DatabaseException {
        this.cve = cve;
        this.cpe = CpeMemoryIndex.getInstance();
        try {
            final long creationStart = System.currentTimeMillis();
            cpe.open(cve, this.getSettings());
            final long creationSeconds = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - creationStart);
            LOGGER.info("Created CPE Index ({} seconds)", creationSeconds);
        } catch (IndexException ex) {
            LOGGER.debug("IndexException", ex);
            throw new DatabaseException(ex);
        }
    }

    /**
     * Closes the data sources.
     */
    @Override
    public void closeAnalyzer() {
        if (cpe != null) {
            cpe.close();
            cpe = null;
        }
    }

    /**
     * Searches the data store of CPE entries, trying to identify the CPE for
     * the given dependency based on the evidence contained within. The
     * dependency passed in is updated with any identified CPE values.
     *
     * @param dependency the dependency to search for CPE entries on
     * @throws CorruptIndexException is thrown when the Lucene index is corrupt
     * @throws IOException is thrown when an IOException occurs
     * @throws ParseException is thrown when the Lucene query cannot be parsed
     * @throws AnalysisException thrown if the suppression rules failed
     */
    protected void determineCPE(Dependency dependency) throws CorruptIndexException, IOException, ParseException, AnalysisException {
        final Map<String, MutableInt> vendors = new HashMap<>();
        final Map<String, MutableInt> products = new HashMap<>();
        final Set<Integer> previouslyFound = new HashSet<>();

        for (Confidence confidence : Confidence.values()) {
            collectTerms(vendors, dependency.getIterator(EvidenceType.VENDOR, confidence));
            LOGGER.debug("vendor search: {}", vendors);
            collectTerms(products, dependency.getIterator(EvidenceType.PRODUCT, confidence));
            LOGGER.debug("product search: {}", products);
            if (!vendors.isEmpty() && !products.isEmpty()) {
                final List<IndexEntry> entries = searchCPE(vendors, products,
                        dependency.getVendorWeightings(), dependency.getProductWeightings());
                if (entries == null) {
                    continue;
                }
                boolean identifierAdded = false;
                //filtering on score seems to create additional FN - but maybe we should continue to investigate this option
                //StandardDeviation stdev = new StandardDeviation();
                //float maxScore = 0;
                //for (IndexEntry e : entries) {
                //    if (previouslyFound.contains(e.getDocumentId())) {
                //        continue;
                //    }
                //    stdev.increment((double) e.getSearchScore());
                //    if (maxScore < e.getSearchScore()) {
                //        maxScore = e.getSearchScore();
                //    }
                //}
                //double filter = maxScore - (stdev.getResult() * 5);
                for (IndexEntry e : entries) {
                    if (previouslyFound.contains(e.getDocumentId()) /*|| (filter > 0 && e.getSearchScore() < filter)*/) {
                        continue;
                    }
                    previouslyFound.add(e.getDocumentId());
                    //LOGGER.error("\"Verifying entry\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\"", dependency.getFileName(),
                    //        e.getVendor(), e.getProduct(), confidence.toString(), e.getSearchScore());
                    if (verifyEntry(e, dependency)) {
                        final String vendor = e.getVendor();
                        final String product = e.getProduct();
                        LOGGER.debug("identified vendor/product: {}/{}", vendor, product);
                        identifierAdded |= determineIdentifiers(dependency, vendor, product, confidence);
                    }
                }
                if (identifierAdded) {
                    break;
                }
            }
        }
    }

    /**
     * <p>
     * Returns the text created by concatenating the text and the values from
     * the EvidenceCollection (filtered for a specific confidence). This
     * attempts to prevent duplicate terms from being added.</p>
     * <p>
     * Note, if the evidence is longer than 1000 characters it will be
     * truncated.</p>
     *
     * @param terms the collection of terms
     * @param evidence an iterable set of evidence to concatenate
     */
    @SuppressWarnings("null")
    protected void collectTerms(Map<String, MutableInt> terms, Iterable<Evidence> evidence) {
        for (Evidence e : evidence) {
            String value = cleanseText(e.getValue());
            if (StringUtils.isBlank(value)) {
                continue;
            }
            if (value.length() > 1000) {
                boolean trimmed = false;
                int pos = value.lastIndexOf(" ", 1000);
                if (pos > 0) {
                    value = value.substring(0, pos);
                    trimmed = true;
                } else {
                    pos = value.lastIndexOf(".", 1000);
                }
                if (!trimmed) {
                    if (pos > 0) {
                        value = value.substring(0, pos);
                        trimmed = true;
                    } else {
                        pos = value.lastIndexOf("-", 1000);
                    }
                }
                if (!trimmed) {
                    if (pos > 0) {
                        value = value.substring(0, pos);
                        trimmed = true;
                    } else {
                        pos = value.lastIndexOf("_", 1000);
                    }
                }
                if (!trimmed) {
                    if (pos > 0) {
                        value = value.substring(0, pos);
                        trimmed = true;
                    } else {
                        pos = value.lastIndexOf("/", 1000);
                    }
                }
                if (!trimmed && pos > 0) {
                    value = value.substring(0, pos);
                    trimmed = true;
                }
                if (!trimmed) {
                    value = value.substring(0, 1000);
                }
            }
            final MutableInt count = terms.get(value);
            if (count == null) {
                terms.put(value, new MutableInt(1));
            } else {
                count.add(1);
            }
        }
    }
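    // Illustrative note (not part of the original source): given three pieces of
    // product evidence "spring", "spring", and "spring framework", collectTerms()
    // would (assuming no truncation applies) leave the terms map holding
    // {"spring"=2, "spring framework"=1}; the counts later become the ^n term
    // boosts applied in appendWeightedSearch().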

    /**
     * <p>
     * Searches the Lucene CPE index to identify possible CPE entries
     * associated with the supplied vendor, product, and version.</p>
     * <p>
     * If either the vendorWeightings or productWeightings lists have been
     * populated this data is used to add weighting factors to the search.</p>
     *
     * @param vendor the text used to search the vendor field
     * @param product the text used to search the product field
     * @param vendorWeightings a list of strings used to add weighting factors
     * to the vendor search
     * @param productWeightings a list of strings used to add weighting factors
     * to the product search
     * @return a list of possible CPE values
     */
    protected List<IndexEntry> searchCPE(Map<String, MutableInt> vendor, Map<String, MutableInt> product,
            Set<String> vendorWeightings, Set<String> productWeightings) {
        final List<IndexEntry> ret = new ArrayList<>(MAX_QUERY_RESULTS);
        final String searchString = buildSearch(vendor, product, vendorWeightings, productWeightings);
        if (searchString == null) {
            return ret;
        }
        try {
            final Query query = cpe.parseQuery(searchString);
            final TopDocs docs = cpe.search(query, MAX_QUERY_RESULTS);
            for (ScoreDoc d : docs.scoreDocs) {
                //if (d.score >= minLuceneScore) {
                final Document doc = cpe.getDocument(d.doc);
                final IndexEntry entry = new IndexEntry();
                entry.setDocumentId(d.doc);
                entry.setVendor(doc.get(Fields.VENDOR));
                entry.setProduct(doc.get(Fields.PRODUCT));
                entry.setSearchScore(d.score);
                //LOGGER.error("Explanation: ---------------------");
                //LOGGER.error("Explanation: " + entry.getVendor() + " " + entry.getProduct() + " " + entry.getSearchScore());
                //LOGGER.error("Explanation: " + searchString);
                //LOGGER.error("Explanation: " + cpe.explain(query, d.doc));
                if (!ret.contains(entry)) {
                    ret.add(entry);
                }
                //}
            }
            return ret;
        } catch (ParseException ex) {
            LOGGER.warn("An error occurred querying the CPE data. See the log for more details.");
            LOGGER.info("Unable to parse: {}", searchString, ex);
        } catch (IndexException ex) {
            LOGGER.warn("An error occurred resetting the CPE index searcher. See the log for more details.");
            LOGGER.info("Unable to reset the search analyzer", ex);
        } catch (IOException ex) {
            LOGGER.warn("An error occurred reading CPE data. See the log for more details.");
            LOGGER.info("IO Error with search string: {}", searchString, ex);
        }
        return null;
    }

    /**
     * <p>
     * Builds a Lucene search string by properly escaping data and constructing
     * a valid search query.</p>
     * <p>
     * If either the vendorWeighting or productWeightings lists have been
     * populated this data is used to add weighting factors to the search
     * string generated.</p>
     *
     * @param vendor text to search the vendor field
     * @param product text to search the product field
     * @param vendorWeighting a list of strings to apply to the vendor to boost
     * the term's weight
     * @param productWeightings a list of strings to apply to the product to
     * boost the term's weight
     * @return the Lucene query
     */
    protected String buildSearch(Map<String, MutableInt> vendor, Map<String, MutableInt> product,
            Set<String> vendorWeighting, Set<String> productWeightings) {
        final StringBuilder sb = new StringBuilder();
        if (!appendWeightedSearch(sb, Fields.PRODUCT, product, productWeightings)) {
            return null;
        }
        sb.append(" AND ");
        if (!appendWeightedSearch(sb, Fields.VENDOR, vendor, vendorWeighting)) {
            return null;
        }
        return sb.toString();
    }

    /**
     * This method constructs a Lucene query for a given field. The searchText
     * is split into separate words and if the word is within the list of
     * weighted words then an additional weighting is applied to the term as it
     * is appended into the query.
     *
     * @param sb a StringBuilder that the query text will be appended to.
     * @param field the field within the Lucene index that the query is
     * searching.
     * @param terms text used to construct the query.
     * @param weightedText a list of terms that will be considered higher
     * importance when searching.
     * @return if the append was successful.
     */
    @SuppressWarnings("StringSplitter")
    private boolean appendWeightedSearch(StringBuilder sb, String field, Map<String, MutableInt> terms, Set<String> weightedText) {
        if (terms.isEmpty()) {
            return false;
        }
        sb.append(field).append(":(");
        boolean addSpace = false;
        boolean addedTerm = false;
        for (Map.Entry<String, MutableInt> entry : terms.entrySet()) {
            final StringBuilder boostedTerms = new StringBuilder();
            final int weighting = entry.getValue().intValue();
            final String[] text = entry.getKey().split(" ");
            for (String word : text) {
                if (word.isEmpty()) {
                    continue;
                }
                if (addSpace) {
                    sb.append(" ");
                } else {
                    addSpace = true;
                }
                addedTerm = true;
                if (LuceneUtils.isKeyword(word)) {
                    sb.append("\"");
                    LuceneUtils.appendEscapedLuceneQuery(sb, word);
                    sb.append("\"");
                } else {
                    LuceneUtils.appendEscapedLuceneQuery(sb, word);
                }
                final String boostTerm = findBoostTerm(word, weightedText);
                //The weighting is on a full phrase rather than at a term level for vendors or products.
                //TODO - should the weighting be at a "word" level as opposed to a phrase level? Or combined word and phrase?
                //Remember, the reason we are counting the frequency of "phrases" as opposed to terms is that
                //we need to keep the correct sequence of terms from the evidence so that the term concatenating analyzer
                //works correctly and will cause searches to take 'spring framework' and produce: 'spring springframework framework'
                if (boostTerm != null) {
                    sb.append("^").append(weighting + WEIGHTING_BOOST);
                    if (!boostTerm.equals(word)) {
                        boostedTerms.append(" ").append(boostTerm).append("^").append(weighting + WEIGHTING_BOOST);
                    }
                } else if (weighting > 1) {
                    sb.append("^").append(weighting);
                }
            }
            if (boostedTerms.length() > 0) {
                sb.append(boostedTerms);
            }
        }
        sb.append(")");
        return addedTerm;
    }

    /**
     * Removes characters from the input text that are not used within the CPE
     * index.
     *
     * @param text is the text to remove the characters from.
     * @return the text having removed some characters.
     */
    private String cleanseText(String text) {
        return text.replaceAll(CLEANSE_CHARACTER_RX, " ");
    }
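    // Illustrative note (not part of the original source): with a hypothetical
    // product term map of {"springframework"=1}, productWeightings of
    // {"spring-framework"}, and a vendor term map of {"pivotal"=1}, buildSearch()
    // would produce a query along the lines of:
    //
    //   product:(springframework^2 spring-framework^2) AND vendor:(pivotal)
    //
    // The word is boosted to weighting + WEIGHTING_BOOST because findBoostTerm()
    // matches it against "spring-framework" via equalsIgnoreCaseAndNonAlpha(),
    // which compares only the alpha characters of each term.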
    /**
     * Searches the collection of boost terms for the given term. The elements
     * are case insensitive matched using only the alpha contents of the terms;
     * all other characters are removed.
     *
     * @param term the term to search for
     * @param boost the collection of boost terms
     * @return the value identified
     */
    private String findBoostTerm(String term, Set<String> boost) {
        for (String entry : boost) {
            if (equalsIgnoreCaseAndNonAlpha(term, entry)) {
                return entry;
            }
        }
        return null;
    }

    /**
     * Compares two strings after lower casing them and removing the non-alpha
     * characters.
     *
     * @param l string one to compare.
     * @param r string two to compare.
     * @return whether or not the two strings are similar.
     */
    private boolean equalsIgnoreCaseAndNonAlpha(String l, String r) {
        if (l == null || r == null) {
            return false;
        }
        final String left = l.replaceAll(CLEANSE_NONALPHA_RX, "");
        final String right = r.replaceAll(CLEANSE_NONALPHA_RX, "");
        return left.equalsIgnoreCase(right);
    }

    /**
     * Ensures that the CPE identified matches the dependency. This validates
     * that the product, vendor, and version information for the CPE are
     * contained within the dependency's evidence.
     *
     * @param entry a CPE entry.
     * @param dependency the dependency that the CPE entries could be for.
     * @return whether or not the entry is valid.
     */
    private boolean verifyEntry(final IndexEntry entry, final Dependency dependency) {
        boolean isValid = false;
        //TODO - does this nullify some of the fuzzy matching that happens in the Lucene search?
        // for instance CPE some-component and in the evidence we have SomeComponent.
        if (collectionContainsString(dependency.getEvidence(EvidenceType.PRODUCT), entry.getProduct())
                && collectionContainsString(dependency.getEvidence(EvidenceType.VENDOR), entry.getVendor())) {
            //&& collectionContainsVersion(dependency.getVersionEvidence(), entry.getVersion())
            isValid = true;
        }
        return isValid;
    }

    /**
     * Used to determine if the EvidenceCollection contains a specific string.
     *
     * @param evidence a set of evidence objects to check
     * @param text the text to search for
     * @return whether or not the EvidenceCollection contains the string
     */
    @SuppressWarnings("StringSplitter")
    private boolean collectionContainsString(Set<Evidence> evidence, String text) {
        //TODO - likely need to change the split... not sure if this will work for CPE with special chars
        if (text == null) {
            return false;
        }
        // Check if we have an exact match
        final String textLC = text.toLowerCase();
        for (Evidence e : evidence) {
            if (e.getValue().toLowerCase().equals(textLC)) {
                return true;
            }
        }
        final String[] words = text.split("[\\s_-]+");
        final List<String> list = new ArrayList<>();
        String tempWord = null;
        final CharArraySet stopWords = SearchFieldAnalyzer.getStopWords();
        for (String word : words) {
            /*
             single letter words should be concatenated with the next word.
             so { "m", "core", "sample" } -> { "mcore", "sample" }
             */
            if (tempWord != null) {
                list.add(tempWord + word);
                tempWord = null;
            } else if (word.length() <= 2) {
                tempWord = word;
            } else {
                if (stopWords.contains(word)) {
                    continue;
                }
                list.add(word);
            }
        }
        if (tempWord != null) {
            if (!list.isEmpty()) {
                final String tmp = list.get(list.size() - 1) + tempWord;
                list.add(tmp);
            } else {
                list.add(tempWord);
            }
        }
        if (list.isEmpty()) {
            return false;
        }
        boolean isValid = true;
        // Prepare the evidence values, e.g. remove the characters we used for splitting
        final List<String> evidenceValues = new ArrayList<>(evidence.size());
        evidence.forEach((e) -> {
            evidenceValues.add(e.getValue().toLowerCase().replaceAll("[\\s_-]+", ""));
        });
        for (String word : list) {
            word = word.toLowerCase();
            boolean found = false;
            for (String e : evidenceValues) {
                if (e.contains(word)) {
                    if ("http".equals(word) && e.contains("http:")) {
                        continue;
                    }
                    found = true;
                    break;
                }
            }
            isValid &= found;
            //if (!isValid) {
            //    break;
            //}
        }
        return isValid;
    }

    /**
     * Analyzes a dependency and attempts to determine if there are any CPE
     * identifiers for this dependency.
     *
     * @param dependency The Dependency to analyze.
     * @param engine The analysis engine
     * @throws AnalysisException is thrown if there is an issue analyzing the
     * dependency.
     */
    @Override
    protected void analyzeDependency(Dependency dependency, Engine engine) throws AnalysisException {
        if (skipEcosystems.contains(dependency.getEcosystem())) {
            return;
        }
        try {
            determineCPE(dependency);
        } catch (CorruptIndexException ex) {
            throw new AnalysisException("CPE Index is corrupt.", ex);
        } catch (IOException ex) {
            throw new AnalysisException("Failure opening the CPE Index.", ex);
        } catch (ParseException ex) {
            throw new AnalysisException("Unable to parse the generated Lucene query for this dependency.", ex);
        }
    }

    /**
     * Retrieves a list of CPE values from the CveDB based on the vendor and
     * product passed in. The list is then validated to find only CPEs that are
     * valid for the given dependency. It is possible that the CPE identified
     * is a best effort "guess" based on the vendor, product, and version
     * information.
     *
     * @param dependency the Dependency being analyzed
     * @param vendor the vendor for the CPE being analyzed
     * @param product the product for the CPE being analyzed
     * @param currentConfidence the current confidence being used during
     * analysis
     * @return <code>true</code> if an identifier was added to the dependency;
     * otherwise <code>false</code>
     * @throws UnsupportedEncodingException is thrown if UTF-8 is not supported
     * @throws AnalysisException thrown if the suppression rules failed
     */
    @SuppressWarnings("StringSplitter")
    protected boolean determineIdentifiers(Dependency dependency, String vendor, String product,
            Confidence currentConfidence) throws UnsupportedEncodingException, AnalysisException {
        final CpeBuilder cpeBuilder = new CpeBuilder();
        final Set<CpePlus> cpePlusEntries = cve.getCPEs(vendor, product);
        final Set<Cpe> cpes = filterEcosystem(dependency.getEcosystem(), cpePlusEntries);
        if (cpes == null || cpes.isEmpty()) {
            return false;
        }

        DependencyVersion bestGuess;
        if ("Golang".equals(dependency.getEcosystem()) && dependency.getVersion() == null) {
            bestGuess = new DependencyVersion("*");
        } else {
            bestGuess = new DependencyVersion("-");
        }
        Confidence bestGuessConf = null;
        String bestGuessURL = null;
        boolean hasBroadMatch = false;
        final List<IdentifierMatch> collected = new ArrayList<>();

        int maxDepth = 0;
        for (Cpe cpe : cpes) {
            final DependencyVersion dbVer = DependencyVersionUtil.parseVersion(cpe.getVersion(), true);
            if (dbVer != null) {
                final int count = dbVer.getVersionParts().size();
                if (count > maxDepth) {
                    maxDepth = count;
                }
            }
        }

        if (dependency.getVersion() != null && !dependency.getVersion().isEmpty()) {
            //we shouldn't always use the dependency version - in some cases this causes FP
            boolean useDependencyVersion = true;
            final CharArraySet stopWords = SearchFieldAnalyzer.getStopWords();
            if (dependency.getName() != null && !dependency.getName().isEmpty()) {
                final String name = dependency.getName();
                for (String word : product.split("[^a-zA-Z0-9]")) {
                    useDependencyVersion &= name.contains(word) || stopWords.contains(word);
                }
            }

            if (useDependencyVersion) {
                //TODO - we need to filter this so that we only use this if something in the
                //dependency.getName() matches the vendor/product in some way
                final DependencyVersion depVersion = new DependencyVersion(dependency.getVersion());
                if (depVersion.getVersionParts().size() > 0) {
                    cpeBuilder.part(Part.APPLICATION).vendor(vendor).product(product);
                    //Only semantic versions are used in the NVD and evidence may contain an update version
                    if (maxDepth == 3 && depVersion.getVersionParts().size() == 4
                            && depVersion.getVersionParts().get(3).matches("^(v|beta|alpha|u|rc|m|20\\d\\d).*$")) {
                        cpeBuilder.version(StringUtils.join(depVersion.getVersionParts().subList(0, 3), "."));
                        //when written - no update versions in the NVD start with v### - they all strip the v off
                        if (depVersion.getVersionParts().get(3).matches("^v\\d.*$")) {
                            cpeBuilder.update(depVersion.getVersionParts().get(3).substring(1));
                        } else {
                            cpeBuilder.update(depVersion.getVersionParts().get(3));
                        }
                    } else {
                        cpeBuilder.version(depVersion.toString());
                    }
                    try {
                        final Cpe depCpe = cpeBuilder.build();
                        final String url = String.format(NVD_SEARCH_URL, URLEncoder.encode(vendor, UTF8),
                                URLEncoder.encode(product, UTF8), URLEncoder.encode(depCpe.getVersion(), UTF8));
                        final IdentifierMatch match = new IdentifierMatch(depCpe, url,
                                IdentifierConfidence.EXACT_MATCH, currentConfidence);
                        collected.add(match);
                    } catch (CpeValidationException ex) {
                        throw new AnalysisException(String.format("Unable to create a CPE for %s:%s:%s",
                                vendor, product, bestGuess.toString()));
                    }
                }
            }
        }

        //TODO the following algorithm incorrectly identifies things as a lower version
        // if there is lower confidence evidence when the current (highest) version number
        // is newer than anything in the NVD.
        for (Confidence conf : Confidence.values()) {
            for (Evidence evidence : dependency.getIterator(EvidenceType.VERSION, conf)) {
                final DependencyVersion evVer = DependencyVersionUtil.parseVersion(evidence.getValue(), true);
                if (evVer == null) {
                    continue;
                }
                DependencyVersion evBaseVer = null;
                //Only semantic versions are used in the NVD and evidence may contain an update version
                if (maxDepth == 3 && evVer.getVersionParts().size() == 4) {
                    final String update = evVer.getVersionParts().get(3);
                    if (update.matches("^(v|beta|alpha|u|rc|m|20\\d\\d).*$")) {
                        evBaseVer = new DependencyVersion();
                        evBaseVer.setVersionParts(evVer.getVersionParts().subList(0, 3));
                    }
                }
                //TODO - review and update for new JSON data
                for (Cpe vs : cpes) {
                    final DependencyVersion dbVer = DependencyVersionUtil.parseVersion(vs.getVersion());
                    DependencyVersion dbVerUpdate = dbVer;
                    if (vs.getUpdate() != null && !vs.getUpdate().isEmpty()
                            && !vs.getUpdate().startsWith("*") && !vs.getUpdate().startsWith("-")) {
                        dbVerUpdate = DependencyVersionUtil.parseVersion(vs.getVersion() + '.' + vs.getUpdate(), true);
                    }
                    if (dbVer == null) {
                        //special case, no version specified - everything is vulnerable
                        hasBroadMatch = true;
                        final String url = String.format(NVD_SEARCH_BROAD_URL,
                                URLEncoder.encode(vs.getVendor(), UTF8), URLEncoder.encode(vs.getProduct(), UTF8));
                        final IdentifierMatch match = new IdentifierMatch(vs, url, IdentifierConfidence.BROAD_MATCH, conf);
                        collected.add(match);
                    } else if (evVer.equals(dbVer)) {
                        //exact match
                        final String url = String.format(NVD_SEARCH_URL, URLEncoder.encode(vs.getVendor(), UTF8),
                                URLEncoder.encode(vs.getProduct(), UTF8), URLEncoder.encode(vs.getVersion(), UTF8));
                        final IdentifierMatch match = new IdentifierMatch(vs, url, IdentifierConfidence.EXACT_MATCH, conf);
                        collected.add(match);
                    } else if (evBaseVer != null && evBaseVer.equals(dbVer)
                            && (bestGuessConf == null || bestGuessConf.compareTo(conf) > 0)) {
                        bestGuessConf = conf;
                        bestGuess = dbVer;
                        bestGuessURL = String.format(NVD_SEARCH_URL, URLEncoder.encode(vs.getVendor(), UTF8),
                                URLEncoder.encode(vs.getProduct(), UTF8), URLEncoder.encode(vs.getVersion(), UTF8));
                    } else if (dbVerUpdate != null && evVer.getVersionParts().size() <= dbVerUpdate.getVersionParts().size()
                            && evVer.matchesAtLeastThreeLevels(dbVerUpdate)) {
                        if (bestGuessConf == null || bestGuessConf.compareTo(conf) > 0) {
                            if (bestGuess.getVersionParts().size() < dbVer.getVersionParts().size()) {
                                bestGuess = dbVer;
                                bestGuessConf = conf;
                            }
                        }
                    }
                }
                if ((bestGuessConf == null || bestGuessConf.compareTo(conf) > 0)
                        && bestGuess.getVersionParts().size() < evVer.getVersionParts().size()) {
                    bestGuess = evVer;
                    bestGuessConf = conf;
                }
            }
        }

        cpeBuilder.part(Part.APPLICATION).vendor(vendor).product(product);
        if (maxDepth == 3 && bestGuess.getVersionParts().size() == 4
                && bestGuess.getVersionParts().get(3).matches("^(v|beta|alpha|u|rc|m|20\\d\\d).*$")) {
            cpeBuilder.version(StringUtils.join(bestGuess.getVersionParts().subList(0, 3), "."));
            //when written - no update versions in the NVD start with v### - they all strip the v off
            if (bestGuess.getVersionParts().get(3).matches("^v\\d.*$")) {
                cpeBuilder.update(bestGuess.getVersionParts().get(3).substring(1));
            } else {
                cpeBuilder.update(bestGuess.getVersionParts().get(3));
            }
        } else {
            cpeBuilder.version(bestGuess.toString());
        }
        final Cpe guessCpe;
        try {
            guessCpe = cpeBuilder.build();
        } catch (CpeValidationException ex) {
            throw new AnalysisException(String.format("Unable to create a CPE for %s:%s:%s",
                    vendor, product, bestGuess.toString()));
        }
        if (!"-".equals(guessCpe.getVersion())) {
            String url = null;
            if (hasBroadMatch) {
                //if we have a broad match we can add the URL to the best guess
                url = String.format(NVD_SEARCH_BROAD_URL, URLEncoder.encode(vendor, UTF8), URLEncoder.encode(product, UTF8));
            }
            if (bestGuessURL != null) {
                url = bestGuessURL;
            }
            if (bestGuessConf == null) {
                bestGuessConf = Confidence.LOW;
            }
            final IdentifierMatch match = new IdentifierMatch(guessCpe, url, IdentifierConfidence.BEST_GUESS, bestGuessConf);
            collected.add(match);
        }

        boolean identifierAdded = false;
        if (!collected.isEmpty()) {
            Collections.sort(collected);
            final IdentifierConfidence bestIdentifierQuality = collected.get(0).getIdentifierConfidence();
            final Confidence bestEvidenceQuality = collected.get(0).getEvidenceConfidence();
            boolean addedNonGuess = false;
            final Confidence prevAddedConfidence = dependency.getVulnerableSoftwareIdentifiers().stream()
                    .map(id -> id.getConfidence())
                    .min(Comparator.comparing(Confidence::ordinal))
                    .orElse(Confidence.LOW);
            for (IdentifierMatch m : collected) {
                if (bestIdentifierQuality.equals(m.getIdentifierConfidence())
                        && bestEvidenceQuality.equals(m.getEvidenceConfidence())) {
                    final CpeIdentifier i = m.getIdentifier();
                    if (bestIdentifierQuality == IdentifierConfidence.BEST_GUESS) {
                        if (addedNonGuess) {
                            continue;
                        }
                        i.setConfidence(Confidence.LOW);
                    } else {
                        i.setConfidence(bestEvidenceQuality);
                    }
                    if (prevAddedConfidence.compareTo(i.getConfidence()) < 0) {
                        continue;
                    }
                    //TODO - while this gets the job done it is slow; consider refactoring
                    dependency.addVulnerableSoftwareIdentifier(i);
                    suppression.analyze(dependency, null);
                    if (dependency.getVulnerableSoftwareIdentifiers().contains(i)) {
                        identifierAdded = true;
                        if (!addedNonGuess && bestIdentifierQuality != IdentifierConfidence.BEST_GUESS) {
                            addedNonGuess = true;
                        }
                    }
                }
            }
        }
        return identifierAdded;
    }
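    // Illustrative note (not part of the original source), with hypothetical
    // values: if the CPEs in the NVD for a product use three-part versions
    // (maxDepth == 3) and the evidence yields "2.5.10.rc1" (four parts, the
    // fourth matching the v|beta|alpha|u|rc|m|20xx pattern above), the builder
    // is given version "2.5.10" and update "rc1", producing a CPE along the
    // lines of cpe:2.3:a:vendor:product:2.5.10:rc1:*:...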

    /**
     * <p>
     * Returns the setting key to determine if the analyzer is enabled.</p>
     *
     * @return the key for the analyzer's enabled property
     */
    @Override
    protected String getAnalyzerEnabledSettingKey() {
        return Settings.KEYS.ANALYZER_CPE_ENABLED;
    }

    /**
     * Filters the given list of CPE Entries (plus ecosystem) for the given
     * dependency's ecosystem.
     *
     * @param ecosystem the dependency's ecosystem
     * @param entries the CPE Entries (plus ecosystem)
     * @return the filtered list of CPE entries
     */
    private Set<Cpe> filterEcosystem(String ecosystem, Set<CpePlus> entries) {
        if (entries == null || entries.isEmpty()) {
            return null;
        }
        if (ecosystem != null) {
            return entries.stream().filter((c) -> {
                if (c.getEcosystem() == null) {
                    return true;
                }
                switch (c.getEcosystem()) {
                    case JarAnalyzer.DEPENDENCY_ECOSYSTEM:
                    case "java":
                        return ecosystem.equals(JarAnalyzer.DEPENDENCY_ECOSYSTEM);
                    case AssemblyAnalyzer.DEPENDENCY_ECOSYSTEM:
                    case NugetconfAnalyzer.DEPENDENCY_ECOSYSTEM:
                    case "asp.net":
                        return ecosystem.equals(NugetconfAnalyzer.DEPENDENCY_ECOSYSTEM)
                                || ecosystem.equals(AssemblyAnalyzer.DEPENDENCY_ECOSYSTEM);
                    case RetireJsAnalyzer.DEPENDENCY_ECOSYSTEM:
                    case "jquery":
                        return ecosystem.equals(RetireJsAnalyzer.DEPENDENCY_ECOSYSTEM);
                    case PythonDistributionAnalyzer.DEPENDENCY_ECOSYSTEM:
                        return ecosystem.equals(PythonDistributionAnalyzer.DEPENDENCY_ECOSYSTEM);
                    case CMakeAnalyzer.DEPENDENCY_ECOSYSTEM:
                    case "borland_c++":
                    case "c/c++":
                    case "gnu_c++":
                        return ecosystem.equals(CMakeAnalyzer.DEPENDENCY_ECOSYSTEM);
                    case ComposerLockAnalyzer.DEPENDENCY_ECOSYSTEM:
                    case "drupal":
                    case "joomla":
                    case "joomla!":
                    case "moodle":
                    case "phpcms":
                    case "piwigo":
                    case "simplesamlphp":
                    case "symfony":
                    case "typo3":
                        return ecosystem.equals(ComposerLockAnalyzer.DEPENDENCY_ECOSYSTEM);
                    case AbstractNpmAnalyzer.NPM_DEPENDENCY_ECOSYSTEM:
                    case "node.js":
                    case "nodejs":
                        return ecosystem.equals(AbstractNpmAnalyzer.NPM_DEPENDENCY_ECOSYSTEM);
                    case RubyBundleAuditAnalyzer.DEPENDENCY_ECOSYSTEM:
                    case "rails":
                        return ecosystem.equals(RubyBundleAuditAnalyzer.DEPENDENCY_ECOSYSTEM);
                    case "perl":
                    //TODO - if we add the elixir analyzer this needs to be removed
                    case "elixir":
                    case "delphi":
                        return false;
                    default:
                        return true;
                }
            }).map(c -> c.getCpe())
                    .collect(Collectors.toSet());
        }
        return entries.stream()
                .map(c -> c.getCpe())
                .collect(Collectors.toSet());
    }
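    // Illustrative note (not part of the original source): filterEcosystem()
    // keeps a CPE whose stored ecosystem is null or unrecognized, and otherwise
    // requires the dependency's ecosystem to agree. For example, a CPE tagged
    // "node.js" survives only when the dependency's ecosystem is the npm
    // ecosystem, while CPEs tagged "perl", "elixir", or "delphi" are always
    // dropped, presumably because no analyzer in the pipeline produces those
    // ecosystems.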
    /**
     * The confidence whether the identifier is an exact match, or a best
     * guess.
     */
    private enum IdentifierConfidence {

        /**
         * An exact match for the CPE.
         */
        EXACT_MATCH,
        /**
         * A best guess for the CPE.
         */
        BEST_GUESS,
        /**
         * The entire vendor/product group must be added (without a guess at
         * version) because there is a CVE with a VS that only specifies
         * vendor/product.
         */
        BROAD_MATCH
    }

    /**
     * A simple object to hold an identifier and carry information about the
     * confidence in the identifier.
     */
    private static class IdentifierMatch implements Comparable<IdentifierMatch> {

        /**
         * The confidence whether this is an exact match, or a best guess.
         */
        private IdentifierConfidence identifierConfidence;
        /**
         * The CPE identifier.
         */
        private CpeIdentifier identifier;

        /**
         * Constructs an IdentifierMatch.
         *
         * @param cpe the CPE value for the match
         * @param url the URL of the identifier
         * @param identifierConfidence the confidence in the identifier: best
         * guess or exact match
         * @param evidenceConfidence the confidence of the evidence used to
         * find the identifier
         */
        IdentifierMatch(Cpe cpe, String url, IdentifierConfidence identifierConfidence, Confidence evidenceConfidence) {
            this.identifier = new CpeIdentifier(cpe, url, evidenceConfidence);
            this.identifierConfidence = identifierConfidence;
        }

        /**
         * Get the value of evidenceConfidence.
         *
         * @return the value of evidenceConfidence
         */
        public Confidence getEvidenceConfidence() {
            return this.identifier.getConfidence();
        }

        /**
         * Set the value of evidenceConfidence.
         *
         * @param evidenceConfidence new value of evidenceConfidence
         */
        public void setEvidenceConfidence(Confidence evidenceConfidence) {
            this.identifier.setConfidence(evidenceConfidence);
        }

        /**
         * Get the value of confidence.
         *
         * @return the value of confidence
         */
        public IdentifierConfidence getIdentifierConfidence() {
            return identifierConfidence;
        }

        /**
         * Set the value of confidence.
         *
         * @param confidence new value of confidence
         */
        public void setIdentifierConfidence(IdentifierConfidence confidence) {
            this.identifierConfidence = confidence;
        }

        /**
         * Get the value of identifier.
         *
         * @return the value of identifier
         */
        public CpeIdentifier getIdentifier() {
            return identifier;
        }

        /**
         * Set the value of identifier.
         *
         * @param identifier new value of identifier
         */
        public void setIdentifier(CpeIdentifier identifier) {
            this.identifier = identifier;
        }

        /**
         * Standard toString() implementation.
         *
         * @return the string representation of the object
         */
        @Override
        public String toString() {
            return "IdentifierMatch{ IdentifierConfidence=" + identifierConfidence + ", identifier=" + identifier + '}';
        }

        /**
         * Standard hashCode() implementation.
         *
         * @return the hashCode
         */
        @Override
        public int hashCode() {
            return new HashCodeBuilder(115, 303)
                    .append(identifierConfidence)
                    .append(identifier)
                    .toHashCode();
        }

        /**
         * Standard equals implementation.
         *
         * @param obj the object to compare
         * @return true if the objects are equal, otherwise false
         */
        @Override
        public boolean equals(Object obj) {
            if (obj == null || !(obj instanceof IdentifierMatch)) {
                return false;
            }
            if (this == obj) {
                return true;
            }
            final IdentifierMatch other = (IdentifierMatch) obj;
            return new EqualsBuilder()
                    .append(identifierConfidence, other.identifierConfidence)
                    .append(identifier, other.identifier)
                    .build();
        }

        /**
         * Standard implementation of compareTo that compares identifier
         * confidence, evidence confidence, and then the identifier.
         *
         * @param o the IdentifierMatch to compare to
         * @return the natural ordering of IdentifierMatch
         */
        @Override
        public int compareTo(@NotNull IdentifierMatch o) {
            return new CompareToBuilder()
                    .append(identifierConfidence, o.identifierConfidence)
                    .append(identifier, o.identifier)
                    .toComparison();
        }
    }
    /**
     * Command line tool for querying the Lucene CPE Index.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        final Settings props = new Settings();
        try (Engine en = new Engine(Engine.Mode.EVIDENCE_PROCESSING, props)) {
            en.openDatabase(false, false);
            final CPEAnalyzer analyzer = new CPEAnalyzer();
            analyzer.initialize(props);
            analyzer.prepareAnalyzer(en);
            System.out.println("Memory index query for ODC");
            try (BufferedReader br = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8))) {
                while (true) {
                    final Map<String, MutableInt> vendor = new HashMap<>();
                    final Map<String, MutableInt> product = new HashMap<>();
                    System.out.print("Vendor: ");
                    String[] parts = br.readLine().split(" ");
                    for (String term : parts) {
                        final MutableInt count = vendor.get(term);
                        if (count == null) {
                            vendor.put(term, new MutableInt(0));
                        } else {
                            count.add(1);
                        }
                    }
                    System.out.print("Product: ");
                    parts = br.readLine().split(" ");
                    for (String term : parts) {
                        final MutableInt count = product.get(term);
                        if (count == null) {
                            product.put(term, new MutableInt(0));
                        } else {
                            count.add(1);
                        }
                    }
                    final List<IndexEntry> list = analyzer.searchCPE(vendor, product, new HashSet<>(), new HashSet<>());
                    if (list == null || list.isEmpty()) {
                        System.out.println("No results found");
                    } else {
                        list.forEach((e) -> System.out.println(String.format("%s:%s (%f)",
                                e.getVendor(), e.getProduct(), e.getSearchScore())));
                    }
                    System.out.println();
                    System.out.println();
                }
            }
        } catch (InitializationException | IOException ex) {
            System.err.println("Lucene ODC search tool failed:");
            System.err.println(ex.getMessage());
        }
    }
}
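// Illustrative note (not part of the original source): a hypothetical session
// with the query tool above; the score shown is only indicative:
//
//   Vendor: apache
//   Product: struts
//   apache:struts (7.723456)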