
boofcv.alg.scene.nister2006.RecognitionVocabularyTreeNister2006

/*
 * Copyright (c) 2024, Peter Abeles. All Rights Reserved.
 *
 * This file is part of BoofCV (http://boofcv.org).
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package boofcv.alg.scene.nister2006;

import boofcv.alg.scene.bow.BowDistanceTypes;
import boofcv.alg.scene.bow.BowMatch;
import boofcv.alg.scene.bow.BowUtils;
import boofcv.alg.scene.bow.InvertedFile;
import boofcv.alg.scene.vocabtree.HierarchicalVocabularyTree;
import boofcv.alg.scene.vocabtree.HierarchicalVocabularyTree.Node;
import boofcv.misc.BoofLambdas;
import boofcv.struct.ConfigLength;
import lombok.Getter;
import lombok.Setter;
import org.ddogleg.struct.*;
import org.jetbrains.annotations.Nullable;
import pabeles.concurrency.GrowArray;

import java.io.PrintStream;
import java.util.List;
import java.util.Set;

/**
 * Image recognition based off of [1] using inverted files. A {@link HierarchicalVocabularyTree} is assumed to have
 * been already trained. When an image is added to the database a TF-IDF descriptor is computed using the tree
 * and then added to the relevant tree's leaves. When an image is looked up its TF-IDF descriptor is computed, then
 * all images in the database are found that share at least one leaf node. These candidate matches are then
 * compared against each other and scored using the L2-norm.
 *
 * <p>Implementation Notes:<br>
 * This implementation is intended to produce output which is faithful to the original work [1] but has
 * several modifications internally where there has been an attempt to improve runtime performance, often
 * at the cost of an increase in memory consumption. A non-exhaustive set of deviations is listed below.</p>
 *
 * <ul>
 *     <li>Taking inspiration from [2], this implementation has an explicit representation of the inverted
 *     files in non-leaf nodes. This avoids an expensive graph traversal step and replaces it with a very fast
 *     array look up.</li>
 *     <li>Histogram weights are stored in inverted files instead of word counts. This allows more efficient
 *     error computation.</li>
 * </ul>
 *
 * <p>
 * [1] Nister, David, and Henrik Stewenius. "Scalable recognition with a vocabulary tree."
 * 2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06). Vol. 2. IEEE, 2006.<br>
 * [2] Esteban Uriza, Francisco Gómez Fernández, and Martín Rais, "Efficient Large-scale Image Search With a
 * Vocabulary Tree", Image Processing On Line, 8 (2018), pp. 71–98
 * </p>
 *
 * @author Peter Abeles
 */
@SuppressWarnings({"NullAway.Init"})
public class RecognitionVocabularyTreeNister2006<Point> implements VerbosePrint {
	/** Vocabulary Tree */
	public @Getter HierarchicalVocabularyTree<Point> tree;

	/** A node can be part of the descriptor if it's at least this far from the root node */
	public int minimumDepthFromRoot = 0;

	/**
	 * If a node has an inverted file list greater than this amount then it will be skipped when scoring. This
	 * should be viewed as a last ditch effort when the query is too slow. If there are 1,000,000 images in the
	 * DB, then 20,000 seems to be a reasonable number.
	 */
	public ConfigLength maximumQueryImagesInNode = ConfigLength.relative(1.0, 1);

	/** User data associated with each node */
	public final GrowArray<InvertedFile> invertedFiles = new GrowArray<>(InvertedFile::new, InvertedFile::reset);

	/** List of images added to the database */
	protected @Getter final BigDogArray_I32 imagesDB = new BigDogArray_I32(100, 10000, BigDogGrowth.GROW_FIRST);

	/** Scores for all candidate images which have been sorted */
	protected @Getter final DogArray<BowMatch> matches = new DogArray<>(BowMatch::new, BowMatch::reset);

	/** Distance between two TF-IDF descriptors. L1 and L2 norms are provided */
	protected @Getter @Setter TupleMapDistanceNorm distanceFunction = new TupleMapDistanceNorm.L2();

	/** Stores a mapping from feature index to leaf ID */
	protected @Getter final DogArray_I32 featureIdxToLeafID = new DogArray_I32();

	//---------------- Internal Workspace

	// The "frequency" that nodes in the tree appear in this image
	protected final DogArray<Frequency> frequencies = new DogArray<>(Frequency::new, Frequency::reset);

	// For lookup. One element for every image in the database
	DogArray_I32 imageIdx_to_match = new DogArray_I32();
	DogArray_I32 nodeIdx_to_match = new DogArray_I32();

	// temporary storage for an image TF-IDF descriptor
	DogArray_F32 tmpDescWeights = new DogArray_F32();
	DogArray_I32 tmpDescWords = new DogArray_I32();

	// If not null then print verbose information here
	@Nullable PrintStream verbose;

	/**
	 * Configures the tree by adding LeafData to all the leaves in the tree then saves a reference for future use
	 *
	 * @param tree Tree which is to be used as the database. Saved internally.
	 */
	public void initializeTree( HierarchicalVocabularyTree<Point> tree ) {
		this.tree = tree;
		clearImages();
	}

	/**
	 * Removes all images from the database.
	 */
	public void clearImages() {
		imagesDB.reset();

		// Removes the old leaf data and replaces it with empty structures
		invertedFiles.reset();
		if (tree != null)
			invertedFiles.resize(tree.nodes.size);
	}

	/**
	 * Adds a new image to the database.
	 *
	 * @param imageID The image's unique ID for later reference
	 * @param imageFeatures Feature descriptors from an image
	 */
	public void addImage( int imageID, List<Point> imageFeatures ) {
		if (imageFeatures.isEmpty())
			return;

		int imageIdx = imagesDB.size;
		imagesDB.append(imageID);

		// compute a descriptor for this image while adding it to the leaves
		describe(imageFeatures, tmpDescWeights, tmpDescWords);

		for (int wordIdx = 0; wordIdx < tmpDescWords.size; wordIdx++) {
			int word = tmpDescWords.get(wordIdx);
			invertedFiles.get(word).addImage(imageIdx, tmpDescWeights.get(wordIdx));
		}
	}

	/**
	 * Looks up the best BowMatch from the database. The list of all potential matches can be accessed by calling
	 * {@link #getMatches()}.
	 *
	 * @param queryImage Set of feature descriptors from the query image
	 * @param filter Filter which can be used to reject matches that the user doesn't want returned. False = reject.
	 * @param limit Maximum number of matches it will return.
	 * @return The best matching image with score from the database
	 */
	public boolean query( List<Point> queryImage, @Nullable BoofLambdas.FilterInt filter, int limit ) {
		matches.reset();

		// Can't match to anything if it's empty
		if (queryImage.isEmpty()) {
			return false;
		}

		findAndScoreMatches(queryImage);

		if (matches.isEmpty())
			return false;

		if (verbose != null) verbose.println("raw matches.size=" + matches.size);

		// Bookkeeping
		for (int i = 0; i < matches.size(); i++) {
			BowMatch m = matches.get(i);
			// Undo changes and make sure all elements are -1 again
			imageIdx_to_match.set(m.identification, -1);
			// m.identification is overloaded earlier and actually stores the index
			m.identification = imagesDB.get(m.identification);
		}

		BowUtils.filterAndSortMatches(matches, filter, limit);

		return matches.size > 0;
	}

	/**
	 * Uses the inverted file for each word to create a list of potential matches while scoring the matches
	 * efficiently
	 */
	protected void findAndScoreMatches( List<Point> queryImage ) {
		// Don't use a node if it will degrade the runtime performance too much by considering too many images.
		// This will also degrade the quality of query results
		int maximumInvertedFileLength = maximumQueryImagesInNode.computeI(imagesDB.size);

		// Create a description of this image and collect potential matches from leaves
		describe(queryImage, tmpDescWeights, tmpDescWords);

		// NOTE: It's assumed imageIdx_to_match is full of -1
		imageIdx_to_match.resize(imagesDB.size, -1);

		// Find and score all the images that could possibly be matched with the query
		for (int wordIdx = 0; wordIdx < tmpDescWords.size; wordIdx++) {
			float queryWordWeight = tmpDescWeights.get(wordIdx);
			HierarchicalVocabularyTree.Node node = tree.nodes.get(tmpDescWords.get(wordIdx));
			InvertedFile invertedFile = invertedFiles.get(node.index);

			// See above
			if (invertedFile.size > maximumInvertedFileLength)
				continue;

			for (int i = 0; i < invertedFile.size; i++) {
				// Get the list of images in the database which have this particular word using
				// the inverted file list
				int imageIdx = invertedFile.get(i);

				BowMatch m;
				if (imageIdx_to_match.get(imageIdx) == -1) {
					imageIdx_to_match.set(imageIdx, matches.size);
					m = matches.grow();
					m.identification = imageIdx; // this will be converted to ID on output
				} else {
					m = matches.get(imageIdx_to_match.get(imageIdx));
				}

				// Update the score computation. See TupleMapDistanceNorm for why this is done
				m.error += distanceFunction.distanceUpdate(queryWordWeight, invertedFile.weights.get(i));
				// NOTE: An earlier version created a list of common word weights. That took 5x longer
			}
		}
	}

	/**
	 * Given the image features, compute a sparse descriptor for the image and pass in leaf nodes to 'op' for each
	 * image feature.
	 *
	 * @param imageFeatures (Input) All image features in the image
	 * @param descWeights (Output) Weights for each non-zero word in the TF-IDF descriptor for this image
	 * @param descWords (Output) Word index for each non-zero word in the TF-IDF descriptor for this image
	 */
	protected void describe( List<Point> imageFeatures, DogArray_F32 descWeights, DogArray_I32 descWords ) {
		// Reset work variables
		frequencies.reset();
		descWeights.reset();
		descWords.reset();

		// NOTE: It's assumed nodeIdx_to_match is full of -1
		nodeIdx_to_match.resize(tree.nodes.size, -1);

		featureIdxToLeafID.resize(imageFeatures.size());
		for (int featureIdx = 0; featureIdx < imageFeatures.size(); featureIdx++) {
			int leafID = tree.searchPathToLeaf(imageFeatures.get(featureIdx), ( depth, node ) -> {
				if (depth < minimumDepthFromRoot || node.weight <= 0.0f)
					return;

				Frequency f;
				int frequencyIdx = nodeIdx_to_match.get(node.index);
				if (frequencyIdx == -1) {
					nodeIdx_to_match.set(node.index, frequencies.size);
					f = frequencies.grow();
					f.node = node;
				} else {
					f = frequencies.get(frequencyIdx);
				}
				f.totalAppearances++;
			});
			featureIdxToLeafID.data[featureIdx] = leafID;
		}

		// undo changes to the lookup table
		for (int i = 0; i < frequencies.size; i++) {
			nodeIdx_to_match.set(frequencies.get(i).node.index, -1);
		}

		// No nodes with a non-zero weight were found
		if (frequencies.isEmpty())
			return;

		// Create the descriptor and normalize it
		double totalUniqueWordsSeenByImage = frequencies.size;
		// NOTE: I'm not 100% sure this is the divisor used in the paper, but doesn't really matter due to the
		// descriptor getting normalized.

		descWeights.reserve(frequencies.size);
		descWords.reserve(frequencies.size);

		for (int i = 0; i < frequencies.size; i++) {
			Frequency f = frequencies.get(i);

			// Term frequency: n[i] = number of times word[i] appears in this image / total words in this image
			double termFrequency = f.totalAppearances/totalUniqueWordsSeenByImage;
			// TF-IDF feature: d[i] = n[i] * node_weight[i]
			descWeights.add((float)(termFrequency*f.node.weight));
			descWords.add(f.node.index);
		}

		distanceFunction.normalize(descWeights);
	}

	/** Used to change the distance function to one of the built-in types */
	public void setDistanceType( BowDistanceTypes type ) {
		distanceFunction = switch (type) {
			case L1 -> new TupleMapDistanceNorm.L1();
			case L2 -> new TupleMapDistanceNorm.L2();
			default -> throw new IllegalArgumentException("Unknown type " + type);
		};
	}

	@Override public void setVerbose( @Nullable PrintStream out, @Nullable Set<String> settings ) {
		this.verbose = out;
	}

	/**
	 * Used to sum the frequency of words (graph nodes) in the image
	 */
	@SuppressWarnings({"NullAway.Init"})
	protected static class Frequency {
		// Number of times this word/node appeared in this image
		int totalAppearances;

		// The node which is referenced
		Node node;

		@SuppressWarnings({"NullAway"})
		public void reset() {
			totalAppearances = 0;
			node = null;
		}
	}
}
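
Below is a brief usage sketch for readers browsing this listing; it is not part of the BoofCV source. It shows the call sequence the class is designed around: initialize with an already trained tree, add database images, then query. The descriptor type TupleDesc_F64, the wrapper class UsageSketch, and the variable names are illustrative assumptions only.

import boofcv.alg.scene.bow.BowMatch;
import boofcv.alg.scene.nister2006.RecognitionVocabularyTreeNister2006;
import boofcv.alg.scene.vocabtree.HierarchicalVocabularyTree;
import boofcv.struct.feature.TupleDesc_F64;

import java.util.List;

public class UsageSketch {
	// Hypothetical helper: the tree is assumed to have been trained elsewhere, and each inner
	// list in imageDescriptors holds the feature descriptors for one database image.
	public static void example( HierarchicalVocabularyTree<TupleDesc_F64> tree,
								List<List<TupleDesc_F64>> imageDescriptors,
								List<TupleDesc_F64> queryDescriptors ) {
		RecognitionVocabularyTreeNister2006<TupleDesc_F64> recognizer =
				new RecognitionVocabularyTreeNister2006<>();
		recognizer.initializeTree(tree);

		// Add each database image. The integer ID is whatever the caller uses to identify images later
		for (int imageID = 0; imageID < imageDescriptors.size(); imageID++) {
			recognizer.addImage(imageID, imageDescriptors.get(imageID));
		}

		// Query with a new image. A null filter accepts every candidate and at most 10 matches are kept
		if (recognizer.query(queryDescriptors, /* filter */ null, /* limit */ 10)) {
			BowMatch best = recognizer.getMatches().get(0); // matches are sorted, best first
			System.out.println("best ID = " + best.identification + " error = " + best.error);
		}
	}
}

Since query() converts BowMatch.identification back to the caller-supplied image ID on output, the printed value refers to the ID passed to addImage(), not an internal index.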



