/*
* Copyright (c) 2024, Peter Abeles. All Rights Reserved.
*
* This file is part of BoofCV (http://boofcv.org).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package boofcv.alg.scene.nister2006;
import boofcv.alg.scene.bow.BowDistanceTypes;
import boofcv.alg.scene.bow.BowMatch;
import boofcv.alg.scene.bow.BowUtils;
import boofcv.alg.scene.bow.InvertedFile;
import boofcv.alg.scene.vocabtree.HierarchicalVocabularyTree;
import boofcv.alg.scene.vocabtree.HierarchicalVocabularyTree.Node;
import boofcv.misc.BoofLambdas;
import boofcv.struct.ConfigLength;
import lombok.Getter;
import lombok.Setter;
import org.ddogleg.struct.*;
import org.jetbrains.annotations.Nullable;
import pabeles.concurrency.GrowArray;
import java.io.PrintStream;
import java.util.List;
import java.util.Set;
/**
* Image recognition based on [1] using inverted files. A {@link HierarchicalVocabularyTree} is assumed to have
* already been trained. When an image is added to the database a TF-IDF descriptor is computed using the tree
* and then added to the relevant tree's leaves. When an image is looked up, its TF-IDF descriptor is computed,
* then all images in the database which share at least one leaf node with it are found. These candidate matches
* are then compared against each other and scored using the L2-norm.
*
* Implementation Notes:
* This implementation is intended to produce output which is faithful to the original work [1], but internally
* it contains several modifications which attempt to improve runtime performance, often at the cost of
* increased memory consumption. A non-exhaustive list of deviations is given below:
*
* - Taking inspiration from [2], this implementation has an explicit representation of the inverted
* files in non-leaf nodes. This avoids an expensive graph traversal step and replaces it with a very fast
* array lookup.
* - Histogram weights are stored in inverted files instead of word counts. This allows more efficient error
* computation.
*
* [1] Nister, David, and Henrik Stewenius. "Scalable recognition with a vocabulary tree."
* 2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06). Vol. 2. IEEE, 2006.
* [2] Esteban Uriza, Francisco Gómez Fernández, and Martín Rais, "Efficient Large-scale Image Search With a Vocabulary
* Tree", Image Processing On Line, 8 (2018), pp. 71–98
*
*
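* A minimal usage sketch is shown below. It assumes the tree has already been trained elsewhere and that
* features are described by {@code TupleDesc_F64}; the variable names and descriptor type are only
* illustrative and should be adapted to the application.
*
* <pre>{@code
* RecognitionVocabularyTreeNister2006<TupleDesc_F64> recognizer = new RecognitionVocabularyTreeNister2006<>();
* recognizer.initializeTree(tree);                    // previously trained HierarchicalVocabularyTree
* recognizer.addImage(0, featuresImage0);             // add each image's feature descriptors to the database
* recognizer.addImage(1, featuresImage1);
* if (recognizer.query(queryFeatures, null, 10)) {    // no filter, return at most 10 matches
*     BowMatch best = recognizer.getMatches().get(0); // matches are sorted, best first
* }
* }</pre>
*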
* @author Peter Abeles
*/
@SuppressWarnings({"NullAway.Init"})
public class RecognitionVocabularyTreeNister2006<Point> implements VerbosePrint {
/** Vocabulary Tree */
public @Getter HierarchicalVocabularyTree<Point> tree;
/** A node can be part of the descriptor if it's at least this far from the root node */
public int minimumDepthFromRoot = 0;
/**
* If a node has an inverted file list greater than this amount then it will be skipped when scoring. This
* should be viewed as a last-ditch effort when the query is too slow. If there are 1,000,000 images in the
* DB, then 20,000 seems to be a reasonable number.
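* As an illustration only, a fixed cap could be set with {@code maximumQueryImagesInNode = ConfigLength.fixed(20_000)}.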
*/
public ConfigLength maximumQueryImagesInNode = ConfigLength.relative(1.0, 1);
/** Inverted file for each node in the tree */
public final GrowArray<InvertedFile> invertedFiles = new GrowArray<>(InvertedFile::new, InvertedFile::reset);
/** List of images added to the database */
protected @Getter final BigDogArray_I32 imagesDB = new BigDogArray_I32(100, 10000, BigDogGrowth.GROW_FIRST);
/** Scores for all candidate images which have been sorted */
protected @Getter final DogArray<BowMatch> matches = new DogArray<>(BowMatch::new, BowMatch::reset);
/** Distance between two TF-IDF descriptors. L1 and L2 norms are provided */
protected @Getter @Setter TupleMapDistanceNorm distanceFunction = new TupleMapDistanceNorm.L2();
/** Stores a mapping from feature index to leaf ID */
protected @Getter final DogArray_I32 featureIdxToLeafID = new DogArray_I32();
//---------------- Internal Workspace
// The "frequency" that nodes in the tree appear in this image
protected final DogArray<Frequency> frequencies = new DogArray<>(Frequency::new, Frequency::reset);
// For lookup. One element for every image in the database
DogArray_I32 imageIdx_to_match = new DogArray_I32();
DogArray_I32 nodeIdx_to_match = new DogArray_I32();
// temporary storage for an image TF-IDF descriptor
DogArray_F32 tmpDescWeights = new DogArray_F32();
DogArray_I32 tmpDescWords = new DogArray_I32();
// If not null then print verbose information here
@Nullable PrintStream verbose;
/**
* Specifies the vocabulary tree which is to be used as the database and initializes internal data structures.
*
* @param tree Tree which is to be used as the database. Saved internally.
*/
public void initializeTree( HierarchicalVocabularyTree<Point> tree ) {
this.tree = tree;
clearImages();
}
/**
* Removes all images from the database.
*/
public void clearImages() {
imagesDB.reset();
// Removes the old leaf data and replaces it with empty structures
invertedFiles.reset();
if (tree != null)
invertedFiles.resize(tree.nodes.size);
}
/**
* Adds a new image to the database.
*
* @param imageID The image's unique ID for later reference
* @param imageFeatures Feature descriptors from an image
*/
public void addImage( int imageID, List<Point> imageFeatures ) {
if (imageFeatures.isEmpty())
return;
int imageIdx = imagesDB.size;
imagesDB.append(imageID);
// compute a descriptor for this image while adding it to the leaves
describe(imageFeatures, tmpDescWeights, tmpDescWords);
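// Each word's inverted file stores (image index, TF-IDF weight) pairs, so at query time only images
// which share at least one word with the query need to be visited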
for (int wordIdx = 0; wordIdx < tmpDescWords.size; wordIdx++) {
int word = tmpDescWords.get(wordIdx);
invertedFiles.get(word).addImage(imageIdx, tmpDescWeights.get(wordIdx));
}
}
/**
* Looks up the best matches in the database. The list of all potential matches can be accessed by calling
* {@link #getMatches()}.
*
* @param queryImage Set of feature descriptors from the query image
* @param filter Filter which can be used to reject matches that the user doesn't want returned. False = reject.
* @param limit Maximum number of matches it will return.
* @return true if at least one match was found in the database
*/
public boolean query( List<Point> queryImage, @Nullable BoofLambdas.FilterInt filter, int limit ) {
matches.reset();
// Can't match anything if the query image has no features
if (queryImage.isEmpty()) {
return false;
}
findAndScoreMatches(queryImage);
if (matches.isEmpty())
return false;
if (verbose != null) verbose.println("raw matches.size=" + matches.size);
// Bookkeeping
for (int i = 0; i < matches.size(); i++) {
BowMatch m = matches.get(i);
// Undo changes and make sure all elements are -1 again
imageIdx_to_match.set(m.identification, -1);
// m.identification is overloaded earlier and actually stores the index
m.identification = imagesDB.get(m.identification);
}
BowUtils.filterAndSortMatches(matches, filter, limit);
return matches.size > 0;
}
/**
* Uses the inverted file for each word to create a list of potential matches while scoring the matches
* efficiently
*/
protected void findAndScoreMatches( List<Point> queryImage ) {
// Don't use a node if it will degrade the runtime performance too much by considering too many images
// This will also degrade the quality of query results
int maximumInvertedFileLength = maximumQueryImagesInNode.computeI(imagesDB.size);
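// computeI() resolves the limit against the current database size: a relative setting scales with the
// number of images (subject to its minimum) while a fixed setting is used as-is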
// Create a description of this image and collect potential matches from leaves
describe(queryImage, tmpDescWeights, tmpDescWords);
// NOTE: It's assumed imageIdx_to_match is full of -1
imageIdx_to_match.resize(imagesDB.size, -1);
// Find and score all the images that could possibly be matched with the query
for (int wordIdx = 0; wordIdx < tmpDescWords.size; wordIdx++) {
float queryWordWeight = tmpDescWeights.get(wordIdx);
HierarchicalVocabularyTree.Node node = tree.nodes.get(tmpDescWords.get(wordIdx));
InvertedFile invertedFile = invertedFiles.get(node.index);
// See above
if (invertedFile.size > maximumInvertedFileLength)
continue;
for (int i = 0; i < invertedFile.size; i++) {
// Get the list of images in the database which have this particular word using
// the inverted file list
int imageIdx = invertedFile.get(i);
BowMatch m;
if (imageIdx_to_match.get(imageIdx) == -1) {
imageIdx_to_match.set(imageIdx, matches.size);
m = matches.grow();
m.identification = imageIdx; // this will be converted to ID on output
} else {
m = matches.get(imageIdx_to_match.get(imageIdx));
}
// Update the score computation. See TupleMapDistanceNorm for why this is done
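// For unit L2 descriptors, ||q - d||^2 = 2 - 2*sum(q_i*d_i), so only words common to both the query and
// this database image contribute to the sum; the constant offset is identical for every image and does
// not affect the ranking. An analogous identity is used for the L1 norm.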
m.error += distanceFunction.distanceUpdate(queryWordWeight, invertedFile.weights.get(i));
// NOTE: An earlier version created a list of common word weights. That took 5x longer
}
}
}
/**
* Given the image features, compute a sparse TF-IDF descriptor for the image and record which leaf node
* each feature was assigned to.
*
* @param imageFeatures (Input) All image features in the image
* @param descWeights (Output) Weights for each non-zero word in the TF-IDF descriptor for this image
* @param descWords (Output) Word index for each non-zero word in the TF-IDF descriptor for this image
*/
protected void describe( List<Point> imageFeatures, DogArray_F32 descWeights, DogArray_I32 descWords ) {
// Reset work variables
frequencies.reset();
descWeights.reset();
descWords.reset();
// NOTE: It's assumed nodeIdx_to_match is full of -1
nodeIdx_to_match.resize(tree.nodes.size, -1);
featureIdxToLeafID.resize(imageFeatures.size());
for (int featureIdx = 0; featureIdx < imageFeatures.size(); featureIdx++) {
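// searchPathToLeaf() descends from the root to the best matching leaf and invokes this lambda at every
// node along the path. Nodes that are too shallow or carry zero weight are excluded from the descriptor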
int leafID = tree.searchPathToLeaf(imageFeatures.get(featureIdx), ( depth, node ) -> {
if (depth < minimumDepthFromRoot || node.weight <= 0.0f)
return;
Frequency f;
int frequencyIdx = nodeIdx_to_match.get(node.index);
if (frequencyIdx == -1) {
nodeIdx_to_match.set(node.index, frequencies.size);
f = frequencies.grow();
f.node = node;
} else {
f = frequencies.get(frequencyIdx);
}
f.totalAppearances++;
});
featureIdxToLeafID.data[featureIdx] = leafID;
}
// undo changes to the lookup table
for (int i = 0; i < frequencies.size; i++) {
nodeIdx_to_match.set(frequencies.get(i).node.index, -1);
}
// No nodes with a non-zero weight were found for this image
if (frequencies.isEmpty())
return;
// Create the descriptor and normalize it
double totalUniqueWordsSeenByImage = frequencies.size;
// NOTE: I'm not 100% sure this is the divisor used in the paper, but doesn't really matter due to the
// descriptor getting normalized.
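// Hypothetical example: if 4 unique words were seen and word A appeared 3 times with a node weight of 0.5,
// then termFrequency = 3/4 = 0.75 and its unnormalized descriptor weight is 0.75*0.5 = 0.375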
descWeights.reserve(frequencies.size);
descWords.reserve(frequencies.size);
for (int i = 0; i < frequencies.size; i++) {
Frequency f = frequencies.get(i);
// Term frequency: n[i] = number of times word[i] appears in this image / total words in this image
double termFrequency = f.totalAppearances/totalUniqueWordsSeenByImage;
// TF-IDF feature: d[i] = n[i] * node_weight[i]
descWeights.add((float)(termFrequency*f.node.weight));
descWords.add(f.node.index);
}
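// Scale the descriptor so it has unit length under the selected norm (L1 or L2), which the incremental
// distance computation in findAndScoreMatches() relies on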
distanceFunction.normalize(descWeights);
}
/** Used to change the distance function to one of the built-in types */
public void setDistanceType( BowDistanceTypes type ) {
distanceFunction = switch (type) {
case L1 -> new TupleMapDistanceNorm.L1();
case L2 -> new TupleMapDistanceNorm.L2();
default -> throw new IllegalArgumentException("Unknown type " + type);
};
}
@Override public void setVerbose( @Nullable PrintStream out, @Nullable Set<String> settings ) {
this.verbose = out;
}
/**
* Used to sum the frequency of words (graph nodes) in the image
*/
@SuppressWarnings({"NullAway.Init"})
protected static class Frequency {
// Number of times this word/node appeared in this image
int totalAppearances;
// The node which is referenced
Node node;
@SuppressWarnings({"NullAway"})
public void reset() {
totalAppearances = 0;
node = null;
}
}
}