All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.index.FieldNormModifier Maven / Gradle / Ivy

package org.apache.lucene.index;

/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.File;
import java.util.Date;

import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.StringHelper;

/**
 * Given a directory and a list of fields, updates the fieldNorms in place for every document.
 *
 * If a Similarity class is specified, uses its computeNorm method to set norms.
 * If the -n command line argument is used, removes field norms, as if
 * {@link org.apache.lucene.document.Field.Index}.NO_NORMS was used.
 *
 * <p>
 * NOTE: This will overwrite any length normalization or field/document boosts.
 * </p>
 */
public class FieldNormModifier {

  /**
   * Command Line Execution method.
   *
   * <pre>
   * Usage: FieldNormModifier /path/index &lt;package.SimilarityClassName | -n&gt; field1 field2 ...
   * </pre>
   *
   * @param args index path, Similarity class name (or <code>-n</code>), then one or more field names
   * @throws IOException if the index cannot be opened, read or modified
   */
  public static void main(String[] args) throws IOException {
    if (args.length < 3) {
      System.err.println("Usage: FieldNormModifier <index> <package.SimilarityClassName | -n> <field1> [field2] ...");
      System.exit(1);
    }

    // "-n" means "no Similarity": reSetNorms then writes the fake norm encodeNorm(1.0f).
    Similarity s = null;
    if (!args[1].equals("-n")) {
      try {
        s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance();
      } catch (Exception e) {
        System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]);
        e.printStackTrace(System.err);
        System.exit(1);
      }
    }

    Directory d = FSDirectory.open(new File(args[0]));
    try {
      FieldNormModifier fnm = new FieldNormModifier(d, s);
      for (int i = 2; i < args.length; i++) {
        System.out.print("Updating field: " + args[i] + " " + (new Date()).toString() + " ... ");
        fnm.reSetNorms(args[i]);
        System.out.println(new Date().toString());
      }
    } finally {
      // Close the directory even when resetting a field fails part-way through.
      d.close();
    }
  }

  private final Directory dir;
  private final Similarity sim;

  /**
   * Constructor for code that wishes to use this class programmatically.
   * If Similarity is null, kill the field norms.
   *
   * @param d the Directory to modify
   * @param s the Similarity to use (can be null)
   */
  public FieldNormModifier(Directory d, Similarity s) {
    dir = d;
    sim = s;
  }

  /**
   * Resets the norms for the specified field.
   *
   * <p>
   * Opens a read-only IndexReader on the Directory given to this instance to count
   * the term occurrences of the field in every document, closes it, then opens a
   * second, writable IndexReader to modify the norms (either using the Similarity
   * given to this instance, or by using fake norms), and closes that reader too.
   * </p>
   *
   * @param field the field whose norms should be reset
   * @throws IOException if the index cannot be opened, read or modified
   */
  public void reSetNorms(String field) throws IOException {
    String fieldName = StringHelper.intern(field);
    int[] termCounts = countTermsPerDocument(fieldName);
    writeNorms(fieldName, termCounts);
  }

  /** Sums, per document number, the frequencies of all terms of the given field. */
  private int[] countTermsPerDocument(String fieldName) throws IOException {
    IndexReader reader = IndexReader.open(dir, true);
    try {
      int[] termCounts = new int[reader.maxDoc()];
      // Positions the enumeration at the first term of the field (or the next greater term).
      TermEnum termEnum = reader.terms(new Term(fieldName));
      try {
        TermDocs termDocs = reader.termDocs();
        try {
          do {
            Term term = termEnum.term();
            // Terms are enumerated in field-then-text order, so an exhausted
            // enumeration or the first term of another field means we are done.
            if (term == null || !term.field().equals(fieldName)) {
              break;
            }
            termDocs.seek(term);
            while (termDocs.next()) {
              termCounts[termDocs.doc()] += termDocs.freq();
            }
          } while (termEnum.next());
        } finally {
          termDocs.close();
        }
      } finally {
        termEnum.close();
      }
      return termCounts;
    } finally {
      reader.close();
    }
  }

  /** Writes a norm for the given field into every non-deleted document covered by termCounts. */
  private void writeNorms(String fieldName, int[] termCounts) throws IOException {
    // A separate local reader: a failed open here must not re-close the counting reader.
    IndexReader reader = IndexReader.open(dir, false);
    try {
      final FieldInvertState invertState = new FieldInvertState();
      invertState.setBoost(1.0f);
      for (int d = 0; d < termCounts.length; d++) {
        if (reader.isDeleted(d)) {
          continue;
        }
        if (sim == null) {
          // No Similarity: write the fake norm.
          reader.setNorm(d, fieldName, Similarity.encodeNorm(1.0f));
        } else {
          // The field length is the total term count; the boost is fixed at 1.0.
          invertState.setLength(termCounts[d]);
          reader.setNorm(d, fieldName, sim.encodeNormValue(sim.computeNorm(fieldName, invertState)));
        }
      }
    } finally {
      reader.close();
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy