All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.facet.taxonomy.PrintTaxonomyStats Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.facet.taxonomy;

import java.io.IOException;
import java.io.PrintStream;
import java.nio.file.Paths;
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.SuppressForbidden;

/** Prints how many ords are under each dimension. */

// java -cp ../build/core/classes/java:../build/facet/classes/java
// org.apache.lucene.facet.util.PrintTaxonomyStats -printTree
// /s2/scratch/indices/wikibig.trunk.noparents.facets.Lucene41.nd1M/facets
public class PrintTaxonomyStats {

  /** Sole constructor. */
  public PrintTaxonomyStats() {}

  /** Command-line tool. */
  @SuppressForbidden(reason = "System.out required: command line tool")
  public static void main(String[] args) throws IOException {
    boolean printTree = false;
    String path = null;
    for (int i = 0; i < args.length; i++) {
      if (args[i].equals("-printTree")) {
        printTree = true;
      } else {
        path = args[i];
      }
    }
    if (args.length != (printTree ? 2 : 1)) {
      System.out.println(
          "\nUsage: java -classpath ... org.apache.lucene.facet.util.PrintTaxonomyStats [-printTree] /path/to/taxononmy/index\n");
      System.exit(1);
    }
    Directory dir = FSDirectory.open(Paths.get(path));
    TaxonomyReader r = new DirectoryTaxonomyReader(dir);
    printStats(r, System.out, printTree);
    r.close();
    dir.close();
  }

  /** Recursively prints stats for all ordinals. */
  public static void printStats(TaxonomyReader r, PrintStream out, boolean printTree)
      throws IOException {
    out.println(r.getSize() + " total categories.");

    ChildrenIterator it = r.getChildren(TaxonomyReader.ROOT_ORDINAL);
    int child;
    while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
      ChildrenIterator chilrenIt = r.getChildren(child);
      int numImmediateChildren = 0;
      while (chilrenIt.next() != TaxonomyReader.INVALID_ORDINAL) {
        numImmediateChildren++;
      }
      FacetLabel cp = r.getPath(child);
      out.println(
          "/"
              + cp.components[0]
              + ": "
              + numImmediateChildren
              + " immediate children; "
              + (1 + countAllChildren(r, child))
              + " total categories");
      if (printTree) {
        printAllChildren(out, r, child, "  ", 1);
      }
    }
  }

  private static int countAllChildren(TaxonomyReader r, int ord) throws IOException {
    int count = 0;
    ChildrenIterator it = r.getChildren(ord);
    int child;
    while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
      count += 1 + countAllChildren(r, child);
    }
    return count;
  }

  private static void printAllChildren(
      PrintStream out, TaxonomyReader r, int ord, String indent, int depth) throws IOException {
    ChildrenIterator it = r.getChildren(ord);
    int child;
    while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
      out.println(indent + "/" + r.getPath(child).components[depth]);
      printAllChildren(out, r, child, indent + "  ", depth + 1);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy