org.apache.lucene.facet.taxonomy.PrintTaxonomyStats Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-facet Show documentation
Show all versions of lucene-facet Show documentation
Apache Lucene (module: facet)
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.file.Paths;
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.SuppressForbidden;
/** Prints how many ords are under each dimension. */
// java -cp ../build/core/classes/java:../build/facet/classes/java
// org.apache.lucene.facet.taxonomy.PrintTaxonomyStats -printTree
// /s2/scratch/indices/wikibig.trunk.noparents.facets.Lucene41.nd1M/facets
public class PrintTaxonomyStats {

  /** Sole constructor. */
  public PrintTaxonomyStats() {}

  /**
   * Command-line tool.
   *
   * <p>Accepts an optional {@code -printTree} flag and exactly one taxonomy index path, in any
   * order. On any other argument combination the usage message is printed and the JVM exits with
   * status 1.
   *
   * @param args command-line arguments
   * @throws IOException if opening or reading the taxonomy index fails
   */
  @SuppressForbidden(reason = "System.out required: command line tool")
  public static void main(String[] args) throws IOException {
    boolean printTree = false;
    String path = null;
    for (String arg : args) {
      if (arg.equals("-printTree")) {
        printTree = true;
      } else {
        path = arg;
      }
    }

    // The path == null check catches inputs such as "-printTree -printTree", which satisfy the
    // length test but would otherwise reach Paths.get(null) and fail with a NullPointerException.
    if (path == null || args.length != (printTree ? 2 : 1)) {
      System.out.println(
          "\nUsage: java -classpath ... org.apache.lucene.facet.taxonomy.PrintTaxonomyStats [-printTree] /path/to/taxonomy/index\n");
      System.exit(1);
    }

    // try-with-resources closes the reader first and then the directory (reverse declaration
    // order), even when printStats or the reader constructor throws.
    try (Directory dir = FSDirectory.open(Paths.get(path));
        TaxonomyReader r = new DirectoryTaxonomyReader(dir)) {
      printStats(r, System.out, printTree);
    }
  }

  /**
   * Recursively prints stats for all ordinals.
   *
   * <p>First prints the reader's total size, then one line per child of the root ordinal showing
   * the first component of its path, its number of immediate children and the total number of
   * categories in its subtree (itself included). When {@code printTree} is set, every descendant
   * of each such child is additionally printed, one per line, indented by depth.
   *
   * @param r taxonomy reader to inspect
   * @param out stream the report is written to
   * @param printTree whether to also print the full tree below each top-level child
   * @throws IOException if reading from the taxonomy fails
   */
  public static void printStats(TaxonomyReader r, PrintStream out, boolean printTree)
      throws IOException {
    out.println(r.getSize() + " total categories.");

    ChildrenIterator it = r.getChildren(TaxonomyReader.ROOT_ORDINAL);
    int child;
    while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
      int numImmediateChildren = countImmediateChildren(r, child);
      FacetLabel cp = r.getPath(child);
      out.println(
          "/"
              + cp.components[0]
              + ": "
              + numImmediateChildren
              + " immediate children; "
              // +1 so the top-level category itself is included in its own total
              + (1 + countAllChildren(r, child))
              + " total categories");
      if (printTree) {
        printAllChildren(out, r, child, " ", 1);
      }
    }
  }

  /** Returns the number of direct children of {@code ord}. */
  private static int countImmediateChildren(TaxonomyReader r, int ord) throws IOException {
    ChildrenIterator childrenIt = r.getChildren(ord);
    int count = 0;
    while (childrenIt.next() != TaxonomyReader.INVALID_ORDINAL) {
      count++;
    }
    return count;
  }

  /** Returns the number of descendants (children, grandchildren, ...) of {@code ord}. */
  private static int countAllChildren(TaxonomyReader r, int ord) throws IOException {
    int count = 0;
    ChildrenIterator it = r.getChildren(ord);
    int child;
    while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
      // one for the child itself plus everything below it
      count += 1 + countAllChildren(r, child);
    }
    return count;
  }

  /**
   * Prints every descendant of {@code ord}, one per line, depth-first.
   *
   * @param out stream to print to
   * @param r taxonomy reader to walk
   * @param ord ordinal whose descendants are printed
   * @param indent prefix written before each direct child; grows by one space per level
   * @param depth index into each direct child's path components of the label to print
   */
  private static void printAllChildren(
      PrintStream out, TaxonomyReader r, int ord, String indent, int depth) throws IOException {
    ChildrenIterator it = r.getChildren(ord);
    int child;
    while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
      out.println(indent + "/" + r.getPath(child).components[depth]);
      printAllChildren(out, r, child, indent + " ", depth + 1);
    }
  }
}