/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.admin;

import static org.apache.lucene.index.IndexOptions.DOCS;
import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS;
import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharFilterFactory;
import org.apache.lucene.analysis.TokenFilterFactory;
import org.apache.lucene.analysis.TokenizerFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.luke.FieldFlag;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.security.AuthorizationContext;
import org.apache.solr.update.SolrIndexWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Exposes the internal lucene index. It's registered at /admin/luke by default.
 *
 * <p>It is inspired by and modeled on Luke, the Lucene Index Browser that is currently a Lucene
 * module: https://github.com/apache/lucene/tree/main/lucene/luke
 *
 * @see SegmentsInfoRequestHandler
 * @since solr 1.2
 */
public class LukeRequestHandler extends RequestHandlerBase {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  public static final String NUMTERMS = "numTerms";
  public static final String INCLUDE_INDEX_FIELD_FLAGS = "includeIndexFieldFlags";
  public static final String DOC_ID = "docId";
  public static final String ID = CommonParams.ID;
  public static final int DEFAULT_COUNT = 10;

  static final int HIST_ARRAY_SIZE = 33;

  @Override
  public Name getPermissionName(AuthorizationContext request) {
    return Name.READ_PERM;
  }

  private static enum ShowStyle {
    ALL,
    DOC,
    SCHEMA,
    INDEX;

    public static ShowStyle get(String v) {
      if (v == null) return null;
      if ("schema".equalsIgnoreCase(v)) return SCHEMA;
      if ("index".equalsIgnoreCase(v)) return INDEX;
      if ("doc".equalsIgnoreCase(v)) return DOC;
      if ("all".equalsIgnoreCase(v)) return ALL;
      throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown Show Style: " + v);
    }
  }

  @Override
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    IndexSchema schema = req.getSchema();
    SolrIndexSearcher searcher = req.getSearcher();
    DirectoryReader reader = searcher.getIndexReader();
    SolrParams params = req.getParams();
    ShowStyle style = ShowStyle.get(params.get("show"));

    // If no doc is given, show all fields and top terms
    rsp.add("index", getIndexInfo(reader));

    if (ShowStyle.INDEX == style) {
      return; // that's all we need
    }

    Integer docId = params.getInt(DOC_ID);
    if (docId == null && params.get(ID) != null) {
      // Look for something with a given solr ID
      SchemaField uniqueKey = schema.getUniqueKeyField();
      String v = uniqueKey.getType().toInternal(params.get(ID));
      Term t = new Term(uniqueKey.getName(), v);
      docId = searcher.getFirstMatch(t);
      if (docId < 0) {
        throw new SolrException(
            SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + params.get(ID));
      }
    }

    // Read the document from the index
    if (docId != null) {
      if (style != null && style != ShowStyle.DOC) {
        throw new SolrException(ErrorCode.BAD_REQUEST, "missing doc param for doc style");
      }
      Document doc = null;
      try {
        doc = reader.document(docId);
      } catch (Exception ex) {
      }
      if (doc == null) {
        throw new SolrException(
            SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + docId);
      }

      SimpleOrderedMap<Object> info = getDocumentFieldsInfo(doc, docId, reader, schema);

      SimpleOrderedMap<Object> docinfo = new SimpleOrderedMap<>();
      docinfo.add("docId", docId);
      docinfo.add("lucene", info);
      docinfo.add("solr", doc);
      rsp.add("doc", docinfo);
    } else if (ShowStyle.SCHEMA == style) {
      rsp.add("schema", getSchemaInfo(req.getSchema()));
    } else {
      rsp.add("fields", getIndexedFieldsInfo(req));
    }

    // Add some generally helpful information
    NamedList<Object> info = new SimpleOrderedMap<>();
    info.add("key", getFieldFlagsKey());
    info.add(
        "NOTE",
        "Document Frequency (df) is not updated when a document is marked for deletion. df values include deleted documents.");
    rsp.add("info", info);
    rsp.setHttpCaching(false);
  }
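  // A minimal request sketch (not part of the class): the handler is driven entirely
  // by request params; the core and field names below are hypothetical.
  //   /solr/mycore/admin/luke                      -> index info + summary of all fields
  //   /solr/mycore/admin/luke?show=index           -> index-level info only
  //   /solr/mycore/admin/luke?show=schema          -> schema report
  //   /solr/mycore/admin/luke?id=SOLR1000          -> one doc, looked up by uniqueKey
  //   /solr/mycore/admin/luke?docId=42             -> one doc, by internal Lucene docId
  //   /solr/mycore/admin/luke?fl=title&numTerms=20 -> top 20 terms for field "title"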
  /**
   * @return a string representing an IndexableField's flags.
   */
  private static String getFieldFlags(IndexableField f) {
    IndexOptions opts = (f == null) ? null : f.fieldType().indexOptions();

    StringBuilder flags = new StringBuilder();

    flags.append(
        (f != null && f.fieldType().indexOptions() != IndexOptions.NONE)
            ? FieldFlag.INDEXED.getAbbreviation()
            : '-');
    flags.append(
        (f != null && f.fieldType().tokenized()) ? FieldFlag.TOKENIZED.getAbbreviation() : '-');
    flags.append((f != null && f.fieldType().stored()) ? FieldFlag.STORED.getAbbreviation() : '-');
    flags.append(
        (f != null && f.fieldType().docValuesType() != DocValuesType.NONE)
            ? FieldFlag.DOC_VALUES.getAbbreviation()
            : "-");
    flags.append((false) ? FieldFlag.UNINVERTIBLE.getAbbreviation() : '-'); // SchemaField Specific
    flags.append((false) ? FieldFlag.MULTI_VALUED.getAbbreviation() : '-'); // SchemaField Specific
    flags.append(
        (f != null && f.fieldType().storeTermVectors())
            ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation()
            : '-');
    flags.append(
        (f != null && f.fieldType().storeTermVectorOffsets())
            ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation()
            : '-');
    flags.append(
        (f != null && f.fieldType().storeTermVectorPositions())
            ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation()
            : '-');
    flags.append(
        (f != null && f.fieldType().storeTermVectorPayloads())
            ? FieldFlag.TERM_VECTOR_PAYLOADS.getAbbreviation()
            : '-');
    flags.append(
        (f != null && f.fieldType().omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-');
    flags.append((f != null && DOCS == opts) ? FieldFlag.OMIT_TF.getAbbreviation() : '-');
    flags.append(
        (f != null && DOCS_AND_FREQS == opts) ? FieldFlag.OMIT_POSITIONS.getAbbreviation() : '-');
    flags.append(
        (f != null && DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS == opts)
            ? FieldFlag.STORE_OFFSETS_WITH_POSITIONS.getAbbreviation()
            : '-');
    flags.append(
        (f != null && f.getClass().getSimpleName().equals("LazyField"))
            ? FieldFlag.LAZY.getAbbreviation()
            : '-');
    flags.append((f != null && f.binaryValue() != null) ? FieldFlag.BINARY.getAbbreviation() : '-');
    flags.append(
        (false) ? FieldFlag.SORT_MISSING_FIRST.getAbbreviation() : '-'); // SchemaField Specific
    flags.append(
        (false) ? FieldFlag.SORT_MISSING_LAST.getAbbreviation() : '-'); // SchemaField Specific
    return flags.toString();
  }

  /**
   * @return a string representing a SchemaField's flags.
   */
  private static String getFieldFlags(SchemaField f) {
    FieldType t = (f == null) ? null : f.getType();

    // see: http://www.nabble.com/schema-field-properties-tf3437753.html#a9585549
    boolean lazy = false; // "lazy" is purely a property of reading fields
    boolean binary = false; // Currently not possible

    StringBuilder flags = new StringBuilder();
    flags.append((f != null && f.indexed()) ? FieldFlag.INDEXED.getAbbreviation() : '-');
    flags.append((t != null && t.isTokenized()) ? FieldFlag.TOKENIZED.getAbbreviation() : '-');
    flags.append((f != null && f.stored()) ? FieldFlag.STORED.getAbbreviation() : '-');
    flags.append((f != null && f.hasDocValues()) ? FieldFlag.DOC_VALUES.getAbbreviation() : "-");
    flags.append(
        (f != null && f.isUninvertible()) ? FieldFlag.UNINVERTIBLE.getAbbreviation() : "-");
    flags.append((f != null && f.multiValued()) ? FieldFlag.MULTI_VALUED.getAbbreviation() : '-');
    flags.append(
        (f != null && f.storeTermVector()) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-');
    flags.append(
        (f != null && f.storeTermOffsets()) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-');
    flags.append(
        (f != null && f.storeTermPositions())
            ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation()
            : '-');
    flags.append(
        (f != null && f.storeTermPayloads())
            ? FieldFlag.TERM_VECTOR_PAYLOADS.getAbbreviation()
            : '-');
    flags.append((f != null && f.omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-');
    flags.append(
        (f != null && f.omitTermFreqAndPositions()) ? FieldFlag.OMIT_TF.getAbbreviation() : '-');
    flags.append(
        (f != null && f.omitPositions()) ? FieldFlag.OMIT_POSITIONS.getAbbreviation() : '-');
    flags.append(
        (f != null && f.storeOffsetsWithPositions())
            ? FieldFlag.STORE_OFFSETS_WITH_POSITIONS.getAbbreviation()
            : '-');
    flags.append((lazy) ? FieldFlag.LAZY.getAbbreviation() : '-');
    flags.append((binary) ? FieldFlag.BINARY.getAbbreviation() : '-');
    flags.append(
        (f != null && f.sortMissingFirst()) ? FieldFlag.SORT_MISSING_FIRST.getAbbreviation() : '-');
    flags.append(
        (f != null && f.sortMissingLast()) ? FieldFlag.SORT_MISSING_LAST.getAbbreviation() : '-');

    return flags.toString();
  }
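  // Reading the flags strings built by the two methods above: each position holds one
  // FieldFlag abbreviation, or '-' when the property is absent, in the fixed order of
  // the appends: indexed, tokenized, stored, docValues, uninvertible, multiValued,
  // the four term-vector flags, omitNorms, omitTf, omitPositions,
  // storeOffsetsWithPositions, lazy, binary, sortMissingFirst, sortMissingLast.
  // getFieldFlagsKey() just below returns the abbreviation-to-display-name legend.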
  /**
   * @return a key to what each character means
   */
  public static SimpleOrderedMap<Object> getFieldFlagsKey() {
    SimpleOrderedMap<Object> key = new SimpleOrderedMap<>();
    for (FieldFlag f : FieldFlag.values()) {
      key.add(String.valueOf(f.getAbbreviation()), f.getDisplay());
    }
    return key;
  }

  private static SimpleOrderedMap<Object> getDocumentFieldsInfo(
      Document doc, int docId, IndexReader reader, IndexSchema schema) throws IOException {
    final CharsRefBuilder spare = new CharsRefBuilder();
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();
    for (Object o : doc.getFields()) {
      Field field = (Field) o;
      SimpleOrderedMap<Object> f = new SimpleOrderedMap<>();

      SchemaField sfield = schema.getFieldOrNull(field.name());
      FieldType ftype = (sfield == null) ? null : sfield.getType();

      f.add("type", (ftype == null) ? null : ftype.getTypeName());
      f.add("schema", getFieldFlags(sfield));
      f.add("flags", getFieldFlags(field));

      f.add("value", (ftype == null) ? null : ftype.toExternal(field));

      // TODO: this really should be "stored"
      f.add("internal", field.stringValue()); // may be a binary number

      BytesRef bytes = field.binaryValue();
      if (bytes != null) {
        f.add(
            "binary",
            new String(
                Base64.getEncoder()
                    .encode(ByteBuffer.wrap(bytes.bytes, bytes.offset, bytes.length))
                    .array(),
                StandardCharsets.ISO_8859_1));
      }

      if (ftype != null && !ftype.isPointField()) {
        Term t =
            new Term(field.name(), Objects.requireNonNullElse(ftype.storedToIndexed(field), ""));
        f.add(
            "docFreq",
            t.text() == null ? 0 : reader.docFreq(t)); // this can be 0 for non-indexed fields
      } // TODO: Calculate docFreq for point fields

      // If we have a term vector, return that
      if (field.fieldType().storeTermVectors()) {
        try {
          Terms v = reader.getTermVector(docId, field.name());
          if (v != null) {
            SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<>();
            final TermsEnum termsEnum = v.iterator();
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
              final int freq = (int) termsEnum.totalTermFreq();
              spare.copyUTF8Bytes(text);
              tfv.add(spare.toString(), freq);
            }
            f.add("termVector", tfv);
          }
        } catch (Exception ex) {
          log.warn("error writing term vector", ex);
        }
      }

      finfo.add(field.name(), f);
    }
    return finfo;
  }
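  // Shape sketch of one per-field entry built by getDocumentFieldsInfo; the field
  // name and values here are hypothetical:
  //   "title": { "type": "text_general", "schema": "...", "flags": "...",
  //              "value": "...", "internal": "...", "docFreq": 3,
  //              "termVector": { "solr": 2, "luke": 1 } }
  // "binary" appears only when the stored value is binary, "docFreq" is skipped for
  // point fields, and "termVector" only for fields that store term vectors.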
  private static SimpleOrderedMap<Object> getIndexedFieldsInfo(SolrQueryRequest req)
      throws Exception {

    SolrIndexSearcher searcher = req.getSearcher();
    SolrParams params = req.getParams();

    Set<String> fields = null;
    String fl = params.get(CommonParams.FL);
    if (fl != null) {
      fields = new TreeSet<>(Arrays.asList(fl.split("[,\\s]+")));
    }

    LeafReader reader = searcher.getSlowAtomicReader();
    IndexSchema schema = searcher.getSchema();

    // Don't be tempted to put this in the loop below, the whole point here is to alphabetize the
    // fields!
    Set<String> fieldNames = new TreeSet<>();
    for (FieldInfo fieldInfo : reader.getFieldInfos()) {
      fieldNames.add(fieldInfo.name);
    }

    // Walk the term enum and keep a priority queue for each map in our set
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();

    for (String fieldName : fieldNames) {
      if (fields != null && !fields.contains(fieldName) && !fields.contains("*")) {
        continue; // we're not interested in this field Still an issue here
      }

      SimpleOrderedMap<Object> fieldMap = new SimpleOrderedMap<>();

      SchemaField sfield = schema.getFieldOrNull(fieldName);
      FieldType ftype = (sfield == null) ? null : sfield.getType();

      fieldMap.add("type", (ftype == null) ? null : ftype.getTypeName());
      fieldMap.add("schema", getFieldFlags(sfield));
      if (sfield != null
          && schema.isDynamicField(sfield.getName())
          && schema.getDynamicPattern(sfield.getName()) != null) {
        fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
      }
      Terms terms = reader.terms(fieldName);
      // Not indexed, so we need to report what we can (it made it through the fl param if
      // specified)
      if (terms == null) {
        finfo.add(fieldName, fieldMap);
        continue;
      }

      if (sfield != null && sfield.indexed()) {
        if (params.getBool(INCLUDE_INDEX_FIELD_FLAGS, true)) {
          Document doc = getFirstLiveDoc(terms, reader);

          if (doc != null) {
            // Found a document with this field
            try {
              IndexableField fld = doc.getField(fieldName);
              if (fld != null) {
                fieldMap.add("index", getFieldFlags(fld));
              } else {
                // it is a non-stored field...
                fieldMap.add("index", "(unstored field)");
              }
            } catch (Exception ex) {
              log.warn("error reading field: {}", fieldName);
            }
          }
        }

        fieldMap.add("docs", terms.getDocCount());
      }
      if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) {
        getDetailedFieldInfo(req, fieldName, fieldMap);
      }
      // Add the field
      finfo.add(fieldName, fieldMap);
    }
    return finfo;
  }
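  // Note on the fl handling above: the list is split on commas/whitespace, and without
  // fl every field gets a summary only. Naming fields, e.g. fl=title,author
  // (hypothetical names), additionally triggers getDetailedFieldInfo for each of them;
  // fl=* does so for every field, which walks the full term enum of each and can be
  // very expensive on large indexes.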
  // Just get a document with the term in it, the first one will do!
  // Is there a better way to do this? Shouldn't actually be very costly
  // to do it this way.
  private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
    PostingsEnum postingsEnum = null;
    TermsEnum termsEnum = terms.iterator();
    BytesRef text;
    // Deal with the chance that the first bunch of terms are in deleted documents. Is there a
    // better way?
    for (int idx = 0; idx < 1000 && postingsEnum == null; ++idx) {
      text = termsEnum.next();
      if (text == null) {
        // Ran off the end of the terms enum without finding any live docs with that field in them.
        return null;
      }
      postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
      final Bits liveDocs = reader.getLiveDocs();
      if (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        if (liveDocs != null && liveDocs.get(postingsEnum.docID())) {
          continue;
        }
        return reader.document(postingsEnum.docID());
      }
    }
    return null;
  }

  /** Return info from the schema */
  private static SimpleOrderedMap<Object> getSchemaInfo(IndexSchema schema) {
    Map<String, List<String>> typeusemap = new TreeMap<>();
    Map<String, Object> fields = new TreeMap<>();
    SchemaField uniqueField = schema.getUniqueKeyField();
    for (SchemaField f : schema.getFields().values()) {
      populateFieldInfo(schema, typeusemap, fields, uniqueField, f);
    }

    Map<String, Object> dynamicFields = new TreeMap<>();
    for (SchemaField f : schema.getDynamicFieldPrototypes()) {
      populateFieldInfo(schema, typeusemap, dynamicFields, uniqueField, f);
    }
    SimpleOrderedMap<Object> types = new SimpleOrderedMap<>();
    Map<String, FieldType> sortedTypes = new TreeMap<>(schema.getFieldTypes());
    for (FieldType ft : sortedTypes.values()) {
      SimpleOrderedMap<Object> field = new SimpleOrderedMap<>();
      field.add("fields", typeusemap.get(ft.getTypeName()));
      field.add("tokenized", ft.isTokenized());
      field.add("className", ft.getClass().getName());
      field.add("indexAnalyzer", getAnalyzerInfo(ft.getIndexAnalyzer()));
      field.add("queryAnalyzer", getAnalyzerInfo(ft.getQueryAnalyzer()));
      field.add("similarity", getSimilarityInfo(ft.getSimilarity()));
      types.add(ft.getTypeName(), field);
    }

    // Must go through this to maintain binary compatibility. Putting a TreeMap into a resp leads
    // to casting errors
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();

    SimpleOrderedMap<Object> fieldsSimple = new SimpleOrderedMap<>();
    for (Map.Entry<String, Object> ent : fields.entrySet()) {
      fieldsSimple.add(ent.getKey(), ent.getValue());
    }
    finfo.add("fields", fieldsSimple);

    SimpleOrderedMap<Object> dynamicSimple = new SimpleOrderedMap<>();
    for (Map.Entry<String, Object> ent : dynamicFields.entrySet()) {
      dynamicSimple.add(ent.getKey(), ent.getValue());
    }
    finfo.add("dynamicFields", dynamicSimple);

    finfo.add("uniqueKeyField", null == uniqueField ? null : uniqueField.getName());
    finfo.add("similarity", getSimilarityInfo(schema.getSimilarity()));
    finfo.add("types", types);
    return finfo;
  }
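  // For orientation: getSchemaInfo's response groups "fields", "dynamicFields",
  // "uniqueKeyField", "similarity", and "types"; each entry under "types" lists the
  // fields using that type plus its index/query analyzers and similarity.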
  private static SimpleOrderedMap<Object> getSimilarityInfo(Similarity similarity) {
    SimpleOrderedMap<Object> toReturn = new SimpleOrderedMap<>();
    if (similarity != null) {
      toReturn.add("className", similarity.getClass().getName());
      toReturn.add("details", similarity.toString());
    }
    return toReturn;
  }

  private static SimpleOrderedMap<Object> getAnalyzerInfo(Analyzer analyzer) {
    SimpleOrderedMap<Object> aninfo = new SimpleOrderedMap<>();
    aninfo.add("className", analyzer.getClass().getName());
    if (analyzer instanceof TokenizerChain) {
      TokenizerChain tchain = (TokenizerChain) analyzer;

      CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
      if (0 < cfiltfacs.length) {
        SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<>();
        for (CharFilterFactory cfiltfac : cfiltfacs) {
          Map<String, Object> tok = new HashMap<>();
          String className = cfiltfac.getClass().getName();
          tok.put("className", className);
          tok.put("args", cfiltfac.getOriginalArgs());
          cfilters.add(className.substring(className.lastIndexOf('.') + 1), tok);
        }
        aninfo.add("charFilters", cfilters);
      }

      SimpleOrderedMap<Object> tokenizer = new SimpleOrderedMap<>();
      TokenizerFactory tfac = tchain.getTokenizerFactory();
      tokenizer.add("className", tfac.getClass().getName());
      tokenizer.add("args", tfac.getOriginalArgs());
      aninfo.add("tokenizer", tokenizer);

      TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
      if (0 < filtfacs.length) {
        SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<>();
        for (TokenFilterFactory filtfac : filtfacs) {
          Map<String, Object> tok = new HashMap<>();
          String className = filtfac.getClass().getName();
          tok.put("className", className);
          tok.put("args", filtfac.getOriginalArgs());
          filters.add(className.substring(className.lastIndexOf('.') + 1), tok);
        }
        aninfo.add("filters", filters);
      }
    }
    return aninfo;
  }

  private static void populateFieldInfo(
      IndexSchema schema,
      Map<String, List<String>> typeusemap,
      Map<String, Object> fields,
      SchemaField uniqueField,
      SchemaField f) {
    FieldType ft = f.getType();
    SimpleOrderedMap<Object> field = new SimpleOrderedMap<>();
    field.add("type", ft.getTypeName());
    field.add("flags", getFieldFlags(f));
    if (f.isRequired()) {
      field.add("required", f.isRequired());
    }
    if (f.getDefaultValue() != null) {
      field.add("default", f.getDefaultValue());
    }
    if (f.equals(uniqueField)) {
      field.add("uniqueKey", true);
    }
    if (ft.getIndexAnalyzer().getPositionIncrementGap(f.getName()) != 0) {
      field.add(
          "positionIncrementGap", ft.getIndexAnalyzer().getPositionIncrementGap(f.getName()));
    }
    field.add("copyDests", toListOfStringDests(schema.getCopyFieldsList(f.getName())));
    field.add("copySources", schema.getCopySources(f.getName()));

    fields.put(f.getName(), field);

    List<String> v = typeusemap.get(ft.getTypeName());
    if (v == null) {
      v = new ArrayList<>();
    }
    v.add(f.getName());
    typeusemap.put(ft.getTypeName(), v);
  }
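  // Illustrative shape of getAnalyzerInfo output for a TokenizerChain; the factory
  // names are hypothetical examples, and "charFilters"/"filters" appear only when
  // the chain has any:
  //   { "className": "org.apache.solr.analysis.TokenizerChain",
  //     "charFilters": { "HTMLStripCharFilterFactory": { "className": "...", "args": {...} } },
  //     "tokenizer": { "className": "...StandardTokenizerFactory", "args": {...} },
  //     "filters": { "LowerCaseFilterFactory": { "className": "...", "args": {...} } } }
  // For any other Analyzer only "className" is emitted.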
  // This method just gets the top-most level of information. This was conflated with getting
  // detailed info for *all* the fields, called from CoreAdminHandler etc.
  public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader) throws IOException {
    Directory dir = reader.directory();
    SimpleOrderedMap<Object> indexInfo = new SimpleOrderedMap<>();

    indexInfo.add("numDocs", reader.numDocs());
    indexInfo.add("maxDoc", reader.maxDoc());
    indexInfo.add("deletedDocs", reader.maxDoc() - reader.numDocs());
    // TODO? Is this different than: IndexReader.getCurrentVersion( dir )?
    indexInfo.add("version", reader.getVersion());
    indexInfo.add("segmentCount", reader.leaves().size());
    indexInfo.add("current", closeSafe(reader::isCurrent));
    indexInfo.add("hasDeletions", reader.hasDeletions());
    indexInfo.add("directory", dir);
    IndexCommit indexCommit = reader.getIndexCommit();
    String segmentsFileName = indexCommit.getSegmentsFileName();
    indexInfo.add("segmentsFile", segmentsFileName);
    indexInfo.add("segmentsFileSizeInBytes", getSegmentsFileLength(indexCommit));
    Map<String, String> userData = indexCommit.getUserData();
    indexInfo.add("userData", userData);
    String s = userData.get(SolrIndexWriter.COMMIT_TIME_MSEC_KEY);
    if (s != null) {
      indexInfo.add("lastModified", new Date(Long.parseLong(s)));
    }
    return indexInfo;
  }

  @FunctionalInterface
  interface IOSupplier {
    boolean get() throws IOException;
  }

  private static Object closeSafe(IOSupplier isCurrent) {
    try {
      return isCurrent.get();
    } catch (AlreadyClosedException | IOException exception) {
    }
    return false;
  }

  /**
   * A helper method that attempts to determine the file length of the segments file for the
   * specified IndexCommit from its Directory.
   *
   * <p>If any sort of {@link IOException} occurs, this method will return "-1" and swallow the
   * exception since this may be normal if the IndexCommit is no longer "on disk". The specific
   * type of the Exception will affect how severely it is logged: {@link NoSuchFileException} is
   * considered more "acceptable" than other types of IOException which may indicate an actual
   * problem with the Directory.
   */
  private static long getSegmentsFileLength(IndexCommit commit) {
    try {
      return commit.getDirectory().fileLength(commit.getSegmentsFileName());
    } catch (NoSuchFileException okException) {
      log.debug(
          "Unable to determine the (optional) fileSize for the current IndexReader's segments file because it is "
              + "no longer in the Directory, this can happen if there are new commits since the Reader was opened",
          okException);
    } catch (IOException strangeException) {
      log.warn(
          "Ignoring IOException while attempting to determine the (optional) fileSize stat for the current IndexReader's segments file",
          strangeException);
    }
    return -1;
  }

  // Get terribly detailed information about a particular field. This is a very expensive call, use
  // it with caution especially on large indexes!
  private static void getDetailedFieldInfo(
      SolrQueryRequest req, String field, SimpleOrderedMap<Object> fieldMap) throws IOException {

    SolrParams params = req.getParams();
    final int numTerms = params.getInt(NUMTERMS, DEFAULT_COUNT);

    TopTermQueue tiq = new TopTermQueue(numTerms + 1); // Something to collect the top N terms in.

    final CharsRefBuilder spare = new CharsRefBuilder();

    Terms terms = MultiTerms.getTerms(req.getSearcher().getIndexReader(), field);
    if (terms == null) { // field does not exist
      return;
    }
    TermsEnum termsEnum = terms.iterator();
    BytesRef text;
    int[] buckets = new int[HIST_ARRAY_SIZE];
    while ((text = termsEnum.next()) != null) {
      ++tiq.distinctTerms;
      // This calculation seems odd, but it gives the same results as it used to.
      int freq = termsEnum.docFreq();
      int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
      buckets[slot] = buckets[slot] + 1;
      if (numTerms > 0 && freq > tiq.minFreq) {
        spare.copyUTF8Bytes(text);
        String t = spare.toString();

        tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
        if (tiq.size() > numTerms) { // if tiq full
          tiq.pop(); // remove lowest in tiq
          tiq.minFreq = tiq.getTopTermInfo().docFreq;
        }
      }
    }
    tiq.histogram.add(buckets);
    fieldMap.add("distinct", tiq.distinctTerms);

    // Include top terms
    fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema()));

    // Add a histogram
    fieldMap.add("histogram", tiq.histogram.toNamedList());
  }
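  // Worked example for the slot math above: 32 - numberOfLeadingZeros(freq - 1) is
  // ceil(log2(freq)) for freq >= 1, so docFreq 1 -> slot 0, 2 -> slot 1, 3..4 -> slot 2,
  // 5..8 -> slot 3. TermHistogram.toNamedList (below) labels slot b as 2^b, so the
  // bucket labelled "8" counts terms whose docFreq fell in (4, 8].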
  private static List<String> toListOfStrings(SchemaField[] raw) {
    List<String> result = new ArrayList<>(raw.length);
    for (SchemaField f : raw) {
      result.add(f.getName());
    }
    return result;
  }

  private static List<String> toListOfStringDests(List<CopyField> raw) {
    List<String> result = new ArrayList<>(raw.size());
    for (CopyField f : raw) {
      result.add(f.getDestination().getName());
    }
    return result;
  }

  //////////////////////// SolrInfoMBeans methods //////////////////////

  @Override
  public String getDescription() {
    return "Lucene Index Browser. Inspired and modeled after Luke: https://code.google.com/archive/p/luke/";
  }

  @Override
  public Category getCategory() {
    return Category.ADMIN;
  }

  ///////////////////////////////////////////////////////////////////////////////////////

  static class TermHistogram {
    int _maxBucket = -1;
    int[] _buckets = new int[HIST_ARRAY_SIZE];

    public void add(int[] buckets) {
      for (int idx = 0; idx < buckets.length; ++idx) {
        if (buckets[idx] != 0) _maxBucket = idx;
      }
      for (int idx = 0; idx <= _maxBucket; ++idx) {
        _buckets[idx] = buckets[idx];
      }
    }

    // TODO? should this be a list or a map?
    public NamedList<Integer> toNamedList() {
      NamedList<Integer> nl = new NamedList<>();
      for (int bucket = 0; bucket <= _maxBucket; bucket++) {
        nl.add("" + (1 << bucket), _buckets[bucket]);
      }
      return nl;
    }
  }

  /** Private internal class that counts up frequent terms */
  private static class TopTermQueue extends PriorityQueue<TopTermQueue.TermInfo> {

    static class TermInfo {
      TermInfo(Term t, int df) {
        term = t;
        docFreq = df;
      }

      int docFreq;
      Term term;
    }

    public int minFreq = 0;
    public int distinctTerms = 0;
    public TermHistogram histogram;

    TopTermQueue(int size) {
      super(size);
      histogram = new TermHistogram();
    }

    @Override
    protected final boolean lessThan(TermInfo a, TermInfo b) {
      return a.docFreq < b.docFreq;
    }

    /** This is a destructive call... the queue is empty at the end */
    public NamedList<Integer> toNamedList(IndexSchema schema) {
      // reverse the list..
      List<TermInfo> aslist = new ArrayList<>();
      while (size() > 0) {
        aslist.add(0, pop());
      }

      NamedList<Integer> list = new NamedList<>();
      for (TermInfo i : aslist) {
        String txt = i.term.text();
        SchemaField ft = schema.getFieldOrNull(i.term.field());
        if (ft != null) {
          txt = ft.getType().indexedToReadable(txt);
        }
        list.add(txt, i.docFreq);
      }
      return list;
    }

    public TermInfo getTopTermInfo() {
      return top();
    }
  }
}