All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.store.instantiated.InstantiatedIndexReader Maven / Gradle / Ivy

The newest version!
package org.apache.lucene.store.instantiated;

/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitVector;

/**
 * An InstantiatedIndexReader is not a snapshot in time, it is completely in
 * sync with the latest commit to the store!
 * 

* Consider using InstantiatedIndex as if it was immutable. * * @deprecated contrib/instantiated will be removed in 4.0; * you can use the memory codec to hold all postings in RAM */ @Deprecated public class InstantiatedIndexReader extends IndexReader { private final InstantiatedIndex index; public InstantiatedIndexReader(InstantiatedIndex index) { super(); this.index = index; } @Deprecated @Override public boolean isOptimized() { return true; } /** * An InstantiatedIndexReader is not a snapshot in time, it is completely in * sync with the latest commit to the store! * * @return output from {@link InstantiatedIndex#getVersion()} in associated instantiated index. */ @Override public long getVersion() { return index.getVersion(); } @Override public FieldInfos getFieldInfos() { return index.getFieldInfos(); } @Override public Directory directory() { throw new UnsupportedOperationException(); } /** * An InstantiatedIndexReader is always current! * * Check whether this IndexReader is still using the current (i.e., most * recently committed) version of the index. If a writer has committed any * changes to the index since this reader was opened, this will return * false, in which case you must open a new IndexReader in * order to see the changes. See the description of the autoCommit flag * which controls when the {@link IndexWriter} actually commits changes to the * index. 
* * @return always true * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error * @throws UnsupportedOperationException unless overridden in subclass */ @Override public boolean isCurrent() throws IOException { return true; } public InstantiatedIndex getIndex() { return index; } private BitVector uncommittedDeletedDocuments; private Map> uncommittedNormsByFieldNameAndDocumentNumber = null; private class NormUpdate { private int doc; private byte value; public NormUpdate(int doc, byte value) { this.doc = doc; this.value = value; } } @Override public int numDocs() { // todo i suppose this value could be cached, but array#length and bitvector#count is fast. int numDocs = getIndex().getDocumentsByNumber().length; if (uncommittedDeletedDocuments != null) { numDocs -= uncommittedDeletedDocuments.count(); } if (index.getDeletedDocuments() != null) { numDocs -= index.getDeletedDocuments().count(); } return numDocs; } @Override public int maxDoc() { return getIndex().getDocumentsByNumber().length; } @Override public boolean hasDeletions() { return index.getDeletedDocuments() != null || uncommittedDeletedDocuments != null; } @Override public boolean isDeleted(int n) { return (index.getDeletedDocuments() != null && index.getDeletedDocuments().get(n)) || (uncommittedDeletedDocuments != null && uncommittedDeletedDocuments.get(n)); } @Override protected void doDelete(int docNum) throws IOException { // dont delete if already deleted if ((index.getDeletedDocuments() != null && index.getDeletedDocuments().get(docNum)) || (uncommittedDeletedDocuments != null && uncommittedDeletedDocuments.get(docNum))) { return; } if (uncommittedDeletedDocuments == null) { uncommittedDeletedDocuments = new BitVector(maxDoc()); } uncommittedDeletedDocuments.set(docNum); } @Override protected void doUndeleteAll() throws IOException { // todo: read/write lock uncommittedDeletedDocuments = null; // todo: read/write unlock } @Override protected void 
doCommit(Map commitUserData) throws IOException { // todo: read/write lock // 1. update norms if (uncommittedNormsByFieldNameAndDocumentNumber != null) { for (Map.Entry> e : uncommittedNormsByFieldNameAndDocumentNumber.entrySet()) { byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(e.getKey()); for (NormUpdate normUpdate : e.getValue()) { norms[normUpdate.doc] = normUpdate.value; } } uncommittedNormsByFieldNameAndDocumentNumber = null; } // 2. remove deleted documents if (uncommittedDeletedDocuments != null) { if (index.getDeletedDocuments() == null) { index.setDeletedDocuments(uncommittedDeletedDocuments); } else { for (int d = 0; d< uncommittedDeletedDocuments.size(); d++) { if (uncommittedDeletedDocuments.get(d)) { index.getDeletedDocuments().set(d); } } } uncommittedDeletedDocuments = null; } // todo unlock read/writelock } @Override protected void doClose() throws IOException { // ignored // todo perhaps release all associated instances? } /** * Return the {@link org.apache.lucene.document.Document} at the nth * position.

* Warning! * The resulting document is the actual stored document instance * and not a deserialized clone as retuned by an IndexReader * over a {@link org.apache.lucene.store.Directory}. * I.e., if you need to touch the document, clone it first! *

* This can also be seen as a feature for live changes of stored values, * but be careful! Adding a field with an name unknown to the index * or to a field with previously no stored values will make * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldInfos()} * out of sync, causing problems for instance when merging the * instantiated index to another index.

* This implementation ignores the field selector! All stored fields are always returned! *

* * @param n document number * @param fieldSelector ignored * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error * * @see org.apache.lucene.document.Fieldable * @see org.apache.lucene.document.FieldSelector * @see org.apache.lucene.document.SetBasedFieldSelector * @see org.apache.lucene.document.LoadFirstFieldSelector */ @Override public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { return isDeleted(n) ? null : getIndex().getDocumentsByNumber()[n].getDocument(); } /** * never ever touch these values. it is the true values, unless norms have * been touched. */ @Override public byte[] norms(String field) throws IOException { byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field); if (norms == null) { return new byte[0]; // todo a static final zero length attribute? 
} if (uncommittedNormsByFieldNameAndDocumentNumber != null) { norms = norms.clone(); List updated = uncommittedNormsByFieldNameAndDocumentNumber.get(field); if (updated != null) { for (NormUpdate normUpdate : updated) { norms[normUpdate.doc] = normUpdate.value; } } } return norms; } @Override public void norms(String field, byte[] bytes, int offset) throws IOException { byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field); if (norms == null) { return; } System.arraycopy(norms, 0, bytes, offset, norms.length); } @Override protected void doSetNorm(int doc, String field, byte value) throws IOException { if (uncommittedNormsByFieldNameAndDocumentNumber == null) { uncommittedNormsByFieldNameAndDocumentNumber = new HashMap>(getIndex().getNormsByFieldNameAndDocumentNumber().size()); } List list = uncommittedNormsByFieldNameAndDocumentNumber.get(field); if (list == null) { list = new LinkedList(); uncommittedNormsByFieldNameAndDocumentNumber.put(field, list); } list.add(new NormUpdate(doc, value)); } @Override public int docFreq(Term t) throws IOException { InstantiatedTerm term = getIndex().findTerm(t); if (term == null) { return 0; } else { return term.getAssociatedDocuments().length; } } @Override public TermEnum terms() throws IOException { return new InstantiatedTermEnum(this); } @Override public TermEnum terms(Term t) throws IOException { InstantiatedTerm it = getIndex().findTerm(t); if (it != null) { return new InstantiatedTermEnum(this, it.getTermIndex()); } else { int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator); if (startPos < 0) { startPos = -1 - startPos; } return new InstantiatedTermEnum(this, startPos); } } @Override public TermDocs termDocs() throws IOException { return new InstantiatedTermDocs(this); } @Override public TermDocs termDocs(Term term) throws IOException { if (term == null) { return new InstantiatedAllTermDocs(this); } else { InstantiatedTermDocs termDocs = new 
InstantiatedTermDocs(this); termDocs.seek(term); return termDocs; } } @Override public TermPositions termPositions() throws IOException { return new InstantiatedTermPositions(this); } @Override public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException { InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber]; if (doc.getVectorSpace() == null) { return null; } TermFreqVector[] ret = new TermFreqVector[doc.getVectorSpace().size()]; Iterator it = doc.getVectorSpace().keySet().iterator(); for (int i = 0; i < ret.length; i++) { ret[i] = new InstantiatedTermPositionVector(getIndex().getDocumentsByNumber()[docNumber], it.next()); } return ret; } @Override public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException { InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber]; if (doc.getVectorSpace() == null || doc.getVectorSpace().get(field) == null) { return null; } else { return new InstantiatedTermPositionVector(doc, field); } } @Override public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException { InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber]; if (doc.getVectorSpace() != null && doc.getVectorSpace().get(field) == null) { List tv = doc.getVectorSpace().get(field); mapper.setExpectations(field, tv.size(), true, true); for (InstantiatedTermDocumentInformation tdi : tv) { mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions()); } } } @Override public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException { InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber]; for (Map.Entry> e : doc.getVectorSpace().entrySet()) { mapper.setExpectations(e.getKey(), e.getValue().size(), true, true); for (InstantiatedTermDocumentInformation tdi : e.getValue()) { mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), 
tdi.getTermPositions()); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy