All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.index.FreqProxFields Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

/** Implements limited (iterators only, no stats) {@link
 *  Fields} interface over the in-RAM buffered
 *  fields/terms/postings, to flush postings through the
 *  PostingsFormat. */

class FreqProxFields extends Fields {
  final Map fields = new LinkedHashMap<>();

  public FreqProxFields(List fieldList) {
    // NOTE: fields are already sorted by field name
    for(FreqProxTermsWriterPerField field : fieldList) {
      fields.put(field.fieldInfo.name, field);
    }
  }

  public Iterator iterator() {
    return fields.keySet().iterator();
  }

  @Override
  public Terms terms(String field) throws IOException {
    FreqProxTermsWriterPerField perField = fields.get(field);
    return perField == null ? null : new FreqProxTerms(perField);
  }

  @Override
  public int size() {
    //return fields.size();
    throw new UnsupportedOperationException();
  }

  private static class FreqProxTerms extends Terms {
    final FreqProxTermsWriterPerField terms;

    public FreqProxTerms(FreqProxTermsWriterPerField terms) {
      this.terms = terms;
    }

    @Override
    public TermsEnum iterator() {
      FreqProxTermsEnum termsEnum = new FreqProxTermsEnum(terms);
      termsEnum.reset();
      return termsEnum;
    }

    @Override
    public long size() {
      //return terms.termsHashPerField.bytesHash.size();
      throw new UnsupportedOperationException();
    }

    @Override
    public long getSumTotalTermFreq() {
      //return terms.sumTotalTermFreq;
      throw new UnsupportedOperationException();
    }

    @Override
    public long getSumDocFreq() {
      //return terms.sumDocFreq;
      throw new UnsupportedOperationException();
    }

    @Override
    public int getDocCount() {
      //return terms.docCount;
      throw new UnsupportedOperationException();
    }
  
    @Override
    public boolean hasFreqs() {
      return terms.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;      
    }

    @Override
    public boolean hasOffsets() {
      // NOTE: the in-memory buffer may have indexed offsets
      // because that's what FieldInfo said when we started,
      // but during indexing this may have been downgraded:
      return terms.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;      
    }
  
    @Override
    public boolean hasPositions() {
      // NOTE: the in-memory buffer may have indexed positions
      // because that's what FieldInfo said when we started,
      // but during indexing this may have been downgraded:
      return terms.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    }
  
    @Override
    public boolean hasPayloads() {
      return terms.sawPayloads;
    }
  }

  private static class FreqProxTermsEnum extends TermsEnum {
    final FreqProxTermsWriterPerField terms;
    final int[] sortedTermIDs;
    final FreqProxPostingsArray postingsArray;
    final BytesRef scratch = new BytesRef();
    final int numTerms;
    int ord;

    public FreqProxTermsEnum(FreqProxTermsWriterPerField terms) {
      this.terms = terms;
      this.numTerms = terms.bytesHash.size();
      sortedTermIDs = terms.sortedTermIDs;
      assert sortedTermIDs != null;
      postingsArray = (FreqProxPostingsArray) terms.postingsArray;
    }

    public void reset() {
      ord = -1;
    }

    public SeekStatus seekCeil(BytesRef text) {
      // TODO: we could instead keep the BytesRefHash
      // intact so this is a hash lookup

      // binary search:
      int lo = 0;
      int hi = numTerms - 1;
      while (hi >= lo) {
        int mid = (lo + hi) >>> 1;
        int textStart = postingsArray.textStarts[sortedTermIDs[mid]];
        terms.bytePool.setBytesRef(scratch, textStart);
        int cmp = scratch.compareTo(text);
        if (cmp < 0) {
          lo = mid + 1;
        } else if (cmp > 0) {
          hi = mid - 1;
        } else {
          // found:
          ord = mid;
          assert term().compareTo(text) == 0;
          return SeekStatus.FOUND;
        }
      }

      // not found:
      ord = lo;
      if (ord >= numTerms) {
        return SeekStatus.END;
      } else {
        int textStart = postingsArray.textStarts[sortedTermIDs[ord]];
        terms.bytePool.setBytesRef(scratch, textStart);
        assert term().compareTo(text) > 0;
        return SeekStatus.NOT_FOUND;
      }
    }

    public void seekExact(long ord) {
      this.ord = (int) ord;
      int textStart = postingsArray.textStarts[sortedTermIDs[this.ord]];
      terms.bytePool.setBytesRef(scratch, textStart);
    }

    @Override
    public BytesRef next() {
      ord++;
      if (ord >= numTerms) {
        return null;
      } else {
        int textStart = postingsArray.textStarts[sortedTermIDs[ord]];
        terms.bytePool.setBytesRef(scratch, textStart);
        return scratch;
      }
    }

    @Override
    public BytesRef term() {
      return scratch;
    }

    @Override
    public long ord() {
      return ord;
    }

    @Override
    public int docFreq() {
      // We do not store this per-term, and we cannot
      // implement this at merge time w/o an added pass
      // through the postings:
      throw new UnsupportedOperationException();
    }

    @Override
    public long totalTermFreq() {
      // We do not store this per-term, and we cannot
      // implement this at merge time w/o an added pass
      // through the postings:
      throw new UnsupportedOperationException();
    }

    @Override
    public PostingsEnum postings(PostingsEnum reuse, int flags) {
      if (PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
        FreqProxPostingsEnum posEnum;

        if (!terms.hasProx) {
          // Caller wants positions but we didn't index them;
          // don't lie:
          throw new IllegalArgumentException("did not index positions");
        }

        if (!terms.hasOffsets && PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS)) {
          // Caller wants offsets but we didn't index them;
          // don't lie:
          throw new IllegalArgumentException("did not index offsets");
        }

        if (reuse instanceof FreqProxPostingsEnum) {
          posEnum = (FreqProxPostingsEnum) reuse;
          if (posEnum.postingsArray != postingsArray) {
            posEnum = new FreqProxPostingsEnum(terms, postingsArray);
          }
        } else {
          posEnum = new FreqProxPostingsEnum(terms, postingsArray);
        }
        posEnum.reset(sortedTermIDs[ord]);
        return posEnum;
      }

      FreqProxDocsEnum docsEnum;

      if (!terms.hasFreq && PostingsEnum.featureRequested(flags, PostingsEnum.FREQS)) {
        // Caller wants freqs but we didn't index them;
        // don't lie:
        throw new IllegalArgumentException("did not index freq");
      }

      if (reuse instanceof FreqProxDocsEnum) {
        docsEnum = (FreqProxDocsEnum) reuse;
        if (docsEnum.postingsArray != postingsArray) {
          docsEnum = new FreqProxDocsEnum(terms, postingsArray);
        }
      } else {
        docsEnum = new FreqProxDocsEnum(terms, postingsArray);
      }
      docsEnum.reset(sortedTermIDs[ord]);
      return docsEnum;
    }

    /**
     * Expert: Returns the TermsEnums internal state to position the TermsEnum
     * without re-seeking the term dictionary.
     * 

* NOTE: A seek by {@link TermState} might not capture the * {@link AttributeSource}'s state. Callers must maintain the * {@link AttributeSource} states separately * * @see TermState * @see #seekExact(BytesRef, TermState) */ public TermState termState() throws IOException { return new TermState() { @Override public void copyFrom(TermState other) { throw new UnsupportedOperationException(); } }; } } private static class FreqProxDocsEnum extends PostingsEnum { final FreqProxTermsWriterPerField terms; final FreqProxPostingsArray postingsArray; final ByteSliceReader reader = new ByteSliceReader(); final boolean readTermFreq; int docID = -1; int freq; boolean ended; int termID; public FreqProxDocsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) { this.terms = terms; this.postingsArray = postingsArray; this.readTermFreq = terms.hasFreq; } public void reset(int termID) { this.termID = termID; terms.initReader(reader, termID, 0); ended = false; docID = -1; } @Override public int docID() { return docID; } @Override public int freq() { // Don't lie here ... don't want codecs writings lots // of wasted 1s into the index: if (!readTermFreq) { throw new IllegalStateException("freq was not indexed"); } else { return freq; } } @Override public int nextPosition() throws IOException { return -1; } @Override public int startOffset() throws IOException { return -1; } @Override public int endOffset() throws IOException { return -1; } @Override public BytesRef getPayload() throws IOException { return null; } @Override public int nextDoc() throws IOException { if (docID == -1) { docID = 0; } if (reader.eof()) { if (ended) { return NO_MORE_DOCS; } else { ended = true; docID = postingsArray.lastDocIDs[termID]; if (readTermFreq) { freq = postingsArray.termFreqs[termID]; } } } else { int code = reader.readVInt(); if (!readTermFreq) { docID += code; } else { docID += code >>> 1; if ((code & 1) != 0) { freq = 1; } else { freq = reader.readVInt(); } } assert docID != postingsArray.lastDocIDs[termID]; } return docID; } @Override public int advance(int target) { throw new UnsupportedOperationException(); } @Override public long cost() { throw new UnsupportedOperationException(); } } private static class FreqProxPostingsEnum extends PostingsEnum { final FreqProxTermsWriterPerField terms; final FreqProxPostingsArray postingsArray; final ByteSliceReader reader = new ByteSliceReader(); final ByteSliceReader posReader = new ByteSliceReader(); final boolean readOffsets; int docID = -1; int freq; int pos; int startOffset; int endOffset; int posLeft; int termID; boolean ended; boolean hasPayload; BytesRefBuilder payload = new BytesRefBuilder(); public FreqProxPostingsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) { this.terms = terms; this.postingsArray = postingsArray; this.readOffsets = terms.hasOffsets; assert terms.hasProx; assert terms.hasFreq; } public void reset(int termID) { this.termID = termID; terms.initReader(reader, termID, 0); terms.initReader(posReader, termID, 1); ended = false; docID = -1; posLeft = 0; } @Override public int docID() { return docID; } @Override public int freq() { return freq; } @Override public int nextDoc() throws IOException { if (docID == -1) { docID = 0; } while (posLeft != 0) { nextPosition(); } if (reader.eof()) { if (ended) { return NO_MORE_DOCS; } else { ended = true; docID = postingsArray.lastDocIDs[termID]; freq = postingsArray.termFreqs[termID]; } } else { int code = reader.readVInt(); docID += code >>> 1; if ((code & 1) != 0) { freq = 1; } else { freq = reader.readVInt(); } assert docID != postingsArray.lastDocIDs[termID]; } posLeft = freq; pos = 0; startOffset = 0; return docID; } @Override public int advance(int target) { throw new UnsupportedOperationException(); } @Override public long cost() { throw new UnsupportedOperationException(); } @Override public int nextPosition() throws IOException { assert posLeft > 0; posLeft--; int code = posReader.readVInt(); pos += code >>> 1; if ((code & 1) != 0) { hasPayload = true; // has a payload payload.setLength(posReader.readVInt()); payload.grow(payload.length()); posReader.readBytes(payload.bytes(), 0, payload.length()); } else { hasPayload = false; } if (readOffsets) { startOffset += posReader.readVInt(); endOffset = startOffset + posReader.readVInt(); } return pos; } @Override public int startOffset() { if (!readOffsets) { throw new IllegalStateException("offsets were not indexed"); } return startOffset; } @Override public int endOffset() { if (!readOffsets) { throw new IllegalStateException("offsets were not indexed"); } return endOffset; } @Override public BytesRef getPayload() { if (hasPayload) { return payload.get(); } else { return null; } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy