All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.browseengine.bobo.facets.data.MultiValueWithWeightFacetDataCache Maven / Gradle / Ivy

There is a newer version: 3.1.2
Show newest version
/**
 * This software is licensed to you under the Apache License, Version 2.0 (the
 * "Apache License").
 *
 * LinkedIn's contributions are made under the Apache License. If you contribute
 * to the Software, the contributions will be deemed to have been made under the
 * Apache License, unless you expressly indicate otherwise. Please do not make any
 * contributions that would be inconsistent with the Apache License.
 *
 * You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, this software
 * distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
 * License for the specific language governing permissions and limitations for the
 * software governed under the Apache License.
 *
 * © 2012 LinkedIn Corp. All Rights Reserved.  
 */

package com.browseengine.bobo.facets.data;

import it.unimi.dsi.fastutil.ints.IntArrayList;

import java.io.IOException;

import org.apache.log4j.Logger;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.util.OpenBitSet;

import com.browseengine.bobo.api.BoboIndexReader;
import com.browseengine.bobo.api.BoboIndexReader.WorkArea;
import com.browseengine.bobo.facets.range.MultiDataCacheBuilder;
import com.browseengine.bobo.sort.DocComparator;
import com.browseengine.bobo.sort.DocComparatorSource;
import com.browseengine.bobo.util.BigIntBuffer;
import com.browseengine.bobo.util.BigNestedIntArray;
import com.browseengine.bobo.util.BigNestedIntArray.BufferedLoader;
import com.browseengine.bobo.util.BigNestedIntArray.Loader;
import com.browseengine.bobo.util.StringArrayComparator;

public class MultiValueWithWeightFacetDataCache extends MultiValueFacetDataCache
{
  private static final long serialVersionUID = 1L;

  public final BigNestedIntArray _weightArray;

  public MultiValueWithWeightFacetDataCache()
  {
    super();
    _weightArray = new BigNestedIntArray();
  }

  /**
   * loads multi-value facet data. This method uses a workarea to prepare loading.
   * @param fieldName
   * @param reader
   * @param listFactory
   * @param workArea
   * @throws IOException
   */
  public void load(String fieldName,
                   IndexReader reader,
                   TermListFactory listFactory,
                   WorkArea workArea) throws IOException
  {
    long t0 = System.currentTimeMillis();
    int maxdoc = reader.maxDoc();
    BufferedLoader loader = getBufferedLoader(maxdoc, workArea);
    BufferedLoader weightLoader = getBufferedLoader(maxdoc, null);

    TermEnum tenum = null;
    TermDocs tdoc = null;
    TermValueList list = (listFactory == null ? (TermValueList)new TermStringList() : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
    int negativeValueCount = getNegativeValueCount(reader, fieldName.intern()); 
    int t = 0; // current term number
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;
    
    _overflow = false;

    String pre = null;

    int df = 0;
    int minID = -1;
    int maxID = -1;
    int valId = 0;

    try
    {
      tdoc = reader.termDocs();
      tenum = reader.terms(new Term(fieldName, ""));
      if (tenum != null)
      {
        do
        {
          Term term = tenum.term();
          if (term == null || !fieldName.equals(term.field()))
            break;

          String val = term.text();

          if (val != null)
          {
            int weight = 0;
            String[] split = val.split("\u0000");
            if (split.length > 1)
            {
              val = split[0];
              weight = Integer.parseInt(split[split.length-1]);
            }
            if (pre == null || !val.equals(pre))
            {
              if (pre != null)
              {
                freqList.add(df);
                minIDList.add(minID);
                maxIDList.add(maxID);
              }

              list.add(val);

              df = 0;
              minID = -1;
              maxID = -1;
              valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
              t++;
            }

            tdoc.seek(tenum);
            if(tdoc.next())
            {
              df++;
              int docid = tdoc.doc();

              if(!loader.add(docid, valId)) logOverflow(fieldName);
              else weightLoader.add(docid, weight);

              if (docid < minID) minID = docid;
              bitset.fastSet(docid);
              while(tdoc.next())
              {
                df++;
                docid = tdoc.doc();
                
                if(!loader.add(docid, valId)) logOverflow(fieldName);
                else weightLoader.add(docid, weight);

                bitset.fastSet(docid);
              }
              if (docid > maxID) maxID = docid;
            }
            pre = val;
          }

        }
        while (tenum.next());
        if (pre != null)
        {
          freqList.add(df);
          minIDList.add(minID);
          maxIDList.add(maxID);
        }
      }
    }
    finally
    {
      try
      {
        if (tdoc != null)
        {
          tdoc.close();
        }
      }
      finally
      {
        if (tenum != null)
        {
          tenum.close();
        }
      }
    }

    list.seal();

    try
    {
      _nestedArray.load(maxdoc + 1, loader);
      _weightArray.load(maxdoc + 1, weightLoader);
    }
    catch (IOException e)
    {
      throw e;
    }
    catch (Exception e)
    {
      throw new RuntimeException("failed to load due to " + e.toString(), e);
    }
    
    this.valArray = list;
    this.freqs = freqList.toIntArray();
    this.minIDs = minIDList.toIntArray();
    this.maxIDs = maxIDList.toIntArray();

    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.contains(doc, 0, true))
    {
      ++doc;
    }
    if (doc <= maxdoc)
    {
      this.minIDs[0] = doc;
      doc = maxdoc;
      while (doc > 0 && !_nestedArray.contains(doc, 0, true))
      {
        --doc;
      }
      if (doc > 0)
      {
        this.maxIDs[0] = doc;
      }
    }
    this.freqs[0] = maxdoc + 1 - (int) bitset.cardinality();   
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy