/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Random;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;

/**
 * Abstract class to do basic tests for a compound format.
 * NOTE: This test focuses on the compound impl, nothing else.
 * The [stretch] goal is for this test to be
 * so thorough in testing a new CompoundFormat that if this
 * test passes, then all Lucene/Solr tests should also pass.  I.e.,
 * if there is some bug in a given CompoundFormat that this
 * test fails to catch then this test needs to be improved! */
public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTestCase {
    
  // test that empty CFS is empty
  public void testEmpty() throws IOException {
    Directory dir = newDirectory();
    
    SegmentInfo si = newSegmentInfo(dir, "_123");
    si.setFiles(Collections.emptySet());
    si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
    Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
    assertEquals(0, cfs.listAll().length);
    cfs.close();
    dir.close();
  }
  
  /** 
   * This test creates compound file based on a single file.
   * Files of different sizes are tested: 0, 1, 10, 100 bytes.
   */
  public void testSingleFile() throws IOException {
    int data[] = new int[] { 0, 1, 10, 100 };
    for (int i=0; i<data.length; i++) {
      String testfile = "_" + i + ".test";
      Directory dir = newDirectory();
      SegmentInfo si = newSegmentInfo(dir, "_" + i);
      createSequenceFile(dir, testfile, (byte) 0, data[i]);
      si.setFiles(Collections.singleton(testfile));
      si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
      Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
      
      IndexInput expected = dir.openInput(testfile, newIOContext(random()));
      IndexInput actual = cfs.openInput(testfile, newIOContext(random()));
      assertSameStreams(testfile, expected, actual);
      assertSameSeekBehavior(testfile, expected, actual);
      expected.close();
      actual.close();
      cfs.close();
      dir.close();
    }
  }
  
  // test that cfs reader is read-only
  public void testCreateOutputDisabled() throws IOException {
    Directory dir = newDirectory();
    
    SegmentInfo si = newSegmentInfo(dir, "_123");
    si.setFiles(Collections.emptyList());
    si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
    Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
    expectThrows(UnsupportedOperationException.class, () -> {
      cfs.createOutput("bogus", IOContext.DEFAULT);
    });

    cfs.close();
    dir.close();
  }
  
  // test that cfs reader is read-only
  public void testDeleteFileDisabled() throws IOException {
    final String testfile = "_123.test";

    Directory dir = newDirectory();
    IndexOutput out = dir.createOutput(testfile, IOContext.DEFAULT);
    out.writeInt(3);
    out.close();
 
    SegmentInfo si = newSegmentInfo(dir, "_123");
    si.setFiles(Collections.emptyList());
    si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
    Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
    expectThrows(UnsupportedOperationException.class, () -> {
      cfs.deleteFile(testfile);
    });

    cfs.close();
    dir.close();
  }
  
  // test that cfs reader is read-only
  public void testRenameFileDisabled() throws IOException {
    final String testfile = "_123.test";

    Directory dir = newDirectory();
    IndexOutput out = dir.createOutput(testfile, IOContext.DEFAULT);
    out.writeInt(3);
    out.close();
 
    SegmentInfo si = newSegmentInfo(dir, "_123");
    si.setFiles(Collections.emptyList());
    si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
    Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
    expectThrows(UnsupportedOperationException.class, () -> {
      cfs.renameFile(testfile, "bogus");
    });

    cfs.close();
    dir.close();
  }
  
  // test that cfs reader is read-only
  public void testSyncDisabled() throws IOException {
    final String testfile = "_123.test";

    Directory dir = newDirectory();
    IndexOutput out = dir.createOutput(testfile, IOContext.DEFAULT);
    out.writeInt(3);
    out.close();
 
    SegmentInfo si = newSegmentInfo(dir, "_123");
    si.setFiles(Collections.emptyList());
    si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
    Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
    expectThrows(UnsupportedOperationException.class, () -> {
      cfs.sync(Collections.singleton(testfile));
    });

    cfs.close();
    dir.close();
  }
  
  // test that cfs reader is read-only
  public void testMakeLockDisabled() throws IOException {
    final String testfile = "_123.test";

    Directory dir = newDirectory();
    IndexOutput out = dir.createOutput(testfile, IOContext.DEFAULT);
    out.writeInt(3);
    out.close();
 
    SegmentInfo si = newSegmentInfo(dir, "_123");
    si.setFiles(Collections.emptyList());
    si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
    Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
    expectThrows(UnsupportedOperationException.class, () -> {
      cfs.obtainLock("foobar");
    });

    cfs.close();
    dir.close();
  }
  
  /** 
   * This test creates a compound file based on a large number of files of
   * various length. The file content is generated randomly. The sizes range
   * from 0 to 1Mb. Some of the sizes are selected to test the buffering
   * logic in the file reading code. For this the chunk variable is set to
   * the length of the buffer used internally by the compound file logic.
   */
  public void testRandomFiles() throws IOException {
    Directory dir = newDirectory();
    // Setup the test segment
    String segment = "_123";
    int chunk = 1024; // internal buffer size used by the stream
    createRandomFile(dir, segment + ".zero", 0);
    createRandomFile(dir, segment + ".one", 1);
    createRandomFile(dir, segment + ".ten", 10);
    createRandomFile(dir, segment + ".hundred", 100);
    createRandomFile(dir, segment + ".big1", chunk);
    createRandomFile(dir, segment + ".big2", chunk - 1);
    createRandomFile(dir, segment + ".big3", chunk + 1);
    createRandomFile(dir, segment + ".big4", 3 * chunk);
    createRandomFile(dir, segment + ".big5", 3 * chunk - 1);
    createRandomFile(dir, segment + ".big6", 3 * chunk + 1);
    createRandomFile(dir, segment + ".big7", 1000 * chunk);
    
    List<String> files = new ArrayList<>();
    for (String file : dir.listAll()) {
      if (file.startsWith(segment)) {
        files.add(file);
      }
    }
    
    SegmentInfo si = newSegmentInfo(dir, "_123");
    si.setFiles(files);
    si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
    Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
    
    for (String file : files) {
      IndexInput check = dir.openInput(file, newIOContext(random()));
      IndexInput test = cfs.openInput(file, newIOContext(random()));
      assertSameStreams(file, check, test);
      assertSameSeekBehavior(file, check, test);
      test.close();
      check.close();
    }
    cfs.close();
    dir.close();
  }
  
  // Make sure we don't somehow use more than 1 descriptor
  // when reading a CFS with many subs:
  public void testManySubFiles() throws IOException {
    final MockDirectoryWrapper dir = newMockFSDirectory(createTempDir("CFSManySubFiles"));
    
    final int FILE_COUNT = atLeast(500);
    
    List<String> files = new ArrayList<>();
    for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
      String file = "_123." + fileIdx;
      files.add(file);
      IndexOutput out = dir.createOutput(file, newIOContext(random()));
      out.writeByte((byte) fileIdx);
      out.close();
    }
    
    assertEquals(0, dir.getFileHandleCount());
    
    SegmentInfo si = newSegmentInfo(dir, "_123");
    si.setFiles(files);
    si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
    Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
    
    final IndexInput[] ins = new IndexInput[FILE_COUNT];
    for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
      ins[fileIdx] = cfs.openInput("_123." + fileIdx, newIOContext(random()));
    }
    
    assertEquals(1, dir.getFileHandleCount());

    for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
      assertEquals((byte) fileIdx, ins[fileIdx].readByte());
    }
    
    assertEquals(1, dir.getFileHandleCount());
    
    for(int fileIdx=0;fileIdx<FILE_COUNT;fileIdx++) {
      ins[fileIdx].close();
    }
    
    cfs.close();
    dir.close();
  }
  
  public void testFileNotFound() throws IOException {
    Directory dir = newDirectory();
    Directory cr = createLargeCFS(dir);
    
    // Open bogus file
    expectThrows(IOException.class, () -> {
      cr.openInput("bogus", newIOContext(random()));
    });
    
    cr.close();
    dir.close();
  }
  
  public void testReadPastEOF() throws IOException {
    Directory dir = newDirectory();
    Directory cr = createLargeCFS(dir);
    IndexInput is = cr.openInput("_123.f2", newIOContext(random()));
    is.seek(is.length() - 10);
    byte b[] = new byte[100];
    is.readBytes(b, 0, 10);

    // Single byte read past end of file
    expectThrows(IOException.class, () -> {
      is.readByte();
    });

    is.seek(is.length() - 10);

    // Block read past end of file
    expectThrows(IOException.class, () -> {
      is.readBytes(b, 0, 50);
    });
    
    is.close();
    cr.close();
    dir.close();
  }
  
  /** Returns a new fake segment */
  protected static SegmentInfo newSegmentInfo(Directory dir, String name) {
    return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(),
                           Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
  }
  
  /** Creates a file of the specified size with random data. */
  protected static void createRandomFile(Directory dir, String name, int size) throws IOException {
    IndexOutput os = dir.createOutput(name, newIOContext(random()));
    Random rnd = random();
    for (int i=0; i<size; i++) {
      byte b = (byte) rnd.nextInt(256);
      os.writeByte(b);
    }
    os.close();
  }
  
  /** Creates a file of the specified size with sequential data. The first
   *  byte is written as the start byte provided. All subsequent bytes are
   *  computed as start + offset where offset is the number of the byte.
   */
  protected static void createSequenceFile(Directory dir, String name, byte start, int size) throws IOException {
    IndexOutput os = dir.createOutput(name, newIOContext(random()));
    for (int i=0; i < size; i++) {
      os.writeByte(start);
      start++;
    }
    os.close();
  }
  
  protected static void assertSameStreams(String msg, IndexInput expected, IndexInput test) throws IOException {
    assertNotNull(msg + " null expected", expected);
    assertNotNull(msg + " null test", test);
    assertEquals(msg + " length", expected.length(), test.length());
    assertEquals(msg + " position", expected.getFilePointer(), test.getFilePointer());
    
    byte expectedBuffer[] = new byte[512];
    byte testBuffer[] = new byte[expectedBuffer.length];
    
    long remainder = expected.length() - expected.getFilePointer();
    while (remainder > 0) {
      int readLen = (int) Math.min(remainder, expectedBuffer.length);
      expected.readBytes(expectedBuffer, 0, readLen);
      test.readBytes(testBuffer, 0, readLen);
      assertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen);
      remainder -= readLen;
    }
  }
  
  protected static void assertSameStreams(String msg, IndexInput expected, IndexInput actual, long seekTo) throws IOException {
    if (seekTo >= 0 && seekTo < expected.length()) {
      expected.seek(seekTo);
      actual.seek(seekTo);
      assertSameStreams(msg + ", seek(mid)", expected, actual);
    }
  }
  
  protected static void assertSameSeekBehavior(String msg, IndexInput expected, IndexInput actual) throws IOException {
    // seek to 0
    long point = 0;
    assertSameStreams(msg + ", seek(0)", expected, actual, point);
    
    // seek to middle
    point = expected.length() / 2L;
    assertSameStreams(msg + ", seek(mid)", expected, actual, point);
    
    // seek to end - 2
    point = expected.length() - 2;
    assertSameStreams(msg + ", seek(end-2)", expected, actual, point);
    
    // seek to end - 1
    point = expected.length() - 1;
    assertSameStreams(msg + ", seek(end-1)", expected, actual, point);
    
    // seek to the end
    point = expected.length();
    assertSameStreams(msg + ", seek(end)", expected, actual, point);
    
    // seek past end
    point = expected.length() + 1;
    assertSameStreams(msg + ", seek(end+1)", expected, actual, point);
  }
  
  protected static void assertEqualArrays(String msg, byte[] expected, byte[] test, int start, int len) {
    assertNotNull(msg + " null expected", expected);
    assertNotNull(msg + " null test", test);
    
    for (int i=start; i<len; i++) {
      assertEquals(msg + " " + i, expected[i], test[i]);
    }
  }
  
  /** 
   * Setup a large compound file with a number of components, each of
   * which is a sequential file (so that we can easily tell that we are
   * reading in the right byte). The method sets up 20 files - _123.f0 to _123.f19,
   * each of which is 2000 bytes.
   */
  protected Directory createLargeCFS(Directory dir) throws IOException {
    List<String> files = new ArrayList<>();
    for (int i = 0; i < 20; i++) {
      createSequenceFile(dir, "_123.f" + i, (byte) 0, 2000);
      files.add("_123.f" + i);
    }
    
    SegmentInfo si = newSegmentInfo(dir, "_123");
    si.setFiles(files);
    si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
    Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
    return cfs;
  }

  @Override
  protected void addRandomFields(Document doc) {
    doc.add(new StoredField("foobar", TestUtil.randomSimpleString(random())));
  }

  @Override
  public void testMergeStability() throws Exception {
    assumeTrue("test does not work with CFS", true);
  }

  // LUCENE-6311: make sure the resource name inside a compound file confesses that it's inside a compound file
  public void testResourceNameInsideCompoundFile() throws Exception {
    Directory dir = newDirectory();
    String subFile = "_123.xyz";
    createSequenceFile(dir, subFile, (byte) 0, 10);
    
    SegmentInfo si = newSegmentInfo(dir, "_123");
    si.setFiles(Collections.singletonList(subFile));
    si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
    Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
    IndexInput in = cfs.openInput(subFile, IOContext.DEFAULT);
    String desc = in.toString();
    assertTrue("resource description hides that it's inside a compound file: " + desc, desc.contains("[slice=" + subFile + "]"));
    cfs.close();
    dir.close();
  }
}
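
/*
 * Usage sketch (illustrative, not part of the original Lucene source): a
 * concrete codec's test suite plugs into this base class by subclassing it
 * and returning the codec whose CompoundFormat is under test from getCodec(),
 * the abstract hook inherited from BaseIndexFileFormatTestCase. The class
 * name below is hypothetical, and Codec.getDefault() merely stands in for
 * the codec a real suite would validate.
 */
class ExampleCompoundFormatTest extends BaseCompoundFormatTestCase {
  @Override
  protected Codec getCodec() {
    // Hypothetical stand-in: a real test suite returns the specific Codec
    // whose compound format implementation it intends to exercise.
    return Codec.getDefault();
  }
}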



