All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.codecs.simpletext.SimpleTextCompoundFormat Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.simpletext;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Locale;
import java.util.Set;
import org.apache.lucene.codecs.CompoundDirectory;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.ReadAdvice;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.StringHelper;

/**
 * plain text compound format.
 *
 * 

FOR RECREATIONAL USE ONLY * * @lucene.experimental */ public class SimpleTextCompoundFormat extends CompoundFormat { /** Sole constructor. */ public SimpleTextCompoundFormat() {} @Override public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si) throws IOException { String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION); final IndexInput in = dir.openInput(dataFile, IOContext.DEFAULT.withReadAdvice(ReadAdvice.NORMAL)); BytesRefBuilder scratch = new BytesRefBuilder(); // first get to TOC: DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT)); long pos = in.length() - TABLEPOS.length - OFFSETPATTERN.length() - 1; in.seek(pos); SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLEPOS); long tablePos = -1; try { tablePos = df.parse(stripPrefix(scratch, TABLEPOS)).longValue(); } catch (ParseException e) { throw new CorruptIndexException( "can't parse CFS trailer, got: " + scratch.get().utf8ToString(), in, e); } // seek to TOC and read it in.seek(tablePos); SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLE); int numEntries = Integer.parseInt(stripPrefix(scratch, TABLE)); final String[] fileNames = new String[numEntries]; final long[] startOffsets = new long[numEntries]; final long[] endOffsets = new long[numEntries]; for (int i = 0; i < numEntries; i++) { SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLENAME); fileNames[i] = si.name + IndexFileNames.stripSegmentName(stripPrefix(scratch, TABLENAME)); if (i > 0) { // files must be unique and in sorted order assert fileNames[i].compareTo(fileNames[i - 1]) > 0; } SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLESTART); startOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLESTART)); SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLEEND); 
endOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLEEND)); } return new CompoundDirectory() { private int getIndex(String name) throws IOException { int index = Arrays.binarySearch(fileNames, name); if (index < 0) { throw new FileNotFoundException( "No sub-file found (fileName=" + name + " files: " + Arrays.toString(fileNames) + ")"); } return index; } @Override public String[] listAll() throws IOException { ensureOpen(); return fileNames.clone(); } @Override public long fileLength(String name) throws IOException { ensureOpen(); int index = getIndex(name); return endOffsets[index] - startOffsets[index]; } @Override public IndexInput openInput(String name, IOContext context) throws IOException { ensureOpen(); int index = getIndex(name); return in.slice( name, startOffsets[index], endOffsets[index] - startOffsets[index], context.readAdvice()); } @Override public void close() throws IOException { in.close(); } @Override public Set getPendingDeletions() throws IOException { return Collections.emptySet(); } @Override public void checkIntegrity() throws IOException { // No checksums for SimpleText } }; } @Override public void write(Directory dir, SegmentInfo si, IOContext context) throws IOException { String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION); int numFiles = si.files().size(); String[] names = si.files().toArray(new String[numFiles]); Arrays.sort(names); long[] startOffsets = new long[numFiles]; long[] endOffsets = new long[numFiles]; BytesRefBuilder scratch = new BytesRefBuilder(); try (IndexOutput out = dir.createOutput(dataFile, context)) { for (int i = 0; i < names.length; i++) { // write header for file SimpleTextUtil.write(out, HEADER); SimpleTextUtil.write(out, names[i], scratch); SimpleTextUtil.writeNewline(out); // write bytes for file startOffsets[i] = out.getFilePointer(); try (IndexInput in = dir.openInput(names[i], IOContext.READONCE)) { out.copyBytes(in, in.length()); } endOffsets[i] = out.getFilePointer(); } long 
tocPos = out.getFilePointer(); // write CFS table SimpleTextUtil.write(out, TABLE); SimpleTextUtil.write(out, Integer.toString(numFiles), scratch); SimpleTextUtil.writeNewline(out); for (int i = 0; i < names.length; i++) { SimpleTextUtil.write(out, TABLENAME); SimpleTextUtil.write(out, names[i], scratch); SimpleTextUtil.writeNewline(out); SimpleTextUtil.write(out, TABLESTART); SimpleTextUtil.write(out, Long.toString(startOffsets[i]), scratch); SimpleTextUtil.writeNewline(out); SimpleTextUtil.write(out, TABLEEND); SimpleTextUtil.write(out, Long.toString(endOffsets[i]), scratch); SimpleTextUtil.writeNewline(out); } DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT)); SimpleTextUtil.write(out, TABLEPOS); SimpleTextUtil.write(out, df.format(tocPos), scratch); SimpleTextUtil.writeNewline(out); } } // helper method to strip strip away 'prefix' from 'scratch' and return as String private String stripPrefix(BytesRefBuilder scratch, BytesRef prefix) { return new String( scratch.bytes(), prefix.length, scratch.length() - prefix.length, StandardCharsets.UTF_8); } /** Extension of compound file */ static final String DATA_EXTENSION = "scf"; static final BytesRef HEADER = new BytesRef("cfs entry for: "); static final BytesRef TABLE = new BytesRef("table of contents, size: "); static final BytesRef TABLENAME = new BytesRef(" filename: "); static final BytesRef TABLESTART = new BytesRef(" start: "); static final BytesRef TABLEEND = new BytesRef(" end: "); static final BytesRef TABLEPOS = new BytesRef("table of contents begins at offset: "); static final String OFFSETPATTERN; static { int numDigits = Long.toString(Long.MAX_VALUE).length(); char[] pattern = new char[numDigits]; Arrays.fill(pattern, '0'); OFFSETPATTERN = new String(pattern); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy