All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoFormat Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.simpletext;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexSorter;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SortFieldProvider;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;

/**
 * plain text segments file format.
 *
 * 

FOR RECREATIONAL USE ONLY * * @lucene.experimental */ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { static final BytesRef SI_VERSION = new BytesRef(" version "); static final BytesRef SI_MIN_VERSION = new BytesRef(" min version "); static final BytesRef SI_DOCCOUNT = new BytesRef(" number of documents "); static final BytesRef SI_USECOMPOUND = new BytesRef(" uses compound file "); static final BytesRef SI_HAS_BLOCKS = new BytesRef(" has blocks "); static final BytesRef SI_NUM_DIAG = new BytesRef(" diagnostics "); static final BytesRef SI_DIAG_KEY = new BytesRef(" key "); static final BytesRef SI_DIAG_VALUE = new BytesRef(" value "); static final BytesRef SI_NUM_ATT = new BytesRef(" attributes "); static final BytesRef SI_ATT_KEY = new BytesRef(" key "); static final BytesRef SI_ATT_VALUE = new BytesRef(" value "); static final BytesRef SI_NUM_FILES = new BytesRef(" files "); static final BytesRef SI_FILE = new BytesRef(" file "); static final BytesRef SI_ID = new BytesRef(" id "); static final BytesRef SI_SORT = new BytesRef(" sort "); static final BytesRef SI_SORT_TYPE = new BytesRef(" type "); static final BytesRef SI_SORT_NAME = new BytesRef(" name "); static final BytesRef SI_SORT_BYTES = new BytesRef(" bytes "); public static final String SI_EXTENSION = "si"; @Override public SegmentInfo read( Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException { BytesRefBuilder scratch = new BytesRefBuilder(); String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION); try (ChecksumIndexInput input = directory.openChecksumInput(segFileName)) { SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_VERSION); final Version version; try { version = Version.parse(readString(SI_VERSION.length, scratch)); } catch (ParseException pe) { throw new CorruptIndexException( "unable to parse version string: " + pe.getMessage(), input, pe); } SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_MIN_VERSION); Version minVersion; try { String versionString = readString(SI_MIN_VERSION.length, scratch); if (versionString.equals("null")) { minVersion = null; } else { minVersion = Version.parse(versionString); } } catch (ParseException pe) { throw new CorruptIndexException( "unable to parse version string: " + pe.getMessage(), input, pe); } SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_DOCCOUNT); final int docCount = Integer.parseInt(readString(SI_DOCCOUNT.length, scratch)); SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_USECOMPOUND); final boolean isCompoundFile = Boolean.parseBoolean(readString(SI_USECOMPOUND.length, scratch)); SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_HAS_BLOCKS); final boolean hasBlocks = Boolean.parseBoolean(readString(SI_HAS_BLOCKS.length, scratch)); SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_NUM_DIAG); int numDiag = Integer.parseInt(readString(SI_NUM_DIAG.length, scratch)); Map diagnostics = CollectionUtil.newHashMap(numDiag); for (int i = 0; i < numDiag; i++) { SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_DIAG_KEY); String key = readString(SI_DIAG_KEY.length, scratch); SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_DIAG_VALUE); String value = readString(SI_DIAG_VALUE.length, scratch); diagnostics.put(key, value); } SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_NUM_ATT); int numAtt = Integer.parseInt(readString(SI_NUM_ATT.length, scratch)); Map attributes = CollectionUtil.newHashMap(numAtt); for (int i = 0; i < numAtt; i++) { SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_ATT_KEY); String key = readString(SI_ATT_KEY.length, scratch); SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_ATT_VALUE); String value = readString(SI_ATT_VALUE.length, scratch); attributes.put(key, value); } SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_NUM_FILES); int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch)); Set files = CollectionUtil.newHashSet(numFiles); for (int i = 0; i < numFiles; i++) { SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_FILE); String fileName = readString(SI_FILE.length, scratch); files.add(fileName); } SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_ID); final byte[] id = SimpleTextUtil.fromBytesRefString(readString(SI_ID.length, scratch)).bytes; if (!Arrays.equals(segmentID, id)) { throw new CorruptIndexException( "file mismatch, expected: " + StringHelper.idToString(segmentID) + ", got: " + StringHelper.idToString(id), input); } SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_SORT); final int numSortFields = Integer.parseInt(readString(SI_SORT.length, scratch)); SortField[] sortField = new SortField[numSortFields]; for (int i = 0; i < numSortFields; ++i) { SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_SORT_NAME); final String provider = readString(SI_SORT_NAME.length, scratch); SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_SORT_TYPE); SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SI_SORT_BYTES); BytesRef serializedSort = SimpleTextUtil.fromBytesRefString(readString(SI_SORT_BYTES.length, scratch)); final ByteArrayDataInput bytes = new ByteArrayDataInput( serializedSort.bytes, serializedSort.offset, serializedSort.length); sortField[i] = SortFieldProvider.forName(provider).readSortField(bytes); assert bytes.eof(); } final Sort indexSort; if (sortField.length == 0) { indexSort = null; } else { indexSort = new Sort(sortField); } SimpleTextUtil.checkFooter(input); SegmentInfo info = new SegmentInfo( directory, version, minVersion, segmentName, docCount, isCompoundFile, hasBlocks, null, diagnostics, id, attributes, indexSort); info.setFiles(files); return info; } } private String readString(int offset, BytesRefBuilder scratch) { return new String(scratch.bytes(), offset, scratch.length() - offset, StandardCharsets.UTF_8); } @Override public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException { String segFileName = IndexFileNames.segmentFileName(si.name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION); try (IndexOutput output = dir.createOutput(segFileName, ioContext)) { // Only add the file once we've successfully created it, else IFD assert can trip: si.addFile(segFileName); BytesRefBuilder scratch = new BytesRefBuilder(); SimpleTextUtil.write(output, SI_VERSION); SimpleTextUtil.write(output, si.getVersion().toString(), scratch); SimpleTextUtil.writeNewline(output); SimpleTextUtil.write(output, SI_MIN_VERSION); if (si.getMinVersion() == null) { SimpleTextUtil.write(output, "null", scratch); } else { SimpleTextUtil.write(output, si.getMinVersion().toString(), scratch); } SimpleTextUtil.writeNewline(output); SimpleTextUtil.write(output, SI_DOCCOUNT); SimpleTextUtil.write(output, Integer.toString(si.maxDoc()), scratch); SimpleTextUtil.writeNewline(output); SimpleTextUtil.write(output, SI_USECOMPOUND); SimpleTextUtil.write(output, Boolean.toString(si.getUseCompoundFile()), scratch); SimpleTextUtil.writeNewline(output); SimpleTextUtil.write(output, SI_HAS_BLOCKS); SimpleTextUtil.write(output, Boolean.toString(si.getHasBlocks()), scratch); SimpleTextUtil.writeNewline(output); Map diagnostics = si.getDiagnostics(); int numDiagnostics = diagnostics == null ? 0 : diagnostics.size(); SimpleTextUtil.write(output, SI_NUM_DIAG); SimpleTextUtil.write(output, Integer.toString(numDiagnostics), scratch); SimpleTextUtil.writeNewline(output); if (numDiagnostics > 0) { for (Map.Entry diagEntry : diagnostics.entrySet()) { SimpleTextUtil.write(output, SI_DIAG_KEY); SimpleTextUtil.write(output, diagEntry.getKey(), scratch); SimpleTextUtil.writeNewline(output); SimpleTextUtil.write(output, SI_DIAG_VALUE); SimpleTextUtil.write(output, diagEntry.getValue(), scratch); SimpleTextUtil.writeNewline(output); } } Map attributes = si.getAttributes(); SimpleTextUtil.write(output, SI_NUM_ATT); SimpleTextUtil.write(output, Integer.toString(attributes.size()), scratch); SimpleTextUtil.writeNewline(output); for (Map.Entry attEntry : attributes.entrySet()) { SimpleTextUtil.write(output, SI_ATT_KEY); SimpleTextUtil.write(output, attEntry.getKey(), scratch); SimpleTextUtil.writeNewline(output); SimpleTextUtil.write(output, SI_ATT_VALUE); SimpleTextUtil.write(output, attEntry.getValue(), scratch); SimpleTextUtil.writeNewline(output); } Set files = si.files(); int numFiles = files == null ? 0 : files.size(); SimpleTextUtil.write(output, SI_NUM_FILES); SimpleTextUtil.write(output, Integer.toString(numFiles), scratch); SimpleTextUtil.writeNewline(output); if (numFiles > 0) { for (String fileName : files) { SimpleTextUtil.write(output, SI_FILE); SimpleTextUtil.write(output, fileName, scratch); SimpleTextUtil.writeNewline(output); } } SimpleTextUtil.write(output, SI_ID); SimpleTextUtil.write(output, new BytesRef(si.getId()).toString(), scratch); SimpleTextUtil.writeNewline(output); Sort indexSort = si.getIndexSort(); SimpleTextUtil.write(output, SI_SORT); final int numSortFields = indexSort == null ? 0 : indexSort.getSort().length; SimpleTextUtil.write(output, Integer.toString(numSortFields), scratch); SimpleTextUtil.writeNewline(output); for (int i = 0; i < numSortFields; ++i) { final SortField sortField = indexSort.getSort()[i]; IndexSorter sorter = sortField.getIndexSorter(); if (sorter == null) { throw new IllegalStateException("Cannot serialize sort " + sortField); } SimpleTextUtil.write(output, SI_SORT_NAME); SimpleTextUtil.write(output, sorter.getProviderName(), scratch); SimpleTextUtil.writeNewline(output); SimpleTextUtil.write(output, SI_SORT_TYPE); SimpleTextUtil.write(output, sortField.toString(), scratch); SimpleTextUtil.writeNewline(output); SimpleTextUtil.write(output, SI_SORT_BYTES); BytesRefOutput b = new BytesRefOutput(); SortFieldProvider.write(sortField, b); SimpleTextUtil.write(output, b.bytes.get().toString(), scratch); SimpleTextUtil.writeNewline(output); } SimpleTextUtil.writeChecksum(output, scratch); } } static class BytesRefOutput extends DataOutput { final BytesRefBuilder bytes = new BytesRefBuilder(); @Override public void writeByte(byte b) { bytes.append(b); } @Override public void writeBytes(byte[] b, int offset, int length) { bytes.append(b, offset, length); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy