All downloads are free. Search and download functionality uses the official Maven repository.

org.archive.util.ms.PieceReader Maven / Gradle / Ivy

Go to download

The Archive Commons Code Libraries project contains general Java utility libraries, as used by the Heritrix crawler and other projects.

There is a newer version: 3.4.0-20220727
Show newest version
/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.util.ms;

import java.io.IOException;

import org.archive.io.Endian;
import org.archive.io.SeekInputStream;
import org.archive.io.SeekReader;


/**
 * Character reader over a document whose text is described by a
 * {@link PieceTable}.
 *
 * <p>Each {@link Piece} supplies a file position inside the underlying
 * {@link SeekInputStream}, a character-position limit, and a flag saying
 * whether its characters are stored as little-endian 16-bit values or as
 * single bytes decoded through {@link Cp1252}. The reader walks the pieces
 * in order, repositioning the stream whenever the current character
 * position reaches the limit of the current piece.
 *
 * <p>Once {@link #close()} has been called, read and seek operations fail
 * with an {@link IOException}.
 */
class PieceReader extends SeekReader {


    /** Piece table driving the read; set to null once closed. */
    private PieceTable table;

    /** Underlying byte stream; set to null once closed. */
    private SeekInputStream doc;

    /** Whether the current piece stores 16-bit little-endian characters. */
    private boolean unicode;

    /** Position, in characters, of the next character to be read. */
    private int charPos;

    /** Character-position limit of the current piece; -1 before the first. */
    private int limit;


    /**
     * Creates a reader positioned at character 0.
     *
     * @param table  the piece table describing where the text lives
     * @param doc    the stream the pieces point into
     * @throws IOException  declared for callers; not thrown by this code
     */
    public PieceReader(PieceTable table, SeekInputStream doc)
    throws IOException {
        this.table = table;
        this.doc = doc;
        charPos = 0;
        limit = -1;
    }


    /**
     * Fails if this reader has been closed.
     *
     * @throws IOException  if {@link #close()} was already called
     */
    private void ensureOpen() throws IOException {
        if (doc == null) {
            throw new IOException("Stream closed.");
        }
    }


    /**
     * Advances to the next piece when the current one is exhausted.
     * Does nothing at end of content or while still inside the current
     * piece.
     *
     * @throws IOException  if the reader is closed or repositioning fails
     */
    private void seekIfNecessary() throws IOException {
        ensureOpen();
        if (charPos >= table.getMaxCharPos()) {
            return;
        }
        if (charPos < limit) {
            return;
        }
        Piece piece = table.next();
        unicode = piece.isUnicode();
        limit = piece.getCharPosLimit();
        doc.position(piece.getFilePos());
    }


    /**
     * Reads a single character.
     *
     * @return the character read, or -1 once the character position has
     *         reached the table's maximum character position
     * @throws IOException  if the reader is closed or an I/O error occurs
     */
    public int read() throws IOException {
        // Also performs the closed check.
        seekIfNecessary();
        if (charPos >= table.getMaxCharPos()) {
            return -1;
        }

        int ch;
        if (unicode) {
            ch = Endian.littleChar(doc);
        } else {
            ch = Cp1252.decode(doc.read());
        }
        charPos++;
        return ch;
    }


    /**
     * Reads up to {@code len} characters into {@code buf}.
     *
     * @param buf  destination buffer
     * @param ofs  index in {@code buf} of the first character to store
     * @param len  maximum number of characters to read
     * @return the number of characters stored, 0 if {@code len} is 0, or
     *         -1 if no character could be read because the end of the
     *         content has been reached
     * @throws IOException  if the reader is closed or an I/O error occurs
     * @throws IndexOutOfBoundsException  if {@code ofs} and {@code len}
     *         do not describe a range inside {@code buf}
     */
    public int read(char[] buf, int ofs, int len) throws IOException {
        // FIXME: Think of a faster implementation that will work with
        // both unicode and non-unicode.
        ensureOpen();
        // Validate before consuming anything, so a bad range does not
        // leave the reader advanced past characters it never delivered.
        if (ofs < 0 || len < 0 || len > buf.length - ofs) {
            throw new IndexOutOfBoundsException(
                    "ofs=" + ofs + ", len=" + len
                    + ", buf.length=" + buf.length);
        }
        if (len == 0) {
            return 0;
        }
        for (int i = 0; i < len; i++) {
            int ch = read();
            if (ch < 0) {
                // Signal end of content with -1 rather than 0: a zero
                // return makes callers that retry on 0 loop forever.
                return (i == 0) ? -1 : i;
            }
            buf[ofs + i] = (char)ch;
        }
        return len;
    }


    /**
     * Closes the underlying stream and marks this reader as closed.
     * Calling it again has no effect.
     *
     * @throws IOException  if closing the underlying stream fails
     */
    public void close() throws IOException {
        if (doc == null) {
            return;
        }
        try {
            doc.close();
        } finally {
            // Clear both references even if close() throws, so later
            // calls report "Stream closed." instead of a
            // NullPointerException on the table.
            doc = null;
            table = null;
        }
    }


    /**
     * Returns the current position, in characters.
     *
     * @return the index of the next character to be read
     * @throws IOException  declared for callers; not thrown by this code
     */
    public long position() throws IOException {
        return charPos;
    }


    /**
     * Moves the reader to the given character position.
     *
     * @param p  the character position to seek to
     * @throws IOException  if the reader is closed, if {@code p} is
     *         negative or exceeds {@link Integer#MAX_VALUE}, if the table
     *         has no piece for {@code p}, or if repositioning the
     *         underlying stream fails
     */
    public void position(long p) throws IOException {
        ensureOpen();
        if (p > Integer.MAX_VALUE) {
            throw new IOException("File too large.");
        }
        if (p < 0) {
            // Reject before the int cast, which would otherwise wrap a
            // large negative value into an unrelated position.
            throw new IOException("Illegal position: " + p);
        }
        int newPos = (int)p;
        Piece piece = table.pieceFor(newPos);
        if (piece == null) {
            throw new IOException("Illegal position: " + p);
        }
        unicode = piece.isUnicode();
        limit = piece.getCharPosLimit();

        // Offset, in characters, from the start of the piece.
        // NOTE(review): this is added to the file position unscaled; if
        // getFilePos() is a byte offset and the piece is unicode, the
        // offset may need doubling. Confirm against Piece and PieceTable.
        int ofs = newPos - piece.getCharPosStart();
        this.charPos = newPos;
        doc.position(piece.getFilePos() + ofs);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy