All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.rcsb.cif.text.TextCifReader Maven / Gradle / Ivy

package org.rcsb.cif.text;

import org.rcsb.cif.CifOptions;
import org.rcsb.cif.ParsingException;
import org.rcsb.cif.model.text.TextBlock;
import org.rcsb.cif.model.text.TextFile;
import org.rcsb.cif.model.Block;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

public class TextCifReader {
    public TextCifReader(CifOptions options) {
    }

    public TextFile read(InputStream inputStream) throws ParsingException, IOException {
        String content = new BufferedReader(new InputStreamReader(inputStream))
                .lines()
                .collect(Collectors.joining("\n"));
        inputStream.close();
        return readText(content);
    }

    public TextFile readText(String data) throws ParsingException {
        if (data.isEmpty()) {
            throw new ParsingException("Cannot parse empty file.");
        }

        final List dataBlocks = new ArrayList<>();
        final TokenizerState tokenizer = new TokenizerState(data);
        String blockHeader = "";

        FrameContext blockCtx = new FrameContext();

        // the next three initial values are never used in valid files
        List saveFrames = new ArrayList<>();
        FrameContext saveCtx = new FrameContext();
        Block saveFrame = new TextBlock(saveCtx.getCategories(), "");

        tokenizer.moveNext();
        while (tokenizer.getTokenType() != CifTokenType.END) {
            CifTokenType token = tokenizer.getTokenType();

            // data block
            if (token == CifTokenType.DATA) {
                if (tokenizer.inSaveFrame) {
                    throw new ParsingException("Unexpected data block inside a save frame.", tokenizer.getLineNumber());
                }
                if (!blockCtx.getCategories().isEmpty()) {
                    Block block = new TextBlock(blockCtx.getCategories(), blockHeader);
                    dataBlocks.add(block);
                    block.getSaveFrames().addAll(saveFrames);
                }
                blockHeader = tokenizer.getData().substring(tokenizer.getTokenStart() + 5, tokenizer.getTokenEnd());
                blockCtx = new FrameContext();
                saveFrames.clear();
                tokenizer.moveNext();
                // save frame
            } else if (tokenizer.getTokenType() == CifTokenType.SAVE) {
                final String saveHeader = tokenizer.getData().substring(tokenizer.getTokenStart() + 5, tokenizer.getTokenEnd());
                if (saveHeader.isEmpty()) {
                    if (!saveCtx.getCategories().isEmpty()) {
                        saveFrames.add(saveFrame);
                    }
                    tokenizer.inSaveFrame = false;
                } else {
                    if (tokenizer.inSaveFrame) {
                        throw new ParsingException("Save frames cannot be nested.", tokenizer.getLineNumber());
                    }
                    tokenizer.inSaveFrame = true;
                    final String safeHeader = tokenizer.getData().substring(tokenizer.getTokenStart() + 5, tokenizer.getTokenEnd());
                    saveCtx = new FrameContext();
                    saveFrame = new TextBlock(saveCtx.getCategories(), safeHeader);
                }
                tokenizer.moveNext();
                // loop
            } else if (token == CifTokenType.LOOP) {
                tokenizer.handleLoop(tokenizer.inSaveFrame ? saveCtx : blockCtx);
                // single row
            } else if (token == CifTokenType.COLUMN_NAME) {
                tokenizer.handleSingle(tokenizer.inSaveFrame ? saveCtx : blockCtx);
                // out of options
            } else {
                throw new ParsingException("Unexpected token (" + token + "). Expected data_, loop_, or data name.", tokenizer.getLineNumber());
            }
        }

        // check if the latest save frame was terminated
        if (tokenizer.inSaveFrame) {
            throw new ParsingException("Unfinished save frame (" + saveFrame.getBlockHeader() + ").", tokenizer.getLineNumber());
        }

        if (!blockCtx.getCategories().isEmpty() || !saveFrames.isEmpty()) {
            Block block = new TextBlock(blockCtx.getCategories(), blockHeader);
            dataBlocks.add(block);
            block.getSaveFrames().addAll(saveFrames);
        }

        return new TextFile(dataBlocks);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy