All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.io.pdb.parser.PDBFileParser Maven / Gradle / Ivy

There is a newer version: 2024.11.2
Show newest version
/*
 * Copyright (c) 1997 - 2016
 * Actelion Pharmaceuticals Ltd.
 * Gewerbestrasse 16
 * CH-4123 Allschwil, Switzerland
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the copyright holder nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @author Modest v. Korff
 */

package com.actelion.research.chem.io.pdb.parser;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.AbstractMap;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.function.Consumer;
import java.util.stream.IntStream;

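/**
 * PDBFileParser
 * Reads the records of a PDB coordinate entry file line by line and stores them in a
 * {@link PDBCoordEntryFile}.
 * Created by korffmo1 on 20.03.18.
 */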
public class PDBFileParser {

    // Pattern used to parse the deposition date of the HEADER record, e.g. 05-FEB-18
    public static final String DATE_FORMAT = "dd-MMM-yy";

    // Each TAG_* constant is the prefix a line must start with to be treated as that record type.
    // The "Mandatory"/"Optional" remarks below describe whether the record is expected in an entry;
    // this parser itself does not enforce the presence of any record.
    public static final String TAG_HEADER = "HEADER";

    // NOTE(review): the constant name is misspelled ("OBSOLTE"); lines are matched against the value "OBSLTE".
    public static final String TAG_OBSOLTE = "OBSLTE";

    public static final String TAG_TITLE = "TITLE";

    public static final String TAG_SPLIT = "SPLIT";

    public static final String TAG_CAVEAT = "CAVEAT";

    // Mandatory
    public static final String TAG_COMPND = "COMPND";
    // Mandatory
    public static final String TAG_SOURCE = "SOURCE";
    // Mandatory
    public static final String TAG_KEYWDS = "KEYWDS";

    // Mandatory
    public static final String TAG_EXPDTA = "EXPDTA";

    // Optional. Mandatory for NMR ensemble entries.
    public static final String TAG_NUMMDL = "NUMMDL";

    // Optional. Mandatory for NMR minimized average structures or when the entire polymer chain contains C alpha or P atoms only.
    public static final String TAG_MDLTYP = "MDLTYP";

    // Mandatory
    public static final String TAG_AUTHOR = "AUTHOR";
    // Mandatory
    public static final String TAG_REVDAT = "REVDAT";
    // Optional. Mandatory for a replacement entry.
    public static final String TAG_SPRSDE = "SPRSDE";
    // Optional. Mandatory if a publication describes the experiment.
    public static final String TAG_JRNL = "JRNL";

    // Remark has format 'other'
    public static final String TAG_REMARK = "REMARK";

    // Optional. Mandatory for a re-refined structure.
    public static final String TAG_REMARK0 = "REMARK 0";

    // Optional
    public static final String TAG_REMARK1 = "REMARK 1";
    // Mandatory
    public static final String TAG_REMARK2 = "REMARK 2";
    // Mandatory
    public static final String TAG_REMARK3 = "REMARK 3";
    // Optional. NOTE(review): "N" presumably stands for any further remark number; the literal value is "REMARK N".
    public static final String TAG_REMARK_N = "REMARK N";

    // Optional. Mandatory for all polymers.
    public static final String TAG_DBREF = "DBREF";
    // Optional. Mandatory when certain sequence database accession and/or sequence numbering does not fit preceding DBREF format.
    // NOTE(review): the value contains a '/', and any line starting with "DBREF1" or "DBREF2" also starts with TAG_DBREF.
    public static final String TAG_DBREF1_DBREF2 = "DBREF1/DBREF2";
    // Optional. Mandatory if sequence conflict exists.
    public static final String TAG_SEQADV = "SEQADV";
    // Mandatory. Mandatory if ATOM records exist.
    public static final String TAG_SEQRES = "SEQRES";
    // Optional. Mandatory if modified group exists in the coordinates.
    public static final String TAG_MODRES = "MODRES";
    // Optional. Mandatory if a non-standard group other than water appears in the coordinates.
    public static final String TAG_HET = "HET";
    // Optional. NOTE(review): the original condition was truncated ("Mandatory if"); presumably the same
    // condition as for HET applies - TODO confirm against the PDB file format specification.
    public static final String TAG_HETNAM = "HETNAM";
    // Optional
    public static final String TAG_HETSYN = "HETSYN";
    // Optional
    public static final String TAG_FORMUL = "FORMUL";
    // Optional
    public static final String TAG_HELIX = "HELIX";

    // Optional
    public static final String TAG_SHEET = "SHEET";
    // Optional. Mandatory if a disulfide bond is present.
    public static final String TAG_SSBOND = "SSBOND";
    // Optional. Mandatory if non-standard residues appear in a polymer.
    public static final String TAG_LINK = "LINK";

    // Optional
    public static final String TAG_CISPEP = "CISPEP";
    // Optional
    public static final String TAG_SITE = "SITE";
    // Mandatory
    public static final String TAG_CRYST1 = "CRYST1";
    // Mandatory
    public static final String TAG_ORIGX1 = "ORIGX1";
    // Mandatory
    public static final String TAG_ORIGX2 = "ORIGX2";
    // Mandatory
    public static final String TAG_ORIGX3 = "ORIGX3";
    // Mandatory
    public static final String TAG_SCALE1 = "SCALE1";
    // Mandatory
    public static final String TAG_SCALE2 = "SCALE2";
    // Mandatory
    public static final String TAG_SCALE3 = "SCALE3";
    // MTRIX1, MTRIX2 and MTRIX3: see the condition documented at TAG_MTRIX3.
    public static final String TAG_MTRIX1 = "MTRIX1";

    public static final String TAG_MTRIX2 = "MTRIX2";
    // Optional. Mandatory if the complete asymmetric unit must be generated from the given coordinates using non-crystallographic symmetry.
    public static final String TAG_MTRIX3 = "MTRIX3";

    // Optional. Mandatory if more than one model is present in the entry.
    public static final String TAG_MODEL = "MODEL";

    // Optional. Mandatory if standard residues exist.
    public static final String TAG_ATOM = "ATOM";
    // Optional
    public static final String TAG_ANISOU = "ANISOU";
    // Optional. Mandatory if ATOM records exist.
    public static final String TAG_TER = "TER";
    // Optional. Mandatory if non-standard group exists.
    public static final String TAG_HETATM = "HETATM";
    // Optional. Mandatory if MODEL appears.
    public static final String TAG_ENDMDL = "ENDMDL";
    // Optional. Mandatory if non-standard group appears and if LINK or SSBOND records exist.
    public static final String TAG_CONECT = "CONECT";

    // Mandatory
    public static final String TAG_MASTER = "MASTER";
    // Mandatory. Note that a line starting with TAG_ENDMDL also starts with this prefix.
    public static final String TAG_END = "END";



    // Parses the deposition date of the HEADER record (see DATE_FORMAT).
    // SimpleDateFormat is not thread-safe, so an instance of this parser must not be shared between threads.
    private final DateFormat dfDateDeposition;

    // Currently unused by parse(): the REMARK section is skipped line by line.
    private final RemarkParser remarkParser;

    // The delegate parsers below keep the result and the line index of their last parse() call.
    private final HetNameParser hetNameParser;

    private final HetSynonymParser hetSynonymParser;

    private final FormulaParser formulaParser;

    private final SiteParser siteParser;

    private final ModelParser modelParser;

    /**
     * Creates a parser together with its delegate record parsers.
     *
     * <p>The date format is bound to {@link Locale#ENGLISH}: the month in the HEADER record is an
     * English abbreviation (e.g. 05-FEB-18) and would otherwise fail to parse on systems whose
     * default locale uses different month names.</p>
     */
    public PDBFileParser() {
        dfDateDeposition = new SimpleDateFormat(DATE_FORMAT, Locale.ENGLISH);
        remarkParser = new RemarkParser();
        hetNameParser = new HetNameParser();
        hetSynonymParser = new HetSynonymParser();
        formulaParser = new FormulaParser();
        siteParser = new SiteParser();
        modelParser = new ModelParser();
    }

    /**
     * Parses the given PDB file.
     *
     * <p>The file is opened with the platform default charset and is closed again before this
     * method returns, also when parsing fails.</p>
     *
     * @param fiPDB the PDB file to read
     * @return the parsed entry
     * @throws IOException if the file cannot be opened or read
     * @throws ParseException if a record cannot be parsed
     */
    public PDBCoordEntryFile parse(File fiPDB) throws IOException, ParseException {
        // parse(BufferedReader) reads all lines up front, so the reader can be closed right after it returns.
        try (BufferedReader br = new BufferedReader(new FileReader(fiPDB))) {
            return parse(br);
        }
    }

    public PDBCoordEntryFile parse(BufferedReader br) throws IOException, ParseException {
        PDBCoordEntryFile pdbCoordEntryFile = new PDBCoordEntryFile();
        
		ArrayList liRaw = new ArrayList();

		String sCurrentLine;
		while ((sCurrentLine = br.readLine()) != null) {
			liRaw.add(sCurrentLine);
		}


        int indexLine = 0;
        
        String lHeader = liRaw.get(indexLine);

        if(lHeader.startsWith(TAG_HEADER)) {
        	try {
        		indexLine = parseHeader(lHeader, pdbCoordEntryFile);}
        	catch(Exception e) {
        		indexLine++;
        	}
    	}
        String data = liRaw.get(indexLine);
	    while((!data.startsWith(TAG_ATOM) && !data.startsWith(TAG_HETATM)) && indexLine siIndex = parseOneTimeMultipleLines(liRaw, indexLine, TAG_OBSOLTE);
	            pdbCoordEntryFile.setObsolete(siIndex.getKey());
	            indexLine = siIndex.getValue();
	        }
	
	        if(liRaw.get(indexLine).startsWith(TAG_TITLE)) {
	        	SimpleEntry siIndex = parseOneTimeMultipleLines(liRaw, indexLine, TAG_TITLE);
	            pdbCoordEntryFile.setTitle(siIndex.getKey());
	            indexLine = siIndex.getValue();
	        }
	        //} else {
	        //    throw new RuntimeException("Missing " + TAG_TITLE);
	        //}
	
	        // Not mandatory
	        if(liRaw.get(indexLine).startsWith(TAG_SPLIT)) {
	        	SimpleEntry siIndex = parseOneTimeMultipleLines(liRaw, indexLine, TAG_SPLIT);
	            pdbCoordEntryFile.setSplit(siIndex.getKey());
	            indexLine = siIndex.getValue();
	        }
	
	        // Not mandatory
	        if(liRaw.get(indexLine).startsWith(TAG_CAVEAT)) {
	        	SimpleEntry siIndex = parseOneTimeMultipleLines(liRaw, indexLine, TAG_CAVEAT);
	            pdbCoordEntryFile.setSplit(siIndex.getKey());
	            indexLine = siIndex.getValue();
	        }
	
	        // Mandatory
	        if(liRaw.get(indexLine).startsWith(TAG_COMPND)) {
	        	SimpleEntry siIndex = parseOneTimeMultipleLines(liRaw, indexLine, TAG_COMPND);
	            pdbCoordEntryFile.setCompound(siIndex.getKey());
	            indexLine = siIndex.getValue();
	        }
	        //} else {
	        //    throw new RuntimeException("Missing " + TAG_COMPND);
	        //}
	
	        // Mandatory
	        if(liRaw.get(indexLine).startsWith(TAG_SOURCE)) {
	        	SimpleEntry siIndex = parseOneTimeMultipleLines(liRaw, indexLine, TAG_SOURCE);
	            pdbCoordEntryFile.setSource(siIndex.getKey());
	            indexLine = siIndex.getValue();
	        }
	        //} else {
	        //   throw new RuntimeException("Missing " + TAG_SOURCE);
	        
	
	        // Mandatory
	        if(liRaw.get(indexLine).startsWith(TAG_KEYWDS)) {
	        	SimpleEntry siIndex = parseOneTimeMultipleLines(liRaw, indexLine, TAG_KEYWDS);
	            pdbCoordEntryFile.setKeywords(siIndex.getKey());
	            indexLine = siIndex.getValue();
	        } //else {
	            //throw new RuntimeException("Missing " + TAG_KEYWDS);
	        //}
	
	        // Mandatory
	        if(liRaw.get(indexLine).startsWith(TAG_EXPDTA)) {
	        	SimpleEntry siIndex = parseOneTimeMultipleLines(liRaw, indexLine, TAG_EXPDTA);
	            pdbCoordEntryFile.setExpdata(siIndex.getKey());
	            indexLine = siIndex.getValue();
	        } //else {
	        //    throw new RuntimeException("Missing " + TAG_EXPDTA);
	        //}
	
	        // Optional Mandatory for NMR ensemble entries.
	        if(liRaw.get(indexLine).startsWith(TAG_NUMMDL)) {
	        	SimpleEntry siIndex = parseOneTimeMultipleLines(liRaw, indexLine, TAG_NUMMDL);
	            pdbCoordEntryFile.setNummdl(siIndex.getKey());
	            indexLine = siIndex.getValue();
	        }
	
	        // Optional Mandatory for NMR minimized average Structures or when the entire polymer chain contains C alpha or P atoms only.
	        if(liRaw.get(indexLine).startsWith(TAG_MDLTYP)) {
	        	SimpleEntry siIndex = parseOneTimeMultipleLines(liRaw, indexLine, TAG_MDLTYP);
	            pdbCoordEntryFile.setMdltyp(siIndex.getKey());
	            indexLine = siIndex.getValue();
	        }
	
	        // Mandatory
	        if(liRaw.get(indexLine).startsWith(TAG_AUTHOR)) {
	        	SimpleEntry siIndex = parseOneTimeMultipleLines(liRaw, indexLine, TAG_AUTHOR);
	            pdbCoordEntryFile.setAuthor(siIndex.getKey());
	            indexLine = siIndex.getValue();
	        }
	
	        // Mandatory
	        if(liRaw.get(indexLine).startsWith(TAG_REVDAT)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_REVDAT);
	            pdbCoordEntryFile.setRevdat(liIndex.getLi());
	            indexLine = liIndex.getId();
	        } //else {
	           // throw new RuntimeException("Missing " + TAG_REVDAT);
	        //}
	
	        // Optional Mandatory for a replacement entry.
	        if(liRaw.get(indexLine).startsWith(TAG_SPRSDE)) {
	        	SimpleEntry siIndex = parseOneTimeMultipleLines(liRaw, indexLine, TAG_SPRSDE);
	            pdbCoordEntryFile.setSprsde(siIndex.getKey());
	            indexLine = siIndex.getValue();
	        }
	
	        // Optional Mandatory for a publication describes the experiment.
	        if(liRaw.get(indexLine).startsWith(TAG_JRNL)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_JRNL);
	            pdbCoordEntryFile.setJrnl(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	        /*
	        remarkParser.parse(liRaw, indexLine);
	
	        HashMap hmNo_Remark = remarkParser.getHmNo_Remark();
	
	        indexLine = remarkParser.getIndexLine();
	
	        pdbCoordEntryFile.setRemarks(hmNo_Remark);
	        */
	
	        // Mandatory for all polymers.
	        if(liRaw.get(indexLine).startsWith(TAG_DBREF)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_DBREF);
	            pdbCoordEntryFile.setDBRef(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	
	        // Mandatory for all polymers.
	        if(liRaw.get(indexLine).startsWith(TAG_DBREF1_DBREF2)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_DBREF1_DBREF2);
	            pdbCoordEntryFile.setDBRef1DBRef2(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	
	        // Mandatory if sequence conflict exists.
	        if(liRaw.get(indexLine).startsWith(TAG_SEQADV)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_SEQADV);
	            pdbCoordEntryFile.setSEQADV(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	
	        // Mandatory if ATOM records exist.
	        // Primary sequence of backbone residues.
	        if(liRaw.get(indexLine).startsWith(TAG_SEQRES)) {
	            ListInteger liIndexChains = parseMultipleTimesMultipleLinesSEQRES(liRaw, indexLine, TAG_SEQRES);
	            pdbCoordEntryFile.setSEQRES(liIndexChains.getLi());
	            indexLine = liIndexChains.getId();
	        }
	
	        // Mandatory if modified group exists in the coordinates.
	        // Identification of modifications to standard residues.
	        if(liRaw.get(indexLine).startsWith(TAG_MODRES)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_MODRES);
	            pdbCoordEntryFile.setModRes(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	
	        // Mandatory if a non-standard group other than water appears in the coordinates.
	        // Identification of non-standard groups heterogens).
	
	        String patternHET = TAG_HET + " ";
	
	        if(liRaw.get(indexLine).startsWith(patternHET)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, patternHET);
	            pdbCoordEntryFile.setHet(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	
	        //
	        // Compound name of the heterogens.
	        if(liRaw.get(indexLine).startsWith(TAG_HETNAM)) {
	            hetNameParser.parse(liRaw, indexLine);
	            HashMap hmId_Name = hetNameParser.getHMId_Name();
	            pdbCoordEntryFile.setHmId_Name(hmId_Name);
	            indexLine = hetNameParser.getIndexLine();
	        }
	
	        if(liRaw.get(indexLine).startsWith(TAG_HETSYN)) {
	            hetSynonymParser.parse(liRaw, indexLine);
	            HashMap hmId_Synonyms = hetSynonymParser.getHMId_Synonyms();
	            pdbCoordEntryFile.setHmId_Synonyms(hmId_Synonyms);
	            indexLine = hetSynonymParser.getIndexLine();
	        }
	
	        if(liRaw.get(indexLine).startsWith(TAG_FORMUL)) {
	            formulaParser.parse(liRaw, indexLine);
	            HashMap hmId_Formula = formulaParser.getHMId_Formula();
	            pdbCoordEntryFile.setHmId_Formula(hmId_Formula);
	            indexLine = formulaParser.getIndexLine();
	        }
	
	        if(liRaw.get(indexLine).startsWith(TAG_HELIX)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_HELIX);
	            pdbCoordEntryFile.setHelix(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	
	        if(liRaw.get(indexLine).startsWith(TAG_SHEET)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_SHEET);
	            pdbCoordEntryFile.setSheet(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	
	        if(liRaw.get(indexLine).startsWith(TAG_SSBOND)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_SSBOND);
	            pdbCoordEntryFile.setSSBond(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	
	        if(liRaw.get(indexLine).startsWith(TAG_LINK)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_LINK);
	            pdbCoordEntryFile.setLink(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	
	        if(liRaw.get(indexLine).startsWith(TAG_CISPEP)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_CISPEP);
	            pdbCoordEntryFile.setCisPep(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	
	        if(liRaw.get(indexLine).startsWith(TAG_SITE)) {
	            siteParser.parse(liRaw, indexLine);
	            HashMap hmId_Site = siteParser.getHMId_Site();
	            pdbCoordEntryFile.setHmId_Site(hmId_Site);
	            indexLine = siteParser.getIndexLine();
	        }
	
	        if(liRaw.get(indexLine).startsWith(TAG_CRYST1)) {
	            pdbCoordEntryFile.setCryst1(liRaw.get(indexLine).substring(6));
	            indexLine++;
	        } //else {
	        //    throw new RuntimeException("Missing " + TAG_CRYST1);
	        //}
	
	        if(liRaw.get(indexLine).startsWith(TAG_ORIGX1)) {
	            pdbCoordEntryFile.setOrigX1(liRaw.get(indexLine).substring(10));
	            indexLine++;
	        } //else {
	        //   throw new RuntimeException("Missing " + TAG_ORIGX1);
	        //}
	
	        if(liRaw.get(indexLine).startsWith(TAG_ORIGX2)) {
	            pdbCoordEntryFile.setOrigX2(liRaw.get(indexLine).substring(10));
	            indexLine++;
	        } //else {
	           // throw new RuntimeException("Missing " + TAG_ORIGX2);
	        //}
	
	        if(liRaw.get(indexLine).startsWith(TAG_ORIGX3)) {
	            pdbCoordEntryFile.setOrigX3(liRaw.get(indexLine).substring(10));
	            indexLine++;
	        } //else {
	        //    throw new RuntimeException("Missing " + TAG_ORIGX3);
	        //}
	
	        if(liRaw.get(indexLine).startsWith(TAG_SCALE1)) {
	            pdbCoordEntryFile.setScale1(liRaw.get(indexLine).substring(10));
	            indexLine++;
	        } //else {
	           // throw new RuntimeException("Missing " + TAG_SCALE1);
	        //}
	
	        if(liRaw.get(indexLine).startsWith(TAG_SCALE2)) {
	            pdbCoordEntryFile.setScale2(liRaw.get(indexLine).substring(10));
	            indexLine++;
	        } //else {
	        //    throw new RuntimeException("Missing " + TAG_SCALE2);
	        //}
	
	        if(liRaw.get(indexLine).startsWith(TAG_SCALE3)) {
	            pdbCoordEntryFile.setScale3(liRaw.get(indexLine).substring(10));
	            indexLine++;
	        } //else {
	        //    throw new RuntimeException("Missing " + TAG_SCALE3);
	        //}
	
	        if(liRaw.get(indexLine).startsWith(TAG_MTRIX1)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_MTRIX1);
	            pdbCoordEntryFile.setMtrix1(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	
	        if(liRaw.get(indexLine).startsWith(TAG_MTRIX2)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_MTRIX2);
	            pdbCoordEntryFile.setMtrix2(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	
	        if(liRaw.get(indexLine).startsWith(TAG_MTRIX3)) {
	            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_MTRIX3);
	            pdbCoordEntryFile.setMtrix3(liIndex.getLi());
	            indexLine = liIndex.getId();
	        }
	        else {
	        	indexLine++;
	        }
	        
	        data = liRaw.get(indexLine);
	        
	        
	        //
	        // Parsing atoms and their coordinates
	        //
	    }
	    
	    //indexLine--;
        List hetAtomRecords = new ArrayList();
        List protAtomRecords = new ArrayList();
        modelParser.parse(liRaw, indexLine,protAtomRecords,hetAtomRecords);

        pdbCoordEntryFile.setProtAtomRecords(protAtomRecords);
        pdbCoordEntryFile.setHetAtomRecords(hetAtomRecords);
        //List liModelModel = modelParser.getLiModelModel();
        //pdbCoordEntryFile.setLiModelModel(liModelModel);

        indexLine = modelParser.getIndexLine();
        //
        // Parsing atom connections
        //
        

        
        List bonds = new ArrayList();
        
        if(liRaw.get(indexLine).startsWith(TAG_CONECT)) {
            ListInteger liIndex = parseMultipleTimesOneLine(liRaw, indexLine, TAG_CONECT);
            for(String bondInfo:liIndex.getLi()) {
            	bondInfo = bondInfo.trim();
            	String[] strArr = bondInfo.split("\\s+");
            	try {
            	int firstAtom = Integer.parseInt(strArr[0]);
            	IntStream.range(1,strArr.length).forEach(e -> {
            		int[] bond = new int[2];
            		bond[0] = firstAtom;
            		bond[1] = Integer.parseInt(strArr[e]);
            		bonds.add(bond);
            	});
            	}
            	catch(Exception e) {
            		continue;
            	}

            }
            indexLine = liIndex.getId();
        }
        pdbCoordEntryFile.setLiConnect(bonds);

        if(liRaw.get(indexLine).startsWith(TAG_MASTER)) {
            pdbCoordEntryFile.setMaster(liRaw.get(indexLine).substring(10).trim());
            indexLine++;
        } 
        if(liRaw.get(indexLine).startsWith(TAG_END)) {
            pdbCoordEntryFile.setEnd(true);
        } else {
            pdbCoordEntryFile.setEnd(false);
        }
        
        return pdbCoordEntryFile;

    }

    /**
     * Parses the HEADER line: classification (characters 10 to 49), deposition date (characters
     * 50 to 58, format {@link #DATE_FORMAT}) and entry ID (characters 62 to 65), all zero-based.
     *
     * <p>Fields that lie beyond the end of a short line are treated as empty. Classification and ID
     * are stored before the date is parsed, so they are kept even when the date is unparseable.</p>
     *
     * @param lHeader the HEADER line
     * @param pdbCoordEntryFile receives the parsed values
     * @return the index of the line following the header, which is always 1
     * @throws ParseException if the deposition date is missing or cannot be parsed
     */
    private int parseHeader(String lHeader, PDBCoordEntryFile pdbCoordEntryFile) throws ParseException {
        pdbCoordEntryFile.setClassification(headerField(lHeader, 10, 50));
        pdbCoordEntryFile.setID(headerField(lHeader, 62, 66));

        Date date = dfDateDeposition.parse(headerField(lHeader, 50, 59));
        pdbCoordEntryFile.setDateDeposition(date);

        return 1;
    }

    /** Returns the trimmed text between {@code begin} (inclusive) and {@code end} (exclusive), clipped to the line length. */
    private static String headerField(String line, int begin, int end) {
        if (line.length() <= begin) {
            return "";
        }
        return line.substring(begin, Math.min(line.length(), end)).trim();
    }

    /**
     * One time, multiple lines: There are records that conceptually exist only once in an entry, but the
     * information content may exceed the number of columns available. These records are therefore
     * continued on subsequent lines.
     *
     * <p>The text of the first line is everything behind the tag, trimmed. Each directly following
     * line that starts with the same tag is split on runs of blanks; all tokens except the first
     * one are appended, separated by single blanks. Any further token, such as a continuation
     * number, therefore ends up in the returned text.</p>
     *
     * @param liRaw all lines of the entry
     * @param indexLine index of the first line of the record
     * @param tag prefix identifying the record type
     * @return the concatenated text as key and the index of the first line behind the record as value
     * @throws RuntimeException if the line at {@code indexLine} does not start with {@code tag}
     * @throws ParseException never thrown by this implementation
     */
    private AbstractMap.SimpleEntry<String, Integer> parseOneTimeMultipleLines(List<String> liRaw, int indexLine, String tag) throws ParseException {

        String firstLine = liRaw.get(indexLine);
        if (!firstLine.startsWith(tag)) {
            throw new RuntimeException("Error in parsing " + tag);
        }

        StringBuilder sb = new StringBuilder(firstLine.substring(tag.length()).trim());

        int next = indexLine + 1;
        while (next < liRaw.size() && liRaw.get(next).startsWith(tag)) {
            String[] arr = liRaw.get(next).split("[ ]+");
            sb.append(" ");
            for (int j = 1; j < arr.length; j++) {
                sb.append(arr[j]);
                if (j < arr.length - 1) {
                    sb.append(" ");
                }
            }
            next++;
        }

        return new AbstractMap.SimpleEntry<>(sb.toString(), next);
    }

    /**
     * Multiple times, one line: reads a run of consecutive lines that all start with {@code tag},
     * each line being one record.
     *
     * <p>The first record is everything behind the tag, trimmed. For every following line the line
     * is split on runs of blanks and all tokens except the first one are joined with single
     * blanks; these records carry one leading blank.</p>
     *
     * @param liRaw all lines of the entry
     * @param indexLine index of the first line of the run
     * @param tag prefix identifying the record type
     * @return the record texts together with the index of the first line behind the run
     * @throws RuntimeException if the line at {@code indexLine} does not start with {@code tag}
     * @throws ParseException never thrown by this implementation
     */
    private ListInteger parseMultipleTimesOneLine(List<String> liRaw, int indexLine, String tag) throws ParseException {

        String firstLine = liRaw.get(indexLine);
        if (!firstLine.startsWith(tag)) {
            throw new RuntimeException("Error in parsing " + tag);
        }

        List<String> liTxt = new ArrayList<>();
        liTxt.add(firstLine.substring(tag.length()).trim());

        int next = indexLine + 1;
        while (next < liRaw.size() && liRaw.get(next).startsWith(tag)) {
            String[] arr = liRaw.get(next).split("[ ]+");

            StringBuilder sb = new StringBuilder(" ");
            // Token 0 is the tag itself (plus anything directly attached to it) and is dropped.
            for (int j = 1; j < arr.length; j++) {
                sb.append(arr[j]);
                if (j < arr.length - 1) {
                    sb.append(" ");
                }
            }
            liTxt.add(sb.toString());

            next++;
        }

        return new ListInteger(liTxt, next);
    }

    /**
     * Reads a run of consecutive SEQRES lines and concatenates the residue names per chain.
     *
     * <p>The chain identifier is the character at zero-based position 11, the residue names start
     * at position 19. Lines are grouped by chain identifier in the order in which they appear; a
     * new chain starts whenever the identifier differs from the one of the previous line. The
     * residue count stored on each line is neither read nor validated. Lines too short to carry
     * a chain identifier or residues are treated as having a blank identifier and no residues.</p>
     *
     * @param liRaw all lines of the entry
     * @param indexLine index of the first SEQRES line
     * @param tag prefix identifying the record type
     * @return one blank-separated residue string per chain together with the index of the first
     *         line behind the run
     * @throws RuntimeException if the line at {@code indexLine} does not start with {@code tag}
     * @throws ParseException never thrown by this implementation
     */
    private static ListInteger parseMultipleTimesMultipleLinesSEQRES(List<String> liRaw, int indexLine, String tag) throws ParseException {

        if (!liRaw.get(indexLine).startsWith(tag)) {
            throw new RuntimeException("Error in parsing " + tag);
        }

        List<String> liChain = new ArrayList<>();
        StringBuilder sb = new StringBuilder();
        String chainId = null;

        while (indexLine < liRaw.size() && liRaw.get(indexLine).startsWith(tag)) {
            String l = liRaw.get(indexLine);

            String chainIdLine = l.length() > 11 ? l.substring(11, 12) : " ";
            if (chainId != null && !chainId.equals(chainIdLine)) {
                // Chain changed: store what was collected for the previous chain.
                liChain.add(sb.toString());
                sb = new StringBuilder();
            }
            chainId = chainIdLine;

            if (sb.length() > 0) {
                sb.append(" ");
            }
            sb.append(l.length() > 19 ? l.substring(19).trim() : "");

            indexLine++;
        }

        if (sb.length() > 0) {
            liChain.add(sb.toString());
        }

        return new ListInteger(liChain, indexLine);
    }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy