Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
org.jmol.adapter.readers.cif.MMCifReader Maven / Gradle / Ivy
Go to download
Jmol: an open-source Java viewer for chemical structures in 3D
/* $RCSfile$
* $Author: hansonr $
* $Date: 2006-10-20 07:48:25 -0500 (Fri, 20 Oct 2006) $
* $Revision: 5991 $
*
* Copyright (C) 2003-2005 Miguel, Jmol Development, www.jmol.org
*
* Contact: [email protected]
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.jmol.adapter.readers.cif;
import java.util.Hashtable;
import java.util.Map;
import javajs.util.Lst;
import javajs.util.M4;
import javajs.util.P3;
import javajs.util.PT;
import javajs.util.SB;
import org.jmol.adapter.smarter.Atom;
import org.jmol.adapter.smarter.Structure;
import org.jmol.api.JmolAdapter;
import org.jmol.c.STR;
import org.jmol.java.BS;
import org.jmol.util.BSUtil;
import org.jmol.util.Logger;
import org.jmol.util.SimpleUnitCell;
/**
*
* mmCIF files are recognized prior to class creation.
* Required fields include one of:
*
* _entry.id
* _database_PDB_
* _pdbx_
* _chem_comp.pdbx_type
* _audit_author.name
* _atom_site.
*
*
* @author Bob Hanson ([email protected] )
*
*/
public class MMCifReader extends CifReader {
/** True when the load filter selects an assembly (ASSEMBLY, or BIOMOLECULE rewritten to ASSEMBLY). */
protected boolean isBiomolecule;
/** Coarse-graining modes requested with the BYCHAIN / BYSYMOP filter keys. */
private boolean byChain, bySymop;
/** Accumulated position per chain id; scaled by 1/count to place a particle. */
private Map<String, P3> chainAtomMap;
/** Per chain id, an array whose element 0 is the atom count for that chain. */
private Map<String, int[]> chainAtomCounts;
/** One info map per assembly ("name", "molecule", "biomts", "chains", "assemblies", "operators"). */
protected Lst<Map<String, Object>> vBiomolecules;
/** Assembly operator matrices keyed by operator id. */
private Map<String, M4> htBiomts;
/** Site info maps keyed by site id; each carries a "groups" residue-list string. */
protected Map<String, Map<String, Object>> htSites;
/** Hetero group id mapped to its descriptive name. */
protected Map<String, String> htHetero;
/** Component id mapped to its bond records, each { atom1, atom2, Integer order }. */
private Map<String, Lst<Object[]>> htBondMap;
/** Asym id mapped to the bitset of atoms that belong to it. */
private Map<String, BS> assemblyIdAtoms;
private int thisChain = -1;
private int modelIndex = 0;
private P3 chainSum;
private int[] chainAtomCount;
/** True for saved-state script versions that must not read ligand bonding (see initSubclass). */
private boolean isLigandBondBug;
// Jmol-14.3.3_2014.07.27 broke mmCIF bond reading for ligands
// Jmol-14.3.9_2014.11.11 fixes this.
/** Reference matrix from M4.newM4(null); operators equal to it are handled as the identity. */
M4 mident;
/**
 * Configures the reader from the load filter and load parameters before any
 * data is parsed: marks the file as PDB-like mmCIF, records the DSSP1 and
 * mutate flags, and decodes the ADDBONDS, BYCHAIN, BIOMOLECULE/ASSEMBLY and
 * BYSYMOP filter keys.
 */
@Override
protected void initSubclass() {
  setIsPDB();
  mident = M4.newM4(null);
  isMMCIF = true;
  if (isDSSP1)
    asc.setInfo("isDSSP1", Boolean.TRUE);
  if (htParams.containsKey("isMutate"))
    asc.setInfo("isMutate", Boolean.TRUE);
  doSetBonds = checkFilterKey("ADDBONDS");
  byChain = checkFilterKey("BYCHAIN");
  // The PDB-format keyword BIOMOLECULE is accepted as a synonym of ASSEMBLY.
  if (checkFilterKey("BIOMOLECULE"))
    filter = PT.rep(filter, "BIOMOLECULE", "ASSEMBLY");
  isBiomolecule = checkFilterKey("ASSEMBLY");
  if (isBiomolecule) {
    filter = filter.replace(':', ' '); // no chain selection for biomolecules
    bySymop = checkFilterKey("BYSYMOP");
  }
  isCourseGrained = byChain || bySymop;
  if (isCourseGrained) {
    // Parameterized rather than raw tables, so lookups need no casts.
    chainAtomMap = new Hashtable<String, P3>();
    chainAtomCounts = new Hashtable<String, int[]>();
  }
  // When this reader was split off from CifReader, a bug was introduced
  // into the Resolver that made it so that ligand files were read by
  // CifReader and not MMCifReader. This caused CHEM_COMP_BOND records to be
  // skipped and so in the case of pdbAddHydrogen no hydrogens added.
  // States saved by the affected versions must keep that behavior.
  isLigandBondBug = (stateScriptVersionInt >= 140204 && stateScriptVersionInt <= 140208
      || stateScriptVersionInt >= 140304 && stateScriptVersionInt <= 140308);
}
/**
 * Handles a non-loop data item. Categories that normally appear as loops are
 * forwarded to the loop processor; the "_rna3d" and "_dssr" keys capture
 * their attached data.
 *
 * @throws Exception passed through from the loop processors or the reader
 */
@Override
protected void processSubclassEntry() throws Exception {
  boolean isSingleRowLoopCategory = key0.startsWith(FAMILY_ASSEM_CAT)
      || key0.startsWith(FAMILY_STRUCTCONN_CAT)
      || key0.startsWith(FAMILY_SEQUENCEDIF_CAT)
      || key0.startsWith(FAMILY_STRUCTCONF_CAT)
      || key0.startsWith(FAMILY_SHEET_CAT);
  if (isSingleRowLoopCategory) {
    processSubclassLoopBlock();
    return;
  }
  if (key.equals("_rna3d")) {
    addedData = data;
    addedDataKey = key;
    return;
  }
  if (key.equals("_dssr")) {
    dssr = vwr.parseJSON(reader.readLine());
    reader.readLine(); // sometimes there is a null character here
  }
}
/**
 * Dispatches a loop_ block to the handler for its category prefix.
 * Operator, assembly and sequence-difference categories are always read;
 * everything else is skipped for coarse-grained loads, and the two bond
 * categories are additionally skipped for states saved by versions with the
 * ligand bond bug.
 *
 * @return the handler's result, or false when the block is not handled
 * @throws Exception passed through from the handler
 */
@Override
protected boolean processSubclassLoopBlock() throws Exception {
  String category = key0;
  boolean isNCS = category.startsWith(FAMILY_NCS_CAT);
  if (isNCS || category.startsWith(FAMILY_OPER_CAT))
    return processStructOperListBlock(isNCS);
  if (category.startsWith(FAMILY_ASSEM_CAT))
    return processAssemblyGenBlock();
  if (category.startsWith(FAMILY_SEQUENCEDIF_CAT))
    return processSequence();
  if (isCourseGrained)
    return false;
  if (category.startsWith(FAMILY_STRUCSITE_CAT))
    return processStructSiteBlock();
  if (category.startsWith(FAMILY_CHEMCOMP_CAT))
    return processChemCompLoopBlock();
  if (category.startsWith(FAMILY_STRUCTCONF_CAT))
    return processStructConfLoopBlock();
  if (category.startsWith(FAMILY_SHEET_CAT))
    return processStructSheetRangeLoopBlock();
  // alas -- saved states must not read ligand bonding
  // the problem was that these files were not recognized as mmCIF
  // files by the resolver when this MMCifReader was created.
  if (isLigandBondBug)
    return false;
  if (category.startsWith(FAMILY_COMPBOND_CAT))
    return processCompBondLoopBlock();
  return category.startsWith(FAMILY_STRUCTCONN_CAT) && processStructConnLoopBlock();
}
// When true, finalizeSubclass() skips the validation/DSSR/RNA3D annotation
// step and finishes by calling sortAssemblyModels(). It is not assigned in
// the part of the class shown alongside this declaration.
private boolean requiresSorting;
/**
 * Regroups atoms so that all atoms of one model share one atom set.
 *
 * mmCIF assembly atoms can arrive in blocks ordered by chain
 * (Model1:Chain1 Model2:Chain1 Model1:Chain2 Model2:Chain2 ...), which
 * assigns them to too many atom sets. For each model id in modelStrings, the
 * atoms of every atom set mapped to that id are moved, in order, into a new
 * atom array and given the index of the first such set. The atom bitset, if
 * any, is rebuilt to match, and atomSetCount is reduced when fewer sets
 * remain in use.
 */
protected void sortAssemblyModels() {
  int totalAtoms = asc.ac;
  int highestSetUsed = -1;
  Atom[] oldAtoms = asc.atoms;
  Atom[] sorted = new Atom[totalAtoms];
  String[] modelIds = PT.split("," + modelStrings + ",", ",,");
  BS bsSorted = (asc.bsAtoms == null ? null : BS.newN(asc.bsAtoms.size()));
  int nSorted = 0;
  // element 0 of the split is skipped, as in the model-string convention used here
  for (int iModel = 1; iModel < modelIds.length; iModel++) {
    String wantedModel = modelIds[iModel];
    int targetSet = -1;
    for (int iSet = 0; iSet < asc.atomSetCount; iSet++) {
      int first = asc.getAtomSetAtomIndex(iSet);
      int end = first + asc.getAtomSetAtomCount(iSet);
      String setModel = "" + modelMap.get("_" + iSet);
      if (!setModel.equals(wantedModel))
        continue;
      if (targetSet < 0) {
        // first atom set of this model becomes the home of all its atoms
        targetSet = iSet;
        if (iSet > highestSetUsed)
          highestSetUsed = iSet;
      }
      for (int iAtom = first; iAtom < end; iAtom++) {
        if (bsSorted != null && !asc.bsAtoms.get(iAtom))
          continue;
        if (bsSorted != null)
          bsSorted.set(nSorted);
        oldAtoms[iAtom].atomSetIndex = targetSet;
        sorted[nSorted++] = oldAtoms[iAtom];
      }
    }
  }
  asc.atoms = sorted;
  asc.bsAtoms = bsSorted;
  int setsInUse = highestSetUsed + 1;
  if (setsInUse < asc.atomSetCount)
    asc.atomSetCount = setsInUse;
}
/**
 * Completes the load once all data blocks have been read. In order: creates
 * BYCHAIN particles (non-assembly loads), attaches validation / DSSR / RNA3D
 * annotations, names hetero groups, adds bonds, registers sites, applies the
 * selected assembly's operators, and finally re-sorts assembly models when
 * requiresSorting is set.
 *
 * @return false when no atoms were read and the load is not coarse-grained;
 *         true otherwise
 * @throws Exception passed through from the helpers called here
 */
@Override
protected boolean finalizeSubclass() throws Exception {
// BYCHAIN without an assembly: one particle per chain seen
if (byChain && !isBiomolecule)
for (String id : chainAtomMap.keySet())
createParticle(id);
boolean haveBiomolecule = (isBiomolecule && vBiomolecules != null && vBiomolecules.size() > 0);
// NOTE(review): asc.ac == nAtoms presumably means the current atom set
// received no atoms; nAtoms is maintained outside this method -- confirm.
if (!isCourseGrained && asc.ac == nAtoms) {
asc.removeCurrentAtomSet();
} else {
// annotations are only attached to full-atom, unsorted loads
if ((dssr != null || validation != null || addedData != null) && !isCourseGrained && !requiresSorting) {
MMCifValidationParser vs = ((MMCifValidationParser) getInterface("org.jmol.adapter.readers.cif.MMCifValidationParser"))
.set(this);
String note = null;
if (addedData == null) {
if (validation != null || dssr != null)
note = vs.finalizeValidations(vwr, modelMap);
} else if (addedDataKey.equals("_rna3d")) {
note = vs.finalizeRna3d(modelMap);
}
if (note != null)
appendLoadNote(note);
}
setHetero();
if (doSetBonds)
setBonds();
}
if (asc.ac == 0 && !isCourseGrained)
return false;
// remember the file's space group name; it is restored below if the unit
// cell is still to be checked after the assembly has been built
String spaceGroup = sgName;
if (htSites != null)
addSites(htSites);
if (haveBiomolecule) {
asc.setCurrentModelInfo("biomolecules", vBiomolecules);
setBiomolecules();
// thisBiomolecule is the assembly chosen by the filter, if any
if (thisBiomolecule != null) {
if (iHaveFractionalCoordinates)
fractionalizeCoordinates(false);
asc.getXSymmetry().applySymmetryBio(thisBiomolecule,
applySymmetryToBonds, filter);
asc.xtalSymmetry = null;
}
doCheckUnitCell &= iHaveUnitCell && doApplySymmetry;
if (doCheckUnitCell) {
ignoreFileSpaceGroupName = true;
sgName = spaceGroup;
fractionalizeCoordinates(true);
asc.setModelInfoForSet("biosymmetry", null, asc.iSet);
asc.setModelInfoForSet("biosymmetryCount", null, asc.iSet);
asc.checkSpecial = false;
// BYCHAIN loads stop here: no unit cell script, no model sorting
if (byChain)
return true;
}
}
if (latticeCells != null && latticeCells[0] != 0)
addJmolScript("unitcell;axes on;axes unitcell;");
if (requiresSorting)
sortAssemblyModels();
return true;
}
////////////////////////////////////////////////////////////////
// assembly data
////////////////////////////////////////////////////////////////
/**
 * Turns off special-position checking and records, for the current atom
 * set, its first atom index, its atom count and the largest serial number
 * under the "PDB_CONECT_firstAtom_count_max" model info key.
 *
 * @return always false
 */
@Override
protected boolean checkSubclassSymmetry() {
  asc.checkSpecial = false;
  int currentSet = asc.iSet;
  int firstAtom = asc.getAtomSetAtomIndex(currentSet);
  int atomCount = asc.getAtomSetAtomCount(currentSet);
  int[] conectInfo = new int[] { firstAtom, atomCount, maxSerial };
  asc.setCurrentModelInfo("PDB_CONECT_firstAtom_count_max", conectInfo);
  return false;
}
/**
 * Adds bonds from the tables collected while reading: first the
 * per-component bond templates in htBondMap, applied residue by residue,
 * then (if present) the explicit atom-to-atom links in structConnMap.
 * Does nothing when htBondMap is null. Ends by appending a load note with
 * the current asc.bondCount.
 *
 * Note that setting bonds from _struct_conn is only done if we have updated
 * CIF files, which include _chem_comp_bond.
 */
private void setBonds() {
if (htBondMap == null)
return;
// without an explicit atom selection, consider every atom
BS bsAtoms = asc.bsAtoms;
if (bsAtoms == null)
bsAtoms = BSUtil.newBitSet2(0, asc.ac);
Atom[] atoms = asc.atoms;
float seqid = -1;
String comp = null;
Map map = null;
// Pass 1: walk the atoms, grouping consecutive atoms with the same residue
// key, and flush each finished group through its component's bond list.
for (int i = bsAtoms.nextSetBit(0); i >= 0; i = bsAtoms.nextSetBit(i + 1)) {
Atom a = atoms[i];
// residue key: vib.x when a vib vector is attached, else the sequence number
// NOTE(review): vib.x presumably carries an alternative sequence id -- confirm where vib is set
float pt = (a.vib == null ? a.sequenceNumber : a.vib.x);
if (pt != seqid) {
seqid = pt;
// a new residue starts: bond the one just completed, if it had a template
if (comp != null)
processBonds(htBondMap.get(comp), map, false);
map = new Hashtable();
comp = atoms[i].group3;
if (!htBondMap.containsKey(comp)) {
comp = null;
continue;
}
}
if (comp == null)
continue;
// atom name -> atom index, for the residue currently being collected
map.put(a.atomName, Integer.valueOf(a.index));
}
// flush the last residue
if (comp != null)
processBonds(htBondMap.get(comp), map, false);
// Pass 2: _struct_conn links. One map is built for the whole structure,
// keyed the same way as the keys built in processStructConnLoopBlock.
if (structConnMap != null) {
map = new Hashtable();
seqid = -1;
comp = null;
for (int i = bsAtoms.nextSetBit(0); i >= 0; i = bsAtoms.nextSetBit(i + 1)) {
Atom a = atoms[i];
float pt = (a.vib == null ? a.sequenceNumber : a.vib.x);
if (pt != seqid) {
seqid = pt;
String ckey = a.chainID + a.group3 + seqid;
// only residues mentioned in some _struct_conn record are indexed
if (structConnList.indexOf(ckey) < 0) {
comp = null;
continue;
}
comp = ckey;
}
if (comp == null)
continue;
map.put(comp + a.atomName + a.altLoc, Integer.valueOf(a.index));
}
processBonds(structConnMap, map, true);
}
appendLoadNote(asc.bondCount + " bonds added");
}
/**
 * Adds one bond for every record in cmap whose two atom keys are both
 * present in map; records with a missing end are skipped.
 *
 * @param cmap bond records, each { key1, key2, Integer order }
 * @param map atom key to atom index
 * @param isStructConn true when the records come from _struct_conn; only
 *        affects the debug message
 */
private void processBonds(Lst<Object[]> cmap, Map<String, Integer> map, boolean isStructConn) {
  Integer i1, i2;
  for (int i = 0, n = cmap.size(); i < n; i++) {
    Object[] o = cmap.get(i);
    if ((i1 = map.get(o[0])) == null || (i2 = map.get(o[1])) == null)
      continue;
    if (debugging)
      Logger.debug((isStructConn ? "_struct_conn" : "_comp_bond") + " adding bond " + i1 + " " + i2 + " order=" + o[2]);
    asc.addNewBondWithOrder(i1.intValue(), i2.intValue(), ((Integer) o[2]).intValue());
  }
}
// Field indices for the two operator categories. Indices 0-11 of the field
// arrays are the three rows of rotation values plus translation, and
// processStructOperListBlock stores them directly at the same positions of
// its float[16] buffer; 12 and 13 are the operator id and its x,y,z string.
final private static byte OPER_ID = 12;
final private static byte OPER_XYZ = 13;
// _struct_ncs_oper: noncrystallographic symmetry operators
final private static String FAMILY_NCS_CAT = "_struct_ncs_oper.";
final private static String FAMILY_NCS = "_struct_ncs_oper";
final private static String[] ncsoperFields = {
"*_matrix[1][1]",
"*_matrix[1][2]",
"*_matrix[1][3]",
"*_vector[1]",
"*_matrix[2][1]",
"*_matrix[2][2]",
"*_matrix[2][3]",
"*_vector[2]",
"*_matrix[3][1]",
"*_matrix[3][2]",
"*_matrix[3][3]",
"*_vector[3]",
"*_id",
"*_symmetry_operation"
};
// _pdbx_struct_oper_list: assembly operators (same field layout as above)
final private static String FAMILY_OPER_CAT = "_pdbx_struct_oper_list.";
final private static String FAMILY_OPER = "_pdbx_struct_oper_list";
final private static String[] operFields = {
"*_matrix[1][1]",
"*_matrix[1][2]",
"*_matrix[1][3]",
"*_vector[1]",
"*_matrix[2][1]",
"*_matrix[2][2]",
"*_matrix[2][3]",
"*_vector[2]",
"*_matrix[3][1]",
"*_matrix[3][2]",
"*_matrix[3][3]",
"*_vector[3]",
"*_id",
"*_symmetry_operation"
};
// _pdbx_struct_assembly_gen: indices into assemblyFields, also used as the
// slots of the String[3] handed to addAssembly
final private static byte ASSEM_ID = 0;
final private static byte ASSEM_OPERS = 1;
final private static byte ASSEM_LIST = 2;
final private static String FAMILY_ASSEM_CAT = "_pdbx_struct_assembly_gen.";
final private static String[] assemblyFields = {
"_pdbx_struct_assembly_gen_assembly_id",
"_pdbx_struct_assembly_gen_oper_expression",
"_pdbx_struct_assembly_gen_asym_id_list"
};
/*
_pdbx_struct_assembly_gen.assembly_id 1
_pdbx_struct_assembly_gen.oper_expression 1,2,3,4
_pdbx_struct_assembly_gen.asym_id_list A,B,C
#
loop_
_pdbx_struct_oper_list.id
_pdbx_struct_oper_list.type
_pdbx_struct_oper_list.name
_pdbx_struct_oper_list.symmetry_operation
_pdbx_struct_oper_list.matrix[1][1]
_pdbx_struct_oper_list.matrix[1][2]
_pdbx_struct_oper_list.matrix[1][3]
_pdbx_struct_oper_list.vector[1]
_pdbx_struct_oper_list.matrix[2][1]
_pdbx_struct_oper_list.matrix[2][2]
_pdbx_struct_oper_list.matrix[2][3]
_pdbx_struct_oper_list.vector[2]
_pdbx_struct_oper_list.matrix[3][1]
_pdbx_struct_oper_list.matrix[3][2]
_pdbx_struct_oper_list.matrix[3][3]
_pdbx_struct_oper_list.vector[3]
1 'identity operation' 1_555 x,y,z 1.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
1.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 1.0000000000 0.0000000000
2 'crystal symmetry operation' 15_556 y,x,-z+1 0.0000000000 1.0000000000 0.0000000000 0.0000000000 1.0000000000
0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 -1.0000000000 52.5900000000
3 'crystal symmetry operation' 10_665 -x+1,-y+1,z -1.0000000000 0.0000000000 0.0000000000 68.7500000000 0.0000000000
-1.0000000000 0.0000000000 68.7500000000 0.0000000000 0.0000000000 1.0000000000 0.0000000000
4 'crystal symmetry operation' 8_666 -y+1,-x+1,-z+1 0.0000000000 -1.0000000000 0.0000000000 68.7500000000 -1.0000000000
0.0000000000 0.0000000000 68.7500000000 0.0000000000 0.0000000000 -1.0000000000 52.5900000000
#
*/
// _struct_ref_seq_dif: indices into structRefFields. processSequence maps
// the value at STRUCT_REF_G3 (mon_id) to the lower-cased single character
// at STRUCT_REF_G1 (db_mon_id).
final private static String FAMILY_SEQUENCEDIF_CAT = "_struct_ref_seq_dif.";
final private static byte STRUCT_REF_G3 = 0;
final private static byte STRUCT_REF_G1 = 1;
final private static String[] structRefFields = {
"_struct_ref_seq_dif_mon_id",
"_struct_ref_seq_dif_db_mon_id"
};
/**
 * Collects the canonical 1-letter DNA/RNA sequence code for each 3-letter
 * code listed in _struct_ref_seq_dif, for example "2MG" --> "g". Rows whose
 * database code is null or not exactly one character, or whose 3-letter
 * code is null, are ignored. The table is created on first use and
 * published as the "htGroup1" info entry.
 *
 * @return true
 * @throws Exception passed through from the loop parser
 */
private boolean processSequence() throws Exception {
  parseLoopParameters(structRefFields);
  while (parser.getData()) {
    String oneLetter = getField(STRUCT_REF_G1).toLowerCase();
    if (isNull(oneLetter) || oneLetter.length() != 1)
      continue;
    String threeLetter = getField(STRUCT_REF_G3);
    if (isNull(threeLetter))
      continue;
    if (htGroup1 == null) {
      htGroup1 = new Hashtable();
      asc.setInfo("htGroup1", htGroup1);
    }
    htGroup1.put(threeLetter, oneLetter);
  }
  return true;
}
/**
 * Reads _pdbx_struct_assembly_gen rows and registers every row that
 * supplies all three of assembly id, operator expression and asym id list.
 *
 * @return true
 * @throws Exception passed through from the loop parser or addAssembly
 */
private boolean processAssemblyGenBlock() throws Exception {
  parseLoopParameters(assemblyFields);
  while (parser.getData()) {
    String[] row = new String[3];
    int nFound = 0;
    int nColumns = parser.getColumnCount();
    for (int col = 0; col < nColumns; ++col) {
      int slot = fieldProperty(col);
      if (slot == ASSEM_ID || slot == ASSEM_OPERS || slot == ASSEM_LIST) {
        nFound++;
        row[slot] = field;
      }
    }
    if (nFound == 3)
      addAssembly(row);
  }
  return true;
}
@SuppressWarnings("unchecked")
protected void addAssembly(String[] assem) throws Exception {
String id = assem[ASSEM_ID];
String list = assem[ASSEM_LIST];
String operators = assem[ASSEM_OPERS];
String name = "biomolecule " + id;
Logger.info(name + " operators " + operators
+ " ASYM_IDs " + list);
appendLoadNote("found " + name + ": " + list);
if (vBiomolecules == null)
vBiomolecules = new Lst>();
Map info = null;
for (int i = vBiomolecules.size(); --i >= 0;)
if (vBiomolecules.get(i).get("name").equals(name)) {
info = vBiomolecules.get(i);
break;
}
if (info == null) {
info = new Hashtable();
info.put("name", name);
int iMolecule = parseIntStr(id);
info.put("molecule",
iMolecule == Integer.MIN_VALUE ? id : Integer.valueOf(iMolecule));
info.put("biomts", new Lst());
info.put("chains", new Lst());
info.put("assemblies", new Lst());
info.put("operators", new Lst());
vBiomolecules.addLast(info);
}
((Lst) info.get("assemblies")).addLast("$" + list.replace(',', '$'));
((Lst) info.get("operators")).addLast(decodeAssemblyOperators(operators));
checkFilterAssembly(id, info);
}
/**
 * Makes the given assembly the selected one when the load filter names it,
 * either as "ASSEMBLY id;" or as "ASSEMBLY=id;".
 *
 * @param id assembly id as read from the file
 * @param info the assembly's info map
 */
protected void checkFilterAssembly(String id, Map info) {
  boolean isSelected = checkFilterKey("ASSEMBLY " + id + ";");
  if (!isSelected)
    isSelected = checkFilterKey("ASSEMBLY=" + id + ";");
  if (isSelected)
    thisBiomolecule = info;
}
/**
 * Expands an operator expression from category PDBX_STRUCT_OPER_LIST into a
 * comma-separated list of operator ids, with '|' joining the ids of a
 * combined operation.
 *
 * Operation expressions may have the forms:
 *
 * (1)        the single operation 1
 * (1,2,5)    the operations 1, 2, 5
 * (1-4)      the operations 1,2,3 and 4
 * (1,2)(3,4) the combinations of operations 3 and 4 followed by 1 and 2
 *            (i.e. the cartesian product of parenthetical groups applied
 *            from right to left)
 *
 * An expression that does not start with '(' is returned unchanged.
 *
 * @param ops the expression as it appears in the file
 * @return the expanded list
 */
private String decodeAssemblyOperators(String ops) {
  int groupBreak = ops.indexOf(")(");
  if (groupBreak >= 0) {
    String left = decodeAssemblyOperators(ops.substring(0, groupBreak + 1));
    String right = decodeAssemblyOperators(ops.substring(groupBreak + 1));
    return crossBinary(left, right);
  }
  if (!ops.startsWith("("))
    return ops;
  String expanded = ops;
  if (expanded.indexOf("-") >= 0) {
    // let the bitset parser expand ranges: "(1-4)" is read as "({1:4})"
    String inner = expanded.substring(1, expanded.length() - 1);
    String bitsetText = "({" + inner.replace('-', ':').replace(',', ' ') + "})";
    expanded = BS.unescape(bitsetText).toJSON();
  }
  expanded = PT.rep(expanded, " ", "");
  // drop the enclosing bracket pair
  return expanded.substring(1, expanded.length() - 1);
}
/**
 * Builds the cartesian product of two comma-separated operator lists.
 * Every pairing is written as "left|right" and the pairings are joined with
 * commas, left list varying slowest.
 *
 * @param ops1 comma-separated left-hand operators
 * @param ops2 comma-separated right-hand operators
 * @return the comma-separated pairings
 */
private String crossBinary(String ops1, String ops2) {
  String[] leftOps = PT.split(ops1, ",");
  String[] rightOps = PT.split(ops2, ",");
  SB pairs = new SB();
  for (String left : leftOps) {
    for (String right : rightOps) {
      pairs.append(",").append(left).append("|").append(right);
    }
  }
  // strip the leading comma
  return pairs.toString().substring(1);
}
/**
 * Reads an operator list (_struct_ncs_oper or _pdbx_struct_oper_list) and
 * registers each operator through addMatrix. A row is used when it has an
 * id and either all twelve matrix/vector values or, failing that, an x,y,z
 * operation string that the current symmetry object can convert.
 *
 * @param isNCS true for noncrystallographic operators, false for assembly
 *        operators
 * @return true
 * @throws Exception passed through from the loop parser
 */
private boolean processStructOperListBlock(boolean isNCS) throws Exception {
  parseLoopParametersFor((isNCS ? FAMILY_NCS : FAMILY_OPER), isNCS ? ncsoperFields : operFields);
  // shared buffer for all rows; the last element stays 1
  float[] values = new float[16];
  values[15] = 1;
  while (parser.getData()) {
    int nValues = 0;
    String operId = null;
    String operXyz = null;
    int nColumns = parser.getColumnCount();
    for (int col = 0; col < nColumns; ++col) {
      int prop = fieldProperty(col);
      if (prop == NONE)
        continue;
      if (prop == OPER_ID) {
        operId = field;
      } else if (prop == OPER_XYZ) {
        operXyz = field;
      } else {
        values[prop] = parseFloatStr(field);
        ++nValues;
      }
    }
    boolean haveFullMatrix = (nValues == 12);
    if (operId == null || !haveFullMatrix && (operXyz == null || symmetry == null))
      continue;
    Logger.info((isNCS ? "noncrystallographic symmetry operator " : "assembly operator ") + operId + " " + operXyz);
    M4 m4 = new M4();
    if (!haveFullMatrix) {
      // derive the matrix from the x,y,z string, then rescale the
      // translation column by cell length / 12
      symmetry.getMatrixFromString(operXyz, values, false, 0);
      values[3] *= symmetry.getUnitCellInfoType(SimpleUnitCell.INFO_A) / 12;
      values[7] *= symmetry.getUnitCellInfoType(SimpleUnitCell.INFO_B) / 12;
      values[11] *= symmetry.getUnitCellInfoType(SimpleUnitCell.INFO_C) / 12;
    }
    m4.setA(values);
    addMatrix(operId, m4, isNCS);
  }
  return true;
}
/**
 * Stores an operator matrix. Assembly operators go into htBiomts under
 * their id. NCS operators are appended to lstNCS with m33 cleared, except
 * that an operator equal to mident is dropped.
 *
 * @param id operator id (used only for assembly operators)
 * @param m4 the operator matrix; modified in place for NCS operators
 * @param isNCS true for a noncrystallographic symmetry operator
 */
protected void addMatrix(String id, M4 m4, boolean isNCS) {
  if (!isNCS) {
    if (htBiomts == null)
      htBiomts = new Hashtable();
    htBiomts.put(id, m4);
    return;
  }
  if (m4.equals(mident))
    return;
  m4.m33 = 0; // flag for normalization
  if (lstNCS == null)
    lstNCS = new Lst();
  lstNCS.addLast(m4);
}
////////////////////////////////////////////////////////////////
// HETATM identity
////////////////////////////////////////////////////////////////
// _chem_comp: indices into chemCompFields, read by processChemCompLoopBlock
// as (group id, descriptive name) pairs.
final private static byte CHEM_COMP_ID = 0;
final private static byte CHEM_COMP_NAME = 1;
final private static String FAMILY_CHEMCOMP_CAT = "_chem_comp.";
final private static String[] chemCompFields = {
"_chem_comp_id",
"_chem_comp_name"
};
/**
 * Reads the general component name definitions in _chem_comp and offers
 * each (id, name) pair to addHetero with checking and load notes enabled.
 * Not all components are hetero groups; addHetero does the filtering. Rows
 * with a null id or a null name are ignored.
 *
 * @return true
 * @throws Exception passed through from the loop parser
 */
private boolean processChemCompLoopBlock() throws Exception {
  parseLoopParameters(chemCompFields);
  while (parser.getData()) {
    String compId = getField(CHEM_COMP_ID);
    if (isNull(compId))
      continue;
    String compName = getField(CHEM_COMP_NAME);
    if (isNull(compName))
      continue;
    addHetero(compId, compName, true, true);
  }
  return true;
}
// final private static byte NONPOLY_NAME = 0;
// final private static byte NONPOLY_COMP_ID = 1;
//
// private static final String FAMILY_PDBX_NONPOLY_CAT = "_pdbx_entity_nonpoly.";
//
// final private static String[] nonpolyFields = {
// "_pdbx_entity_nonpoly_name",
// "_pdbx_entity_nonpoly_comp_id", };
//
//
// /**
// *
// * a HETERO name definition field. Maybe not all hetero? nonpoly?
// *
// * @return true if successful; false to skip
// *
// * @throws Exception
// */
// private boolean processNonpolyLoopBlock() throws Exception {
// parseLoopParameters(nonpolyFields);
// String groupName, hetName;
// while (parser.getData()) {
// if (isNull(groupName = getField(NONPOLY_COMP_ID))
// || isNull(hetName = getField(NONPOLY_NAME)))
// return false;
// addHetero(groupName, hetName, true);
// }
// return true;
// }
/**
 * Records the descriptive name of a hetero group.
 *
 * @param groupName group (component) id
 * @param hetName descriptive name
 * @param doCheck when true, the group is recorded only if the viewer's
 *        resolver reports it as hetero and no name is recorded for it yet;
 *        when false, the name is stored unconditionally
 * @param addNote when true, a "group = name" load note is appended
 */
protected void addHetero(String groupName, String hetName, boolean doCheck, boolean addNote) {
  boolean passesHeteroCheck = !doCheck || vwr.getJBR().isHetero(groupName);
  if (!passesHeteroCheck)
    return;
  if (htHetero == null)
    htHetero = new Hashtable();
  boolean alreadyNamed = doCheck && htHetero.containsKey(groupName);
  if (alreadyNamed)
    return;
  htHetero.put(groupName, hetName);
  if (addNote)
    appendLoadNote(groupName + " = " + hetName);
}
////////////////////////////////////////////////////////////////
// helix and turn structure data
////////////////////////////////////////////////////////////////
// _struct_conf: indices into structConfFields. Indices 1-6 (begin/end chain,
// sequence number, insertion code) line up with the same positions in
// structSheetRangeFields, which lets addStructure serve both categories.
final private static byte CONF_TYPE_ID = 0;
final private static byte BEG_ASYM_ID = 1;
final private static byte BEG_SEQ_ID = 2;
final private static byte BEG_INS_CODE = 3;
final private static byte END_ASYM_ID = 4;
final private static byte END_SEQ_ID = 5;
final private static byte END_INS_CODE = 6;
final private static byte STRUCT_ID = 7;
final private static byte SERIAL_NO = 8;
final private static byte HELIX_CLASS = 9;
final private static String FAMILY_STRUCTCONF_CAT = "_struct_conf.";
final private static String FAMILY_STRUCTCONF = "_struct_conf";
final private static String[] structConfFields = {
"*_conf_type_id",
"*_beg_auth_asym_id",
"*_beg_auth_seq_id",
"*_pdbx_beg_pdb_ins_code",
"*_end_auth_asym_id",
"*_end_auth_seq_id",
"*_pdbx_end_pdb_ins_code",
"*_id",
"*_pdbx_pdb_helix_id",
"*_pdbx_pdb_helix_class" };
/**
 * Identifies ranges for HELIX and TURN from _struct_conf. Types starting
 * with "TURN" become turns, types starting with "HELX" become helices whose
 * subtype comes from the helix class field, and every other type is stored
 * with structure type NONE.
 *
 * @return true if the loop was read; false if structure is being ignored or
 *         a required field is missing (the loop is skipped in both cases)
 * @throws Exception passed through from the loop parser
 */
private boolean processStructConfLoopBlock() throws Exception {
  if (ignoreStructure) {
    parser.skipLoop(false);
    return false;
  }
  parseLoopParametersFor(FAMILY_STRUCTCONF, structConfFields);
  boolean haveAllFields = checkAllFieldsPresent(structConfFields, -1, true);
  if (!haveAllFields) {
    parser.skipLoop(true);
    return false;
  }
  while (parser.getData()) {
    Structure conf = new Structure(-1, STR.HELIX, STR.HELIX, null, 0, 0, null);
    String confType = getField(CONF_TYPE_ID);
    if (confType.startsWith("HELX")) {
      int helixClass = parseIntStr(getField(HELIX_CLASS));
      conf.substructureType = Structure.getHelixType(helixClass);
    } else if (confType.startsWith("TURN")) {
      conf.structureType = conf.substructureType = STR.TURN;
    } else {
      conf.structureType = conf.substructureType = STR.NONE;
    }
    conf.serialID = parseIntStr(getField(SERIAL_NO));
    conf.structureID = getField(STRUCT_ID);
    addStructure(conf);
  }
  return true;
}
////////////////////////////////////////////////////////////////
// sheet structure data
////////////////////////////////////////////////////////////////
/**
 * Fills in the begin/end chain, sequence number and insertion code of a
 * structure from the current loop row and adds it to the atom set
 * collection. Used for both _struct_conf and _struct_sheet_range rows.
 *
 * @param structure the structure to complete and register
 */
private void addStructure(Structure structure) {
  String begChain = getField(BEG_ASYM_ID);
  structure.startChainStr = begChain;
  structure.startChainID = vwr.getChainID(begChain, true);
  structure.startSequenceNumber = parseIntStr(getField(BEG_SEQ_ID));
  structure.startInsertionCode = getField(BEG_INS_CODE).charAt(0);

  String endChain = getField(END_ASYM_ID);
  structure.endChainStr = endChain;
  structure.endChainID = vwr.getChainID(endChain, true);
  structure.endSequenceNumber = parseIntStr(getField(END_SEQ_ID));
  structure.endInsertionCode = getField(END_INS_CODE).charAt(0);

  asc.addStructure(structure);
}
// _struct_sheet_range: indices into structSheetRangeFields. Positions 1-6
// are read by addStructure through the BEG_*/END_* constants; STRAND_ID is
// the position of the "*_id" field.
final private static byte SHEET_ID = 0;
final private static byte STRAND_ID = 7;
final private static String FAMILY_SHEET_CAT = "_struct_sheet_range.";
final private static String FAMILY_SHEET = "_struct_sheet_range";
final private static String[] structSheetRangeFields = {
"*_sheet_id",
"*_beg_auth_asym_id",
"*_beg_auth_seq_id",
"*_pdbx_beg_pdb_ins_code",
"*_end_auth_asym_id",
"*_end_auth_seq_id",
"*_pdbx_end_pdb_ins_code",
"*_id"
};
/**
 * Identifies sheet ranges from _struct_sheet_range; every row becomes one
 * SHEET structure carrying its sheet id and strand id.
 *
 * @return true if the loop was read; false if structure is being ignored or
 *         a required field is missing (the loop is skipped in both cases)
 * @throws Exception passed through from the loop parser
 */
private boolean processStructSheetRangeLoopBlock() throws Exception {
  if (ignoreStructure) {
    parser.skipLoop(false);
    return false;
  }
  parseLoopParametersFor(FAMILY_SHEET, structSheetRangeFields);
  boolean haveAllFields = checkAllFieldsPresent(structSheetRangeFields, -1, true);
  if (!haveAllFields) {
    parser.skipLoop(true);
    return false;
  }
  while (parser.getData()) {
    String sheetId = getField(SHEET_ID);
    int strandId = parseIntStr(getField(STRAND_ID));
    Structure strand = new Structure(-1, STR.SHEET, STR.SHEET, sheetId, strandId, 1, null);
    addStructure(strand);
  }
  return true;
}
// _struct_site_gen: indices into structSiteFields, read by
// processStructSiteBlock to build "[comp]seq^ins:chain" group strings.
final private static byte SITE_ID = 0;
final private static byte SITE_COMP_ID = 1;
final private static byte SITE_ASYM_ID = 2;
final private static byte SITE_SEQ_ID = 3;
// NOTE(review): used as an insertion code but mapped to label_alt_id below
final private static byte SITE_INS_CODE = 4; //???
final private static String FAMILY_STRUCSITE_CAT = "_struct_site_gen.";
final private static String FAMILY_STRUCSITE = "_struct_site_gen";
final private static String[] structSiteFields = {
"*_site_id",
"*_auth_comp_id",
"*_auth_asym_id",
"*_auth_seq_id",
"*_label_alt_id", //should be an insertion code, not an alt ID?
};
// loop_
// _struct_site_gen.id
// _struct_site_gen.site_id
// _struct_site_gen.pdbx_num_res
// _struct_site_gen.label_comp_id
// _struct_site_gen.label_asym_id
// _struct_site_gen.label_seq_id
// _struct_site_gen.auth_comp_id
// _struct_site_gen.auth_asym_id
// _struct_site_gen.auth_seq_id
// _struct_site_gen.label_atom_id
// _struct_site_gen.label_alt_id
// _struct_site_gen.symmetry
// _struct_site_gen.details
// 1 CAT 5 GLN A 92 GLN A 92 . . ? ?
// 2 CAT 5 GLU A 58 GLU A 58 . . ? ?
// 3 CAT 5 HIS A 40 HIS A 40 . . ? ?
// 4 CAT 5 TYR A 38 TYR A 38 . . ? ?
// 5 CAT 5 PHE A 100 PHE A 100 . . ? ?
// #
/**
*
* identifies structure sites
*
* @return true if successful; false to skip
*
* @throws Exception
*/
private boolean processStructSiteBlock() throws Exception {
parseLoopParametersFor(FAMILY_STRUCSITE, structSiteFields);
Map htSite = null;
htSites = new Hashtable>();
String seqNum, resID;
while (parser.getData()) {
if (isNull(seqNum = getField(SITE_SEQ_ID))
|| isNull(resID = getField(SITE_COMP_ID)))
continue;
String siteID = getField(SITE_ID);
htSite = htSites.get(siteID);
if (htSite == null) {
htSite = new Hashtable();
htSite.put("groups", "");
htSites.put(siteID, htSite);
}
String insCode = getField(SITE_INS_CODE);
String chainID = getField(SITE_ASYM_ID);
String group = "[" + resID + "]" + seqNum
+ (isNull(insCode) ? "" : "^" + insCode)
+ (isNull(chainID) ? "" : ":" + chainID);
String groups = (String) htSite.get("groups");
groups += (groups.length() == 0 ? "" : ",") + group;
htSite.put("groups", groups);
}
return true;
}
/**
 * Finalizes every registered assembly via setBiomolecule, collecting the
 * atoms of the selected assembly. For an assembly load in which that
 * selection covers fewer atoms than were read, the atom set collection's
 * atom bitset is restricted to it: an existing bitset is intersected,
 * otherwise the selection is installed unless the load is coarse-grained.
 * Does nothing when neither per-asym atom sets nor chain counts exist.
 */
private void setBiomolecules() {
  if (assemblyIdAtoms == null && chainAtomCounts == null)
    return;
  BS bsSelected = new BS();
  for (int i = vBiomolecules.size(); --i >= 0;) {
    Map biomolecule = vBiomolecules.get(i);
    BS collector = (biomolecule == thisBiomolecule ? bsSelected : null);
    setBiomolecule(biomolecule, collector);
  }
  if (!isBiomolecule || bsSelected.cardinality() >= asc.ac)
    return;
  if (asc.bsAtoms != null) {
    asc.bsAtoms.and(bsSelected);
  } else if (!isCourseGrained) {
    asc.bsAtoms = bsSelected;
  }
}
/**
 * Resolves one assembly's generation records into operator matrices and
 * chain lists, and stores the resulting atom count under "atomCount".
 *
 * For each (operators, asym ids) record, last to first: the asym ids are
 * gathered into a ":id;" chain list; atoms are counted from
 * assemblyIdAtoms when available, otherwise (coarse-grained assembly
 * loads) chains are either summed for a BYSYMOP particle or turned into
 * one particle each. For assembly loads every operator's matrix is then
 * added to "biomts" with its chain list in "chains"; operators equal to
 * mident are placed first.
 *
 * @param biomolecule assembly info map created by addAssembly
 * @param bsAll when not null, receives the atoms of this assembly; also
 *        enables creation of the BYSYMOP particle
 * @return the atom count summed over all records (atoms times operators),
 *         or 0 as soon as an operator has no matrix
 */
@SuppressWarnings("unchecked")
private int setBiomolecule(Map<String, Object> biomolecule, BS bsAll) {
  Lst<String> biomtchains = (Lst<String>) biomolecule.get("chains");
  Lst<M4> biomts = (Lst<M4>) biomolecule.get("biomts");
  Lst<String> operators = (Lst<String>) biomolecule.get("operators");
  Lst<String> assemblies = (Lst<String>) biomolecule.get("assemblies");
  P3 sum = new P3();
  int count = 0;
  int nAtomsTotal = 0;
  boolean isBioCourse = (isBiomolecule && isCourseGrained);
  for (int i = operators.size(); --i >= 0;) {
    String[] ops = PT.split(operators.get(i), ",");
    // assemblies entries look like "$A$B$C", so element 0 of the split is empty
    String[] ids = PT.split(assemblies.get(i), "$");
    String chainlist = "";
    int nAtoms = 0;
    for (int j = 1; j < ids.length; j++) {
      String id = ids[j];
      chainlist += ":" + id + ";";
      if (assemblyIdAtoms != null) {
        // NOTE(review): the key really is spelled "asemblyIdAtoms"; readers
        // of this map presumably use the same spelling -- check before renaming
        biomolecule.put("asemblyIdAtoms", assemblyIdAtoms);
        BS bs = assemblyIdAtoms.get(id);
        if (bs != null) {
          if (bsAll != null)
            bsAll.or(bs);
          nAtoms += bs.cardinality();
        }
      } else if (isBioCourse) {
        P3 asum = chainAtomMap.get(id);
        if (asum != null) {
          if (bySymop) {
            sum.add(asum);
            count += chainAtomCounts.get(id)[0];
          } else {
            createParticle(id);
            nAtoms++;
          }
        }
      }
    }
    if (!isBiomolecule)
      continue;
    for (int j = 0; j < ops.length; j++) {
      M4 m = getOpMatrix(ops[j]);
      if (m == null)
        return 0;
      if (m.equals(mident)) {
        biomts.add(0, mident);
        biomtchains.add(0, chainlist);
      } else {
        biomts.addLast(m);
        biomtchains.addLast(chainlist);
      }
    }
    if (bySymop && bsAll != null) {
      // a single particle at the mean position of the chains summed so far
      nAtoms = 1;
      Atom a1 = new Atom();
      a1.setT(sum);
      a1.scale(1f / count);
      a1.radius = 16;
      asc.addAtom(a1);
    }
    nAtoms *= ops.length;
    nAtomsTotal += nAtoms;
  }
  biomolecule.put("atomCount", Integer.valueOf(nAtomsTotal));
  return nAtomsTotal;
}
/**
 * Adds one coarse-grained particle for a chain: an atom with element symbol
 * "Pt" and radius 16, positioned at the chain's accumulated position
 * divided by its atom count, and tagged with the chain id.
 *
 * @param id chain id; must be present in chainAtomMap and chainAtomCounts
 */
private void createParticle(String id) {
  P3 positionSum = chainAtomMap.get(id);
  int nChainAtoms = chainAtomCounts.get(id)[0];
  Atom particle = new Atom();
  particle.setT(positionSum);
  float inverseCount = 1f / nChainAtoms;
  particle.scale(inverseCount);
  particle.elementSymbol = "Pt";
  setChainID(particle, id);
  particle.radius = 16;
  asc.addAtom(particle);
}
/**
 * Returns the matrix for one decoded operator: either a single operator id
 * or a product "a|b|..." as produced by decodeAssemblyOperators.
 *
 * A product is evaluated as M(a) * (M(b) * ...), so expressions with more
 * than two parenthesised groups are supported. If any id in the product has
 * no matrix the result is null, exactly as for an unknown single id; the
 * caller treats null as "assembly cannot be built". Previously an unknown
 * right-hand id caused a failure inside the multiplication and an unknown
 * left-hand id was silently replaced by the matrix M4.newM4(null) yields.
 *
 * @param ops operator id, or ids joined with '|'
 * @return the stored matrix for a single id, a new matrix for a product,
 *         null if an operator is undefined, or the M4.newM4(null) matrix
 *         when no operators were read at all
 */
private M4 getOpMatrix(String ops) {
  if (htBiomts == null)
    return M4.newM4(null);
  int pt = ops.indexOf("|");
  if (pt < 0)
    return htBiomts.get(ops);
  M4 left = htBiomts.get(ops.substring(0, pt));
  M4 right = getOpMatrix(ops.substring(pt + 1));
  if (left == null || right == null)
    return null;
  // copy first: the stored operator must not be modified by the product
  M4 product = M4.newM4(left);
  product.mul(right);
  return product;
}
////////////////////////////////////////////////////////////////
// bond data
////////////////////////////////////////////////////////////////
// _STRUCT_CONN is only processed in the presence of _CHEM_CONN (2015 updated cif from EBI)
// _struct_conn: indices into structConnFields. Entries 0-5 describe partner
// 1 and 6-11 partner 2 (chain, sequence number, component, atom name, alt
// id, symmetry); 12 is the connection type and 13 the bond order.
final private static byte STRUCT_CONN_ASYM1 = 0;
final private static byte STRUCT_CONN_SEQ1 = 1;
final private static byte STRUCT_CONN_COMP1 = 2;
final private static byte STRUCT_CONN_ATOM1 = 3;
final private static byte STRUCT_CONN_ALT1 = 4;
final private static byte STRUCT_CONN_SYMM1 = 5;
final private static byte STRUCT_CONN_ASYM2 = 6;
final private static byte STRUCT_CONN_SEQ2 = 7;
final private static byte STRUCT_CONN_COMP2 = 8;
final private static byte STRUCT_CONN_ATOM2 = 9;
final private static byte STRUCT_CONN_ALT2 = 10;
final private static byte STRUCT_CONN_SYMM2 = 11;
final private static byte STRUCT_CONN_TYPE = 12;
final private static byte STRUCT_CONN_ORDER = 13;
final private static String FAMILY_STRUCTCONN_CAT = "_struct_conn.";
final private static String FAMILY_STRUCTCONN = "_struct_conn";
final private static String[] structConnFields = {
"*_ptnr1_auth_asym_id",
"*_ptnr1_auth_seq_id",
"*_ptnr1_auth_comp_id",
"*_ptnr1_label_atom_id",
"*_pdbx_ptnr1_label_alt_id",
"*_ptnr1_symmetry",
"*_ptnr2_auth_asym_id",
"*_ptnr2_auth_seq_id",
"*_ptnr2_auth_comp_id",
"*_ptnr2_label_atom_id",
"*_pdbx_ptnr2_label_alt_id",
"*_ptnr2_symmetry",
"*_conn_type_id",
"*_pdbx_value_order"
};
//Allowed Value Details
//covale covalent bond
//covale_base covalent modification of a nucleotide base
//covale_phosphate covalent modification of a nucleotide phosphate
//covale_sugar covalent modification of a nucleotide sugar
//disulf disulfide bridge
//metalc metal coordination
//
//// not used:
//hydrog hydrogen bond
//mismat mismatched base pairs
//modres covalent residue modification
//saltbr ionic interaction
/** _struct_conn bond records, each { atomKey1, atomKey2, Integer order }. */
private Lst<Object[]> structConnMap;
/** Concatenation of every atom key seen in _struct_conn; searched by substring in setBonds. */
private String structConnList = "";
/** True when bonds are to be created in finalizeSubclass (ADDBONDS filter or _chem_comp_bond data). */
private boolean doSetBonds;
private boolean processStructConnLoopBlock() throws Exception {
parseLoopParametersFor(FAMILY_STRUCTCONN, structConnFields);
while (parser.getData()) {
String sym1 = getField(STRUCT_CONN_SYMM1);
String sym2 = getField(STRUCT_CONN_SYMM2);
if (!sym1.equals(sym2) || !isNull(sym1) && !sym1.equals("1_555"))
continue;
String type = getField(STRUCT_CONN_TYPE);
if (!type.startsWith("covale") && !type.equals("disulf")
&& !type.equals("metalc"))
continue;
if (htBondMap == null)
htBondMap = new Hashtable>();
String key1 = vwr.getChainID(getField(STRUCT_CONN_ASYM1), true) + getField(STRUCT_CONN_COMP1)
+ parseFloatStr(getField(STRUCT_CONN_SEQ1))
+ getField(STRUCT_CONN_ATOM1) + getField(STRUCT_CONN_ALT1);
String key2 = vwr.getChainID(getField(STRUCT_CONN_ASYM2), true) + getField(STRUCT_CONN_COMP2)
+ parseFloatStr(getField(STRUCT_CONN_SEQ2))
+ getField(STRUCT_CONN_ATOM2) + getField(STRUCT_CONN_ALT2);
int order = getBondOrder(getField(STRUCT_CONN_ORDER));
if (structConnMap == null)
structConnMap = new Lst();
structConnMap
.addLast(new Object[] { key1, key2, Integer.valueOf(order) });
if (structConnList.indexOf(key1) < 0)
structConnList += key1;
if (structConnList.indexOf(key2) < 0)
structConnList += key2;
}
return true;
}
/** Position of the {@code *_comp_id} entry in {@link #chemCompBondFields}. */
private static final byte CHEM_COMP_BOND_ID = 0;
/** Position of the {@code *_atom_id_1} entry in {@link #chemCompBondFields}. */
private static final byte CHEM_COMP_BOND_ATOM_ID_1 = 1;
/** Position of the {@code *_atom_id_2} entry in {@link #chemCompBondFields}. */
private static final byte CHEM_COMP_BOND_ATOM_ID_2 = 2;
/** Position of the {@code *_value_order} entry in {@link #chemCompBondFields}. */
private static final byte CHEM_COMP_BOND_VALUE_ORDER = 3;
/** Position of the {@code *_pdbx_aromatic_flag} entry in {@link #chemCompBondFields}. */
private static final byte CHEM_COMP_BOND_AROMATIC_FLAG = 4;
/** The chem_comp_bond category name including its trailing period. */
private static final String FAMILY_COMPBOND_CAT = "_chem_comp_bond.";
/** Family name passed to parseLoopParametersFor along with {@link #chemCompBondFields}. */
private static final String FAMILY_COMPBOND = "_chem_comp_bond";
/**
 * Column names requested from a _chem_comp_bond loop, in the order given
 * by the CHEM_COMP_BOND_* constants.
 *
 * NOTE(review): the leading "*" presumably stands for the family name
 * supplied to parseLoopParametersFor -- confirm against that method.
 */
private static final String[] chemCompBondFields = {
  "*_comp_id",            // 0
  "*_atom_id_1",          // 1
  "*_atom_id_2",          // 2
  "*_value_order",        // 3
  "*_pdbx_aromatic_flag"  // 4
};
private boolean processCompBondLoopBlock() throws Exception {
doSetBonds = true;
parseLoopParametersFor(FAMILY_COMPBOND, chemCompBondFields);
while (parser.getData()) {
String comp = getField(CHEM_COMP_BOND_ID);
String atom1 = getField(CHEM_COMP_BOND_ATOM_ID_1);
String atom2 = getField(CHEM_COMP_BOND_ATOM_ID_2);
int order = getBondOrder(getField(CHEM_COMP_BOND_VALUE_ORDER));
if ((getField(CHEM_COMP_BOND_AROMATIC_FLAG).charAt(0) == 'Y'))
switch (order) {
case JmolAdapter.ORDER_COVALENT_SINGLE:
order = JmolAdapter.ORDER_AROMATIC_SINGLE;
break;
case JmolAdapter.ORDER_COVALENT_DOUBLE:
order = JmolAdapter.ORDER_AROMATIC_DOUBLE;
break;
}
if (isLigand) {
asc.addNewBondWithOrderA(asc.getAtomFromName(atom1),
asc.getAtomFromName(atom2), order);
} else if (haveHAtoms || htHetero != null && htHetero.containsKey(comp)) {
if (htBondMap == null)
htBondMap = new Hashtable>();
Lst cmap = htBondMap.get(comp);
if (cmap == null)
htBondMap.put(comp, cmap = new Lst());
cmap.addLast(new Object[] { atom1, atom2,
Integer.valueOf(haveHAtoms ? order : 1) });
}
}
return true;
}
/**
 * Per-atom hook. Depending on the reader's mode, the atom is either
 * folded into a running coordinate sum (and the method returns false) or
 * registered under its assembly ID (and the method returns true).
 *
 * @param atom the atom just read
 * @param assemblyId the assembly ID of the atom; may be null
 * @param strChain the chain identifier string of the atom
 * @return false when the atom was only accumulated into a coarse-grained
 *         or by-chain sum; true otherwise
 */
@Override
public boolean processSubclassAtom(Atom atom, String assemblyId, String strChain) {
  if (isBiomolecule && isCourseGrained) {
    // coarse-grained biomolecule: accumulate per assembly ID
    P3 total = chainAtomMap.get(assemblyId);
    if (total == null) {
      total = new P3();
      chainAtomMap.put(assemblyId, total);
      chainAtomCounts.put(assemblyId, new int[1]);
    }
    chainAtomCounts.get(assemblyId)[0]++;
    total.add(atom);
    return false;
  }
  if (!isBiomolecule && byChain) {
    // by-chain mode: switch the running sum whenever the chain changes
    if (thisChain != atom.chainID) {
      thisChain = atom.chainID;
      chainSum = chainAtomMap.get(strChain);
      if (chainSum == null) {
        chainSum = new P3();
        chainAtomMap.put(strChain, chainSum);
        chainAtomCount = new int[1];
        chainAtomCounts.put(strChain, chainAtomCount);
      }
    }
    chainSum.add(atom);
    chainAtomCount[0]++;
    return false;
  }
  if (assemblyId == null)
    return true;
  if (assemblyIdAtoms == null)
    assemblyIdAtoms = new Hashtable<String, BS>();
  BS members = assemblyIdAtoms.get(assemblyId);
  if (members == null) {
    members = new BS();
    assemblyIdAtoms.put(assemblyId, members);
  }
  // NOTE(review): ac is presumably the index this atom will occupy -- confirm
  members.set(ac);
  return true;
}
// Comma-delimited record (",n,") of the model numbers already passed to
// incrementModel while reading an "-assembly-" data set; a repeat of a
// number already recorded here sets requiresSorting.
private String modelStrings = "";
// Set by incrementModel when the desired model has already been loaded and
// the remainder of the atom loop is being skipped.
protected boolean done;
/**
 * Reads the model number from the given field and starts a new model
 * when it differs from the current one.
 *
 * @param modelField index of the field holding the model number
 * @param currentModelNo the model number currently being read
 * @return the model number when it is unchanged; otherwise whatever
 *         incrementModel returns
 * @throws Exception if incrementModel fails
 */
@Override
protected int checkPDBModelField(int modelField, int currentModelNo) throws Exception {
  // The value of the model field matters only when the model is specified
  // AFTER the file name in the load command; a MODEL keyword placed
  // before the file name does not make use of it.
  fieldProperty(modelField);
  int fileModelNo = parseIntStr(field);
  if (fileModelNo == currentModelNo)
    return fileModelNo;
  return incrementModel(fileModelNo);
}
/**
 * Starts a new model for the given file model number, unless the desired
 * model has already been read.
 *
 * @param modelNo the model number found in the file
 * @return modelNo, or Integer.MIN_VALUE when the desired model is already
 *         loaded and the rest of the atom loop is skipped
 * @throws Exception if the parser or model setup fails
 */
protected int incrementModel(int modelNo) throws Exception {
  boolean fromAssemblyFile = (thisDataSetName != null
      && thisDataSetName.indexOf("-assembly-") >= 0);
  if (fromAssemblyFile) {
    // Files such as http://www.ebi.ac.uk/pdbe/static/entry/download/2lev-assembly-1.cif.gz
    // list their atoms by chain rather than by model, so with multiple
    // models a model number can come around again; that calls for sorting.
    useFileModelNumbers = true;
    String tag = "," + modelNo + ",";
    if (modelStrings.contains(tag))
      requiresSorting = true;
    else
      modelStrings += tag;
  }
  if (iHaveDesiredModel && asc.atomSetCount > 0 && !fromAssemblyFile) {
    done = true;
    if (parser != null) {
      parser.skipLoop(false);
      // skip only this atom loop, not what follows it
      skipping = false;
    }
    continuing = true;
    return Integer.MIN_VALUE;
  }
  int modelNumberToUse;
  if (useFileModelNumbers)
    modelNumberToUse = modelNo;
  else
    modelNumberToUse = ++modelIndex;
  setHetero();
  newModel(modelNumberToUse);
  if (skipping)
    return modelNo;
  nextAtomSet();
  if (modelMap == null || asc.ac == 0)
    modelMap = new Hashtable<String, Integer>();
  // two-way lookup: file model number -> set index, "_" + set index -> file model number
  int setIndex = Math.max(0, asc.iSet);
  modelMap.put("" + modelNo, Integer.valueOf(setIndex));
  modelMap.put("_" + setIndex, Integer.valueOf(modelNo));
  return modelNo;
}
/**
 * Publishes the hetero-name table, when one exists, under the key
 * "hetNames" both as current-model info and as collection-level info.
 */
private void setHetero() {
  if (htHetero == null)
    return;
  asc.setCurrentModelInfo("hetNames", htHetero);
  asc.setInfo("hetNames", htHetero);
}
}