All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.EntityInfo Maven / Gradle / Ivy

There is a newer version: 7.1.3
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 22.01.2007
 *
 */
package org.biojava.nbio.structure;


import org.biojava.nbio.structure.io.FileParsingParameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * An object to contain the info from the PDB header for a Molecule.
 * In the mmCIF dictionary, it is called an Entity. In the case of polymers, it
 * is defined as each group of sequence-identical NCS-related chains.
 * <p>
 * Now PDB file format 3.2 aware: contains the new TAX_ID fields for the
 * organism studied and the expression system.
 *
 * @author Jules Jacobsen
 * @author Jose Duarte
 * @author Anthony Bradley
 * @since 1.5
 */
public class EntityInfo implements Serializable {

	private final static Logger logger = LoggerFactory.getLogger(EntityInfo.class);


	// TODO We could drop a lot of the stuff here that is PDB-file related (actually many PDB files don't contain many of these fields) - JD 2016-03-25
	//     The only really essential part of a EntityInfo is the member chains and the entity_id/mol_id
	// See also issue https://github.com/biojava/biojava/issues/219

	private static final long serialVersionUID = 2991897825657586356L;

	/**
	 * The list of chains that are described by this EntityInfo
	 */
	private List chains;

	/**
	 * The Molecule identifier, called entity_id in mmCIF dictionary
	 */
	private int molId;

	/**
	 * A map to cache residue number mapping, between ResidueNumbers and index (1-based) in aligned sequences (SEQRES).
	 * Initialised lazily upon call to {@link #getAlignedResIndex(Group, Chain)}
	 * Keys are asym_ids of chains, values maps of residue numbers to indices.
	 */
	private final Map> chains2pdbResNums2ResSerials;

	private String refChainId;
	private String description = null;
	private String title = null;
	/**
	 * The type of entity (polymer, non-polymer, water)
	 */
	private EntityType type = null;
	private List synonyms = null;
	private List ecNums = null;
	private String engineered = null;
	private String mutation = null;
	private String biologicalUnit = null;
	private String details = null;
	private String numRes = null;
	private String resNames = null;
	private String headerVars = null;
	private String synthetic = null;
	private String fragment = null;
	private String organismScientific = null;
	private String organismTaxId = null;
	private String organismCommon = null;
	private String strain = null;
	private String variant = null;
	private String cellLine = null;
	private String atcc = null;
	private String organ = null;
	private String tissue = null;
	private String cell = null;
	private String organelle = null;
	private String secretion = null;
	private String gene = null;
	private String cellularLocation = null;
	private String expressionSystem = null;
	private String expressionSystemTaxId = null;
	private String expressionSystemStrain = null;
	private String expressionSystemVariant = null;
	private String expressionSystemCellLine = null;
	private String expressionSystemAtccNumber = null;
	private String expressionSystemOrgan = null;
	private String expressionSystemTissue = null;
	private String expressionSystemCell = null;
	private String expressionSystemOrganelle = null;
	private String expressionSystemCellularLocation = null;
	private String expressionSystemVectorType = null;
	private String expressionSystemVector = null;
	private String expressionSystemPlasmid = null;
	private String expressionSystemGene = null;
	private String expressionSystemOtherDetails = null;

	/**
	 * The ID used by Hibernate; read and written through {@link #getId()} and {@link #setId(Long)}.
	 * Not assigned by the no-argument constructor.
	 */
	private Long id;

	/**
	 * Creates an empty EntityInfo: the molecule id is set to -1, the list of
	 * member chains is empty and the residue-number cache is empty.
	 */
	public EntityInfo () {
		this.molId = -1;
		this.chains = new ArrayList<>();
		this.chains2pdbResNums2ResSerials = new HashMap<>();
	}

	/**
	 * Copy constructor.
	 * The Hibernate id, molecule id, entity type, reference chain id and all
	 * descriptive String fields are taken over from the given EntityInfo. The
	 * synonyms and EC number lists are copied into fresh lists (or left null if
	 * they are null in the source). Member chains are NOT copied: the new object
	 * starts with an empty chain list and an empty residue-number cache.
	 *
	 * @param c the EntityInfo to copy the data from
	 */
	public EntityInfo (EntityInfo c) {

		// identity and classification
		id = c.id;
		molId = c.molId;
		type = c.type;
		refChainId = c.refChainId;

		// chains are deliberately not carried over, and the cache is rebuilt lazily
		chains = new ArrayList<>();
		chains2pdbResNums2ResSerials = new HashMap<>();

		// general description
		description = c.description;
		title = c.title;
		if (c.synonyms != null) {
			synonyms = new ArrayList<>(c.synonyms);
		}
		if (c.ecNums != null) {
			ecNums = new ArrayList<>(c.ecNums);
		}
		engineered = c.engineered;
		mutation = c.mutation;
		biologicalUnit = c.biologicalUnit;
		details = c.details;
		numRes = c.numRes;
		resNames = c.resNames;
		headerVars = c.headerVars;

		// source organism
		synthetic = c.synthetic;
		fragment = c.fragment;
		organismScientific = c.organismScientific;
		organismTaxId = c.organismTaxId;
		organismCommon = c.organismCommon;
		strain = c.strain;
		variant = c.variant;
		cellLine = c.cellLine;
		atcc = c.atcc;
		organ = c.organ;
		tissue = c.tissue;
		cell = c.cell;
		organelle = c.organelle;
		secretion = c.secretion;
		gene = c.gene;
		cellularLocation = c.cellularLocation;

		// expression system
		expressionSystem = c.expressionSystem;
		expressionSystemTaxId = c.expressionSystemTaxId;
		expressionSystemStrain = c.expressionSystemStrain;
		expressionSystemVariant = c.expressionSystemVariant;
		expressionSystemCellLine = c.expressionSystemCellLine;
		expressionSystemAtccNumber = c.expressionSystemAtccNumber;
		expressionSystemOrgan = c.expressionSystemOrgan;
		expressionSystemTissue = c.expressionSystemTissue;
		expressionSystemCell = c.expressionSystemCell;
		expressionSystemOrganelle = c.expressionSystemOrganelle;
		expressionSystemCellularLocation = c.expressionSystemCellularLocation;
		expressionSystemVectorType = c.expressionSystemVectorType;
		expressionSystemVector = c.expressionSystemVector;
		expressionSystemPlasmid = c.expressionSystemPlasmid;
		expressionSystemGene = c.expressionSystemGene;
		expressionSystemOtherDetails = c.expressionSystemOtherDetails;
	}

	/**
	 * Builds a one-line summary of this entity: "EntityInfo: ", the molecule id,
	 * the description in parentheses (or "(no name)" when the description is null)
	 * and the label " asymIds: ".
	 */
	@Override
	public String toString(){
		StringBuilder buf = new StringBuilder();
		buf.append("EntityInfo: ").append(molId).append(" ");
		buf.append(description==null?"(no name)":"("+description+")");
		buf.append(" asymIds: ");
		if (chains!=null) {
			// NOTE(review): the next line looks truncated -- the loop header runs straight
			// into a declaration of chainIds, so the rest of toString() and the header of
			// the following method are missing from this copy. Restore from the upstream
			// source before compiling.
			for (int i=0;i chainIds = new ArrayList<>();
		for (Chain chain:chains) {
			chainIds.add(chain.getId());
		}

		// case-insensitive ordering: the chain whose id sorts first is the one returned
		Collections.sort(chainIds, String.CASE_INSENSITIVE_ORDER);

		for (Chain chain:chains) {
			if (chain.getId().equals(chainIds.get(0))) {
				return chain;
			}
		}

		// no chain matched the first sorted id: log the problem and return null
		logger.error("Could not find a representative chain for EntityInfo '{}'", this.toString());

		return null;
	}

	/**
	 * Returns the ID used by Hibernate.
	 *
	 * @return the ID used by Hibernate
	 */
	public Long getId() {
		return this.id;
	}

	/**
	 * Sets the ID used by Hibernate.
	 *
	 * @param id the ID used by Hibernate
	 */
	public void setId(Long id) {
		this.id = id;
	}

	/**
	 * Return the list of member chain ids (asym ids) that are described by this EntityInfo,
	 * only unique chain IDs are contained in the list.
	 * Note that in the case of multimodel structures this will return just the unique
	 * chain identifiers whilst {@link #getChains()} will return a corresponding chain
	 * per model.
	 * @return the list of unique ChainIDs that are described by this EntityInfo
	 * @see #setChains(List)
	 * @see #getChains()
	 */
	public List getChainIds() {

		// ids are gathered in a TreeSet, which drops duplicates
		Set uniqChainIds = new TreeSet<>();
		// NOTE(review): the next line looks truncated -- the loop header runs straight into
		// "(uniqChainIds);", so the loop body and the return expression are missing from
		// this copy. Restore from the upstream source before compiling.
		for (int i=0;i(uniqChainIds);
	}

	/**
	 * Given a Group g of Chain c (member of this EntityInfo) return the corresponding position in the
	 * alignment of all member sequences (1-based numbering), i.e. the index (1-based) in the SEQRES sequence.
	 * This allows for comparisons of residues belonging to different chains of the same EntityInfo (entity).
	 * 

	 * <p>
	 * Note this method should only be used for entities of type {@link EntityType#POLYMER}
	 * </p>
	 * <p>
	 * If {@link FileParsingParameters#setAlignSeqRes(boolean)} is not used or SEQRES not present, a mapping
	 * will not be available and this method will return {@link ResidueNumber#getSeqNum()} for all residues, which
	 * in some cases will be correctly aligned indices (when no insertion codes are
	 * used and when all chains within the entity are numbered in the same way), but
	 * in general they will be neither unique (because of insertion codes) nor aligned.
	 * </p>
* @param g the group * @param c the chain * @return the aligned residue index (1 to n), if no SEQRES groups are available at all then {@link ResidueNumber#getSeqNum()} * is returned as a fall-back, if the group is not found in the SEQRES groups then -1 is returned * for the given group and chain * @throws IllegalArgumentException if the given Chain is not a member of this EntityInfo * @see Chain#getSeqResGroup(int) */ public int getAlignedResIndex(Group g, Chain c) { boolean contained = false; for (Chain member:getChains()) { if (c.getId().equals(member.getId())) { contained = true; break; } } if (!contained) throw new IllegalArgumentException("Given chain with asym_id "+c.getId()+" is not a member of this entity: "+getChainIds().toString()); if (!chains2pdbResNums2ResSerials.containsKey(c.getId())) { // we do lazy initialisation of the map initResSerialsMap(c); } // if no seqres groups are available at all the map will be null Map map = chains2pdbResNums2ResSerials.get(c.getId()); int serial; if (map!=null) { ResidueNumber resNum = g.getResidueNumber(); // the resNum will be null for groups that are SEQRES only and not in ATOM, // still it can happen that a group is in ATOM in one chain but not in other of the same entity. 
// This is what we try to find out here (analogously to what we do in initResSerialsMap() ): if (resNum==null && c.getSeqResGroups()!=null && !c.getSeqResGroups().isEmpty()) { int index = c.getSeqResGroups().indexOf(g); resNum = findResNumInOtherChains(index, c); } if (resNum == null) { // still null, we really can't map serial = -1; } else { Integer alignedSerial = map.get(resNum); if (alignedSerial==null) { // the map doesn't contain this group, something's wrong: return -1 serial = -1; } else { serial = alignedSerial; } } } else { // no seqres groups available we resort to using the pdb residue numbers are given serial = g.getResidueNumber().getSeqNum(); } return serial; } private void initResSerialsMap(Chain c) { if (c.getSeqResGroups()==null || c.getSeqResGroups().isEmpty()) { logger.warn("No SEQRES groups found in chain with asym_id {}, will use residue numbers as given (no insertion codes, not necessarily aligned). " + "Make sure your structure has SEQRES records and that you use FileParsingParameters.setAlignSeqRes(true)", c.getId()); // we add a explicit null to the map so that we flag it as unavailable for this chain chains2pdbResNums2ResSerials.put(c.getId(), null); return; } Map resNums2ResSerials = new HashMap<>(); chains2pdbResNums2ResSerials.put(c.getId(), resNums2ResSerials); for (int i=0;i getSynonyms() { return synonyms; } public void setSynonyms(List synonyms) { this.synonyms = synonyms; } public List getEcNums() { return ecNums; } public void setEcNums(List ecNums) { this.ecNums = ecNums; } public String getEngineered() { return engineered; } public void setEngineered(String engineered) { this.engineered = engineered; } public String getMutation() { return mutation; } public void setMutation(String mutation) { this.mutation = mutation; } public String getBiologicalUnit() { return biologicalUnit; } public void setBiologicalUnit(String biologicalUnit) { this.biologicalUnit = biologicalUnit; } public String getDetails() { return details; } public 
void setDetails(String details) { this.details = details; } public String getNumRes() { return numRes; } public void setNumRes(String numRes) { this.numRes = numRes; } public String getResNames() { return resNames; } public void setResNames(String resNames) { this.resNames = resNames; } public String getHeaderVars() { return headerVars; } public void setHeaderVars(String headerVars) { this.headerVars = headerVars; } public String getSynthetic() { return synthetic; } public void setSynthetic(String synthetic) { this.synthetic = synthetic; } public String getFragment() { return fragment; } public void setFragment(String fragment) { this.fragment = fragment; } public String getOrganismScientific() { return organismScientific; } public void setOrganismScientific(String organismScientific) { this.organismScientific = organismScientific; } public String getOrganismTaxId() { return organismTaxId; } public void setOrganismTaxId(String organismTaxId) { this.organismTaxId = organismTaxId; } public String getOrganismCommon() { return organismCommon; } public void setOrganismCommon(String organismCommon) { this.organismCommon = organismCommon; } public String getStrain() { return strain; } public void setStrain(String strain) { this.strain = strain; } public String getVariant() { return variant; } public void setVariant(String variant) { this.variant = variant; } public String getCellLine() { return cellLine; } public void setCellLine(String cellLine) { this.cellLine = cellLine; } public String getAtcc() { return atcc; } public void setAtcc(String atcc) { this.atcc = atcc; } public String getOrgan() { return organ; } public void setOrgan(String organ) { this.organ = organ; } public String getTissue() { return tissue; } public void setTissue(String tissue) { this.tissue = tissue; } public String getCell() { return cell; } public void setCell(String cell) { this.cell = cell; } public String getOrganelle() { return organelle; } public void setOrganelle(String organelle) { 
this.organelle = organelle; } public String getSecretion() { return secretion; } public void setSecretion(String secretion) { this.secretion = secretion; } public String getGene() { return gene; } public void setGene(String gene) { this.gene = gene; } public String getCellularLocation() { return cellularLocation; } public void setCellularLocation(String cellularLocation) { this.cellularLocation = cellularLocation; } public String getExpressionSystem() { return expressionSystem; } public String getExpressionSystemTaxId() { return expressionSystemTaxId; } public void setExpressionSystemTaxId(String expressionSystemTaxId) { this.expressionSystemTaxId = expressionSystemTaxId; } public void setExpressionSystem(String expressionSystem) { this.expressionSystem = expressionSystem; } public String getExpressionSystemStrain() { return expressionSystemStrain; } public void setExpressionSystemStrain(String expressionSystemStrain) { this.expressionSystemStrain = expressionSystemStrain; } public String getExpressionSystemVariant() { return expressionSystemVariant; } public void setExpressionSystemVariant(String expressionSystemVariant) { this.expressionSystemVariant = expressionSystemVariant; } public String getExpressionSystemCellLine() { return expressionSystemCellLine; } public void setExpressionSystemCellLine(String expressionSystemCellLine) { this.expressionSystemCellLine = expressionSystemCellLine; } public String getExpressionSystemAtccNumber() { return expressionSystemAtccNumber; } public void setExpressionSystemAtccNumber(String expressionSystemAtccNumber) { this.expressionSystemAtccNumber = expressionSystemAtccNumber; } public String getExpressionSystemOrgan() { return expressionSystemOrgan; } public void setExpressionSystemOrgan(String expressionSystemOrgan) { this.expressionSystemOrgan = expressionSystemOrgan; } public String getExpressionSystemTissue() { return expressionSystemTissue; } public void setExpressionSystemTissue(String expressionSystemTissue) { 
this.expressionSystemTissue = expressionSystemTissue; } public String getExpressionSystemCell() { return expressionSystemCell; } public void setExpressionSystemCell(String expressionSystemCell) { this.expressionSystemCell = expressionSystemCell; } public String getExpressionSystemOrganelle() { return expressionSystemOrganelle; } public void setExpressionSystemOrganelle(String expressionSystemOrganelle) { this.expressionSystemOrganelle = expressionSystemOrganelle; } public String getExpressionSystemCellularLocation() { return expressionSystemCellularLocation; } public void setExpressionSystemCellularLocation(String expressionSystemCellularLocation) { this.expressionSystemCellularLocation = expressionSystemCellularLocation; } public String getExpressionSystemVectorType() { return expressionSystemVectorType; } public void setExpressionSystemVectorType(String expressionSystemVectorType) { this.expressionSystemVectorType = expressionSystemVectorType; } public String getExpressionSystemVector() { return expressionSystemVector; } public void setExpressionSystemVector(String expressionSystemVector) { this.expressionSystemVector = expressionSystemVector; } public String getExpressionSystemPlasmid() { return expressionSystemPlasmid; } public void setExpressionSystemPlasmid(String expressionSystemPlasmid) { this.expressionSystemPlasmid = expressionSystemPlasmid; } public String getExpressionSystemGene() { return expressionSystemGene; } public void setExpressionSystemGene(String expressionSystemGene) { this.expressionSystemGene = expressionSystemGene; } public String getExpressionSystemOtherDetails() { return expressionSystemOtherDetails; } public void setExpressionSystemOtherDetails(String expressionSystemOtherDetails) { this.expressionSystemOtherDetails = expressionSystemOtherDetails; } /** * Get the list of chains that are part of this EntityInfo. Note that for multi-model * structures chains from all models are returned. 
* * @return a List of Chain objects */ public List getChains(){ return this.chains; } private List getFirstModelChains() { Map firstModelChains = new LinkedHashMap<>(); Set lookupChainIds = new HashSet<>(getChainIds()); for (Chain chain : chains) { if (lookupChainIds.contains(chain.getId())) { if (!firstModelChains.containsKey(chain.getId())) { firstModelChains.put(chain.getId(), chain); } } } return new ArrayList<>(firstModelChains.values()); } /** * Add new Chain to this EntityInfo * @param chain */ public void addChain(Chain chain){ this.chains.add(chain); } /** * Set the chains for this EntityInfo * @param chains */ public void setChains(List chains){ this.chains = chains; } /** * Get the type of entity this EntityInfo describes. * Options are polymer, non-polymer or water. * @return a string describing the type of entity. (polymer, non-polymer or water). */ public EntityType getType() { return this.type; } /** * Set the type of entity this EntityInfo describes. * Options are polymer, non-polymer or water. * @param type a string describing the type of entity. (polymer, non-polymer or water). */ public void setType(EntityType type) { this.type = type; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy