// pl.poznan.put.pdb.analysis.PdbParser Maven / Gradle / Ivy
package pl.poznan.put.pdb.analysis;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.RandomUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.poznan.put.pdb.ImmutablePdbExpdtaLine;
import pl.poznan.put.pdb.ImmutablePdbHeaderLine;
import pl.poznan.put.pdb.ImmutablePdbRemark2Line;
import pl.poznan.put.pdb.PdbAtomLine;
import pl.poznan.put.pdb.PdbExpdtaLine;
import pl.poznan.put.pdb.PdbHeaderLine;
import pl.poznan.put.pdb.PdbModresLine;
import pl.poznan.put.pdb.PdbParsingException;
import pl.poznan.put.pdb.PdbRemark2Line;
import pl.poznan.put.pdb.PdbRemark465Line;
import pl.poznan.put.pdb.PdbResidueIdentifier;
import pl.poznan.put.pdb.PdbTitleLine;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
/** A parser of PDB format. */
public class PdbParser {
private static final Logger LOGGER = LoggerFactory.getLogger(PdbParser.class);
private final List modifiedResidues = new ArrayList<>();
private final List missingResidues = new ArrayList<>();
private final Collection processedIdentifiers = new HashSet<>();
private final Set chainTerminatedAfter = new HashSet<>();
private final Collection endedModelNumbers = new HashSet<>();
private final Map> modelAtoms = new TreeMap<>();
private final Collection titleLines = new ArrayList<>();
private final boolean strictMode;
private Optional headerLine = Optional.empty();
private Optional experimentalDataLine = Optional.empty();
private Optional resolutionLine = Optional.empty();
private Optional currentIdentifier = Optional.empty();
private int currentModelNumber;
/**
* Creates an instance with the possibility to set {@code strictMode}.
*
* @param strictMode If false, then some of the checks on PDB format conformity are relaxed.
*/
public PdbParser(final boolean strictMode) {
super();
this.strictMode = strictMode;
}
/** Creates an instance with {@code strictMode} set to true. */
public PdbParser() {
super();
strictMode = true;
}
/**
* Parses a string in PDB format.
*
* @param structureContent A string containing data in PDB format.
* @return An object representing the parsed data.
*/
public final synchronized List parse(final String structureContent) {
resetState();
for (final String line : structureContent.split("\n")) {
if (line.startsWith("MODEL")) {
handleModelLine(line);
} else if (line.startsWith("ATOM") || line.startsWith("HETATM")) {
handleAtomLine(line);
} else if (line.startsWith("TER ")) {
handleTerLine();
} else if (line.startsWith("REMARK 465")) {
handleMissingResidueLine(line);
} else if (line.startsWith("MODRES")) {
handleModifiedResidueLine(line);
} else if (line.startsWith("HEADER")) {
handleHeaderLine(line);
} else if (line.startsWith("EXPDTA")) {
handleExperimentalDataLine(line);
} else if (line.startsWith("REMARK 2 RESOLUTION.")) {
handleResolutionLine(line);
} else if (line.startsWith("TITLE ")) {
handleTitleLine(line);
}
}
final String titleBuilder =
titleLines.stream().map(PdbTitleLine::title).collect(Collectors.joining());
final List result = new ArrayList<>();
for (final Map.Entry> entry : modelAtoms.entrySet()) {
final int modelNumber = entry.getKey();
final List atoms = entry.getValue();
final PdbModel structureModel =
ImmutableDefaultPdbModel.of(
headerLine.orElse(ImmutablePdbHeaderLine.of("", new Date(0L), "")),
experimentalDataLine.orElse(ImmutablePdbExpdtaLine.of(Collections.emptyList())),
resolutionLine.orElse(ImmutablePdbRemark2Line.of(Double.NaN)),
modelNumber,
atoms,
modifiedResidues,
missingResidues,
titleBuilder,
chainTerminatedAfter);
result.add(structureModel);
}
return result;
}
private void resetState() {
modifiedResidues.clear();
missingResidues.clear();
processedIdentifiers.clear();
chainTerminatedAfter.clear();
endedModelNumbers.clear();
modelAtoms.clear();
titleLines.clear();
headerLine = Optional.empty();
experimentalDataLine = Optional.empty();
currentModelNumber = 0;
currentIdentifier = Optional.empty();
}
private void handleModelLine(final String line) {
endedModelNumbers.add(currentModelNumber);
final String modelNumberString =
(line.length() > 14) ? line.substring(10, 14).trim() : line.substring(5).trim();
int modelNumber = Integer.parseInt(modelNumberString);
while (endedModelNumbers.contains(modelNumber)) {
// model number has four digits
modelNumber = RandomUtils.nextInt(1, 10000);
}
currentModelNumber = modelNumber;
processedIdentifiers.clear();
chainTerminatedAfter.clear();
currentIdentifier = Optional.empty();
}
private void handleAtomLine(final String line) {
try {
final PdbAtomLine atomLine = PdbAtomLine.parse(line, strictMode);
final PdbResidueIdentifier identifier = PdbResidueIdentifier.from(atomLine);
if (processedIdentifiers.contains(identifier)) {
PdbParser.LOGGER.warn("Duplicate residue, ignoring it: {}", identifier);
return;
}
if (currentIdentifier.isPresent() && !identifier.equals(currentIdentifier.get())) {
processedIdentifiers.add(currentIdentifier.get());
currentIdentifier = Optional.of(identifier);
}
if (!modelAtoms.containsKey(currentModelNumber)) {
modelAtoms.put(currentModelNumber, new ArrayList<>());
}
final List atomList = modelAtoms.get(currentModelNumber);
atomList.add(atomLine);
} catch (final PdbParsingException e) {
PdbParser.LOGGER.warn("Invalid ATOM line: {}", line, e);
}
}
private void handleTitleLine(final String line) {
try {
final PdbTitleLine titleLine = PdbTitleLine.parse(line);
if (((CollectionUtils.isEmpty(titleLines)) && (StringUtils.isBlank(titleLine.continuation())))
|| (StringUtils.isNotBlank(titleLine.continuation()))) {
titleLines.add(titleLine);
}
} catch (final PdbParsingException e) {
PdbParser.LOGGER.warn("Invalid TITLE line: {}", line, e);
}
}
private void handleTerLine() {
final List atomLines = modelAtoms.get(currentModelNumber);
chainTerminatedAfter.add(PdbResidueIdentifier.from(atomLines.get(atomLines.size() - 1)));
}
private void handleMissingResidueLine(final String line) {
try {
if (PdbRemark465Line.isCommentLine(line)) {
return;
}
final PdbRemark465Line remark465Line = PdbRemark465Line.parse(line);
missingResidues.add(remark465Line);
} catch (final PdbParsingException e) {
PdbParser.LOGGER.warn("Invalid REMARK 465 line: {}", line, e);
}
}
private void handleModifiedResidueLine(final String line) {
try {
final PdbModresLine modresLine = PdbModresLine.parse(line);
modifiedResidues.add(modresLine);
} catch (final PdbParsingException e) {
PdbParser.LOGGER.warn("Invalid MODRES line: {}", line, e);
}
}
private void handleHeaderLine(final String line) {
try {
headerLine = Optional.of(PdbHeaderLine.parse(line));
} catch (final PdbParsingException e) {
PdbParser.LOGGER.warn("Invalid HEADER line: {}", line, e);
}
}
private void handleExperimentalDataLine(final String line) {
try {
experimentalDataLine = Optional.of(PdbExpdtaLine.parse(line));
} catch (final PdbParsingException e) {
PdbParser.LOGGER.warn("Invalid EXPDTA line: {}", line, e);
}
}
private void handleResolutionLine(final String line) {
try {
resolutionLine = Optional.of(PdbRemark2Line.parse(line));
} catch (final PdbParsingException e) {
PdbParser.LOGGER.warn("Invalid REMARK 2 RESOLUTION. line: {}", line, e);
}
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy