All Downloads are FREE. Search and download functionalities are using the official Maven repository.

pl.poznan.put.rna.ChainReorderer Maven / Gradle / Ivy

package pl.poznan.put.rna;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.math3.stat.correlation.SpearmansCorrelation;
import pl.poznan.put.pdb.PdbAtomLine;
import pl.poznan.put.pdb.PdbNamedResidueIdentifier;
import pl.poznan.put.pdb.analysis.ImmutableDefaultPdbModel;
import pl.poznan.put.pdb.analysis.MoleculeType;
import pl.poznan.put.pdb.analysis.PdbModel;
import pl.poznan.put.pdb.analysis.ResidueCollection;
import pl.poznan.put.structure.CanonicalStructureExtractor;
import pl.poznan.put.structure.ClassifiedBasePair;
import pl.poznan.put.structure.formats.BpSeq;
import pl.poznan.put.structure.formats.Converter;
import pl.poznan.put.structure.formats.ImmutableDefaultConverter;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

/**
 * A set of methods to reorder chains in an RNA structure. The order is derived from base pairing
 * information in two steps: (1) a graph of connection is traversed to find connected components
 * which are processed together, and (2) all permutations of each component's order are analyzed to
 * find one which minimizes the pseudoknot order.
 */
public final class ChainReorderer {
  private static final SpearmansCorrelation SPEARMAN = new SpearmansCorrelation();

  private ChainReorderer() {
    super();
  }

  /**
   * Finds canonical base pairs (see {@link
   * CanonicalStructureExtractor#basePairs(ResidueCollection)}) and reorders chains to keep
   * connected ones together while minimizing the overall pseudoknot order.
   *
   * @param model The input PDB model.
   * @return The PDB model filtered to contain only RNA and with chains reordered.
   */
  public static PdbModel reorderAtoms(final PdbModel model) {
    final PdbModel rna = model.filteredNewInstance(MoleculeType.RNA);
    return ChainReorderer.reorderAtoms(rna, CanonicalStructureExtractor.basePairs(rna));
  }

  /**
   * Reorders chains according to given canonical base pairs to keep connected chains together while
   * minimizing the overall pseudoknot order.
   *
   * @param model The input PDB model.
   * @param basePairs The list of base pairs to take into account.
   * @return The PDB model filtered to contain only RNA and with chains reordered.
   */
  public static PdbModel reorderAtoms(
      final PdbModel model, final Collection basePairs) {
    final PdbModel rna = model.filteredNewInstance(MoleculeType.RNA);
    final List order = ChainReorderer.chainOrder(rna.namedResidueIdentifiers(), basePairs);
    final List atoms =
        rna.atoms().stream()
            .sorted(Comparator.comparingInt(t -> order.indexOf(t.chainIdentifier())))
            .collect(Collectors.toList());
    return ImmutableDefaultPdbModel.of(
        rna.header(),
        rna.experimentalData(),
        rna.resolution(),
        rna.modelNumber(),
        atoms,
        rna.modifiedResidues(),
        rna.missingResidues(),
        rna.title(),
        rna.chainTerminatedAfter());
  }

  private static List chainOrder(
      final Collection residues,
      final Collection basePairs) {
    final List distinct =
        residues.stream()
            .map(PdbNamedResidueIdentifier::chainIdentifier)
            .distinct()
            .collect(Collectors.toList());

    // 1 or 2 chains do not reordering
    if (distinct.size() <= 2) {
      return distinct;
    }

    final Map> graph = ChainReorderer.buildGraph(basePairs);
    final Collection visited = new HashSet<>();
    final List order = new ArrayList<>();

    for (final String chain : distinct) {
      if (!visited.contains(chain)) {
        final List component = new ArrayList<>();
        ChainReorderer.depthFirstSearch(chain, graph, visited, component);
        order.addAll(ChainReorderer.componentOrder(component, distinct, residues, basePairs));
      }
    }

    return order;
  }

  private static List componentOrder(
      final List component,
      final List originalChainOrder,
      final Collection residues,
      final Collection basePairs) {
    // gather all permutations of chains which have the minimal pseudoknot order
    final SortedMap>> map = new TreeMap<>();
    CollectionUtils.permutations(component)
        .forEach(
            order -> {
              final int pseudoknots = ChainReorderer.countPseudoknots(order, residues, basePairs);
              map.putIfAbsent(pseudoknots, new ArrayList<>());
              map.get(pseudoknots).add(order);
            });

    // return one of the selected permutations which is the most similar to the input chain order
    // (i.e. introduce the minimal number of changes).
    final double[] yArray =
        IntStream.range(0, originalChainOrder.size())
            .filter(i -> component.contains(originalChainOrder.get(i)))
            .mapToDouble(i -> i)
            .toArray();

    return map.get(map.firstKey()).stream()
        .map(
            candidate ->
                Pair.of(
                    candidate,
                    ChainReorderer.spearmanCorrelation(originalChainOrder, yArray, candidate)))
        .max(Comparator.comparingDouble(Pair::getValue))
        .map(Pair::getKey)
        .orElse(component);
  }

  private static double spearmanCorrelation(
      final List originalChainOrder,
      final double[] yArray,
      final Collection candidate) {
    final double[] xArray =
        candidate.stream().map(originalChainOrder::indexOf).mapToDouble(i -> i).toArray();
    return ChainReorderer.SPEARMAN.correlation(xArray, yArray);
  }

  private static int countPseudoknots(
      final List candidateOrder,
      final Collection residues,
      final Collection basePairs) {
    final List reordered =
        residues.stream()
            .filter(identifier -> candidateOrder.contains(identifier.chainIdentifier()))
            .sorted(Comparator.comparingInt(t -> candidateOrder.indexOf(t.chainIdentifier())))
            .collect(Collectors.toList());
    final List filteredBasePairs =
        basePairs.stream()
            .filter(
                basePair -> candidateOrder.contains(basePair.basePair().left().chainIdentifier()))
            .filter(
                basePair -> candidateOrder.contains(basePair.basePair().right().chainIdentifier()))
            .collect(Collectors.toList());
    final BpSeq bpSeq = BpSeq.fromBasePairs(reordered, filteredBasePairs);
    final Converter converter = ImmutableDefaultConverter.of();
    return converter.convert(bpSeq).pseudoknotOrder();
  }

  private static Map> buildGraph(
      final Collection basePairs) {
    final Map> map = new HashMap<>();
    basePairs.stream()
        .map(ClassifiedBasePair::basePair)
        .flatMap(basePair -> Stream.of(basePair, basePair.invert()))
        .map(
            basePair ->
                Pair.of(basePair.left().chainIdentifier(), basePair.right().chainIdentifier()))
        .filter(pair -> !pair.getLeft().equals(pair.getRight()))
        .distinct()
        .forEach(
            pair -> {
              map.putIfAbsent(pair.getLeft(), new HashSet<>());
              map.get(pair.getLeft()).add(pair.getRight());
            });
    return map;
  }

  private static void depthFirstSearch(
      final String u,
      final Map> graph,
      final Collection visited,
      final Collection component) {
    visited.add(u);
    component.add(u);

    for (final String v : graph.getOrDefault(u, Collections.emptySet())) {
      if (!visited.contains(v)) {
        ChainReorderer.depthFirstSearch(v, graph, visited, component);
      }
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy