All downloads are free. The search and download features use the official Maven repository.

pl.poznan.put.structure.formats.Ct Maven / Gradle / Ivy

package pl.poznan.put.structure.formats;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.immutables.value.Value;
import pl.poznan.put.pdb.ChainNumberICode;
import pl.poznan.put.pdb.analysis.MoleculeType;
import pl.poznan.put.pdb.analysis.PdbChain;
import pl.poznan.put.pdb.analysis.PdbModel;
import pl.poznan.put.pdb.analysis.PdbResidue;
import pl.poznan.put.pdb.analysis.SingleTypedResidueCollection;
import pl.poznan.put.structure.DotBracketSymbol;
import pl.poznan.put.structure.pseudoknots.Region;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/** An RNA secondary structure encoded in CT (connect) format. */
@Value.Immutable
public abstract class Ct implements Serializable {
  /**
   * Parses a string with CT data into an instance of this class.
   *
   * <p>Every line is trimmed and everything from the first {@code #} onwards is dropped as a
   * comment. Lines that are blank afterwards are ignored. The first remaining line (the CT header)
   * is skipped and each following line is parsed with {@link ExtendedEntry#fromString(String)}.
   *
   * @param data The string with CT data.
   * @return An instance of this class with parsed data.
   * @throws IllegalArgumentException If a content line does not conform to the CT format.
   */
  public static Ct fromString(final String data) {
    final List<ExtendedEntry> entries =
        Arrays.stream(data.split("\n"))
            .map(String::trim)
            // drop an optional trailing comment; a line without '#' is returned unchanged
            .map(line -> StringUtils.substringBefore(line, "#"))
            .filter(StringUtils::isNotBlank)
            // the first non-blank line is the header, not an entry
            .skip(1L)
            .map(ExtendedEntry::fromString)
            .collect(Collectors.toList());
    return ImmutableCt.of(entries);
  }

  /**
   * Converts RNA secondary structure in BPSEQ format to CT format. Every BPSEQ entry is converted
   * with {@link ExtendedEntry#fromEntry(BpSeq.Entry)}.
   *
   * @param bpSeq The data in BPSEQ format.
   * @return An instance of this class with converted data.
   */
  public static Ct fromBpSeq(final BpSeq bpSeq) {
    final List<ExtendedEntry> entries =
        bpSeq.entries().stream().map(ExtendedEntry::fromEntry).collect(Collectors.toList());
    return ImmutableCt.of(entries);
  }

  /**
   * Converts RNA secondary structure in BPSEQ format to CT format, taking into account information
   * from 3D coordinates (residue numbering, chain sizes).
   *
   * <p>Only residues from chains whose molecule type is RNA are considered. The i-th BPSEQ entry is
   * matched with the i-th such residue, so both collections must have the same size.
   *
   * @param bpSeq The data in BPSEQ format.
   * @param model The 3D data.
   * @return An instance of this class with converted data.
   * @throws IllegalArgumentException If the number of BPSEQ entries differs from the number of RNA
   *     residues in the model.
   */
  public static Ct fromBpSeqAndPdbModel(final BpSeq bpSeq, final PdbModel model) {
    final List<PdbResidue> residues =
        model.chains().stream()
            .filter(chain -> chain.moleculeType() == MoleculeType.RNA)
            .map(PdbChain::residues)
            .flatMap(Collection::stream)
            .collect(Collectors.toList());
    // copy into a list to get positional access to the entries
    final List<BpSeq.Entry> entries = new ArrayList<>(bpSeq.entries());

    if (residues.size() != entries.size()) {
      throw new IllegalArgumentException(
          String.format(
              "Failed to create CT from BPSEQ and PDB data, because there are %d BPSEQ entries and %d residues",
              entries.size(), residues.size()));
    }

    final List<ExtendedEntry> extendedEntries =
        IntStream.range(0, entries.size())
            .mapToObj(
                i -> ExtendedEntry.fromEntryAndPdbResidue(entries.get(i), residues.get(i), model))
            .collect(Collectors.toList());
    return ImmutableCt.of(extendedEntries);
  }

  /**
   * Converts RNA secondary structure in dot-bracket format to CT format. Strands are processed one
   * by one and every symbol is converted with {@link
   * ExtendedEntry#fromDotBracketSymbol(DotBracket, List, int)}.
   *
   * @param dotBracket The data in dot-bracket format.
   * @return An instance of this class with converted data.
   */
  public static Ct fromDotBracket(final DotBracket dotBracket) {
    final List<ExtendedEntry> entries =
        dotBracket.strands().stream()
            .map(Strand::symbols)
            .flatMap(
                symbols ->
                    // the position within the strand is needed, hence the index-based stream
                    IntStream.range(0, symbols.size())
                        .mapToObj(i -> ExtendedEntry.fromDotBracketSymbol(dotBracket, symbols, i)))
            .collect(Collectors.toList());
    return ImmutableCt.of(entries);
  }

  /** @return The CT entries, kept in natural order (ascending `index` column). */
  @Value.Parameter(order = 1)
  @Value.NaturalOrder
  public abstract SortedSet<ExtendedEntry> entries();

  /** @return The number of strands, i.e. the number of entries whose `after` column is 0. */
  public final int strandCount() {
    int strands = 0;
    for (final ExtendedEntry entry : entries()) {
      if (entry.after() == 0) {
        strands++;
      }
    }
    return strands;
  }

  /**
   * Creates a copy of this instance, but with the given pair removed. Both the given entry and the
   * entry that points back at it (if any) get their `pair` column reset to 0.
   *
   * @param entry The pair to remove.
   * @return A copy of this instance without the given pair. If the entry is not paired, the copy is
   *     unchanged.
   */
  public final Ct withoutPair(final ExtendedEntry entry) {
    if (!entry.isPaired()) {
      return ImmutableCt.copyOf(this);
    }

    final SortedSet<ExtendedEntry> entrySet = new TreeSet<>(entries());
    // entries are ordered by `index` only, so remove-then-add swaps in the unpaired copy
    entrySet.remove(entry);
    entrySet.add(ImmutableExtendedEntry.copyOf(entry).withPair(0));

    // the partner is the entry whose `pair` column points at the given entry
    final Optional<ExtendedEntry> paired =
        entries().stream().filter(e -> e.pair() == entry.index()).findFirst();
    paired.ifPresent(
        partner -> {
          entrySet.remove(partner);
          entrySet.add(ImmutableExtendedEntry.copyOf(partner).withPair(0));
        });

    return ImmutableCt.of(entrySet);
  }

  /**
   * Finds all isolated base pairs and creates a copy of this instance without them. A base pair is
   * treated as isolated when it forms a region of length 1.
   *
   * @return A copy of this instance, but with all isolated base pairs removed.
   */
  public final Ct withoutIsolatedPairs() {
    Ct copy = ImmutableCt.copyOf(this);
    for (final Region region : Region.createRegions(BpSeq.fromCt(this))) {
      if (region.length() != 1) {
        continue;
      }

      // map the only entry of the region back to the CT entry with the same index
      final Optional<ExtendedEntry> entry =
          entries().stream()
              .filter(e -> e.index() == region.entries().get(0).index())
              .findFirst();
      if (entry.isPresent()) {
        copy = copy.withoutPair(entry.get());
      }
    }
    return copy;
  }

  /**
   * Formats this structure as text: the first line holds the number of entries and each following
   * line holds a single entry.
   */
  @Override
  public final String toString() {
    final StringBuilder text = new StringBuilder().append(entries().size()).append('\n');
    entries().forEach(entry -> text.append(entry).append('\n'));
    return text.toString();
  }

  /**
   * Checks the consistency of all entries.
   *
   * <p>If the last entry has a non-zero `after` column, a corrected copy (with `after` set to 0)
   * is returned instead of this instance; that copy is validated in turn. Otherwise the `index`,
   * `before`, `after` and `pair` columns are checked and this instance is returned.
   *
   * @return This instance if it is valid, or a corrected copy as described above.
   * @throws IllegalArgumentException If there are no entries or any of the checks fails.
   */
  @Value.Check
  protected Ct validate() {
    final List<ExtendedEntry> list = new ArrayList<>(entries());

    // fail with a clear message rather than an IndexOutOfBoundsException on the lookups below
    Validate.isTrue(!list.isEmpty(), "Invalid CT data (expected at least one entry)");

    // fix the last entry if required
    final ExtendedEntry lastEntry = list.get(list.size() - 1);
    if (lastEntry.after() != 0) {
      list.remove(list.size() - 1);
      list.add(ImmutableExtendedEntry.copyOf(lastEntry).withAfter(0));
      return ImmutableCt.of(list);
    }

    // check on the first entry
    Validate.isTrue(
        list.get(0).before() == 0,
        "Invalid `before` column (expected value is 0 for the first entry):%n  %s",
        list.get(0));

    for (int i = 1; i < list.size(); i++) {
      final ExtendedEntry previous = list.get(i - 1);
      final ExtendedEntry current = list.get(i);

      // sequential check on `index` column
      Validate.isTrue(
          current.index() - previous.index() == 1,
          "Invalid `index` column (expected next value than its predecessor):%n  %s%n  %s",
          previous,
          current);

      if (current.before() != 0) {
        // sequential check on `before` column
        Validate.isTrue(
            current.before() - previous.before() == 1,
            "Invalid `before` column (expected next value than its predecessor):%n  %s%n  %s",
            previous,
            current);
      }

      if (previous.after() == 0) {
        // check on `before` column for new strands
        Validate.isTrue(
            current.before() == 0,
            "Invalid `before` column (expected 0 for new strand):%n  %s%n  %s",
            previous,
            current);
        // check on `after` column for new strands
        Validate.isTrue(
            current.after() == 0 || current.after() == 2,
            "Invalid `after` column (expected 2 for new strand or 0 for a 1nt long strand):%n  %s%n  %s",
            previous,
            current);
      } else {
        // sequential check on `after` column
        Validate.isTrue(
            current.after() == 0 || current.after() - previous.after() == 1,
            "Invalid `after` column (expected next value than its predecessor):%n  %s%n  %s",
            previous,
            current);
      }
    }

    // index -> pair lookup used to verify that pairing is symmetric
    final Map<Integer, Integer> map =
        entries().stream().collect(Collectors.toMap(ExtendedEntry::index, ExtendedEntry::pair));
    final int lastIndex = lastEntry.index();

    for (final ExtendedEntry entry : list) {
      if (entry.pair() != 0) {
        // checks on `pair` column
        Validate.isTrue(map.containsKey(entry.index()), "Missing mapping for:%n  %s", entry);
        Validate.isTrue(map.containsKey(entry.pair()), "Missing mapping for:%n  %s", entry);
        Validate.isTrue(
            map.get(entry.index()) == entry.pair(),
            "Incorrect mapping:%n  %s%n  mapping[entry.index]=%d",
            entry,
            map.get(entry.index()));
        Validate.isTrue(
            map.get(entry.pair()) == entry.index(),
            "Incorrect mapping:%n  %s%n  mapping[entry.pair]=%d",
            entry,
            map.get(entry.pair()));
      }

      // checks on `before` column
      Validate.isTrue(
          entry.before() >= 0, "Invalid `before` column (expected positive value):%n  %s", entry);
      Validate.isTrue(
          entry.before() < lastIndex,
          "Invalid `before` column (expected value less than %d):%n  %s",
          lastIndex,
          entry);

      // checks on `after` columns
      Validate.isTrue(
          entry.after() == 0 || entry.after() >= 2,
          "Invalid `after` column (expected value at least 2):%n  %s",
          entry);
      Validate.isTrue(
          entry.after() <= lastIndex,
          "Invalid `after` column (expected value at most %d):%n  %s",
          lastIndex,
          entry);
    }

    return this;
  }

  /**
   * A single entry in the CT formatted structure. Entries are ordered by their `index` column
   * only.
   */
  @Value.Immutable
  public abstract static class ExtendedEntry implements Comparable<ExtendedEntry> {
    /**
     * Creates an instance from a string in format: int string int int int int.
     *
     * <p>The line must consist of exactly six whitespace-separated columns; only the first
     * character of the second column is used as the sequence symbol.
     *
     * @param line A line of text formatted as a CT content line.
     * @return An instance of this class.
     * @throws IllegalArgumentException If the line does not have six columns or a numeric column
     *     cannot be parsed.
     */
    public static ExtendedEntry fromString(final String line) {
      final String[] split = StringUtils.split(line);
      if (split.length != 6) {
        throw new IllegalArgumentException("Line does not conform to CT format: " + line);
      }
      try {
        final int index = Integer.parseInt(split[0]);
        final char seq = split[1].charAt(0);
        final int before = Integer.parseInt(split[2]);
        final int after = Integer.parseInt(split[3]);
        final int pair = Integer.parseInt(split[4]);
        final int original = Integer.parseInt(split[5]);
        return ImmutableExtendedEntry.of(index, seq, before, after, pair, original);
      } catch (final NumberFormatException e) {
        throw new IllegalArgumentException(
            "Invalid CT format. Failed to parse column values: " + line, e);
      }
    }

    /**
     * Converts a BPSEQ entry into an instance of this class. The `before` and `after` columns are
     * set to the neighbouring indices and the `original` column repeats the index.
     *
     * @param entry A BPSEQ entry to convert.
     * @return An instance of this class.
     */
    public static ExtendedEntry fromEntry(final BpSeq.Entry entry) {
      return ImmutableExtendedEntry.of(
          entry.index(),
          entry.seq(),
          entry.index() - 1,
          entry.index() + 1,
          entry.pair(),
          entry.index());
    }

    /**
     * Converts a BPSEQ entry into an instance of this class using information from a parsed 3D
     * data. The `before` and `after` columns are derived from the position of the residue in its
     * chain, the `original` column is the residue number, and the BPSEQ comment is carried over.
     *
     * @param entry A BPSEQ entry to convert.
     * @param residue A PDB residue mapped to the BPSEQ entry.
     * @param model The PDB model that contains the residue.
     * @return An instance of this class.
     */
    public static ExtendedEntry fromEntryAndPdbResidue(
        final BpSeq.Entry entry, final ChainNumberICode residue, final PdbModel model) {
      final SingleTypedResidueCollection chain = model.findChainContainingResidue(residue);
      final int before = chain.indexOf(residue);
      // NOTE(review): assuming indexOf is zero-based, the modulo wraps `after` to 0 for the last
      // residue of the chain - confirm against SingleTypedResidueCollection
      final int after = (before + 2) % (chain.residues().size() + 1);
      return ImmutableExtendedEntry.of(
              entry.index(), entry.seq(), before, after, entry.pair(), residue.residueNumber())
          .withComment(entry.comment());
    }

    /**
     * Converts a dot-bracket symbol into an instance of this class.
     *
     * @param dotBracket The whole dot-bracket structure.
     * @param symbols The list of symbols in the current strand.
     * @param i The index of the current symbol.
     * @return An instance of this class.
     */
    public static ExtendedEntry fromDotBracketSymbol(
        final DotBracket dotBracket, final List<DotBracketSymbol> symbols, final int i) {
      final Map<DotBracketSymbol, DotBracketSymbol> pairs = dotBracket.pairs();
      final DotBracketSymbol symbol = symbols.get(i);
      return ImmutableExtendedEntry.of(
          symbol.index() + 1,
          symbol.sequence(),
          // `before` and `after` are one-based positions within the strand; 0 means "none"
          i,
          i == symbols.size() - 1 ? 0 : i + 2,
          pairs.containsKey(symbol) ? pairs.get(symbol).index() + 1 : 0,
          dotBracket.originalIndex(symbol));
    }

    /** @return The value of `index` column. */
    @Value.Parameter(order = 1)
    public abstract int index();

    /** @return The value of `seq` column. */
    @Value.Parameter(order = 2)
    public abstract char seq();

    /** @return The value of `before` column. */
    @Value.Parameter(order = 3)
    public abstract int before();

    /** @return The value of `after` column. */
    @Value.Parameter(order = 4)
    public abstract int after();

    /** @return The value of `pair` column. */
    @Value.Parameter(order = 5)
    public abstract int pair();

    /** @return The value of `original` column. */
    @Value.Parameter(order = 6)
    public abstract int original();

    /** @return An optional comment (empty by default). */
    @Value.Default
    public String comment() {
      return "";
    }

    /** @return True if `pair` column is non-zero. */
    public boolean isPaired() {
      return pair() != 0;
    }

    /**
     * Formats the entry as a CT content line: the six columns separated by single spaces, followed
     * by {@code " # "} and the comment when the comment is not blank.
     */
    @Override
    public String toString() {
      final StringBuilder builder = new StringBuilder();
      builder.append(index());
      builder.append(' ');
      builder.append(seq());
      builder.append(' ');
      builder.append(before());
      builder.append(' ');
      builder.append(after());
      builder.append(' ');
      builder.append(pair());
      builder.append(' ');
      builder.append(original());
      if (StringUtils.isNotBlank(comment())) {
        builder.append(" # ");
        builder.append(comment());
      }
      return builder.toString();
    }

    /** Orders entries by the `index` column; no other column is taken into account. */
    @Override
    public int compareTo(final ExtendedEntry t) {
      return Integer.compare(index(), t.index());
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy