All downloads are free. Search and download functionality uses the official Maven repository.

pl.poznan.put.structure.formats.DotBracket Maven / Gradle / Ivy

package pl.poznan.put.structure.formats;

import org.apache.commons.collections4.BidiMap;
import org.apache.commons.collections4.bidimap.DualHashBidiMap;
import pl.poznan.put.structure.DotBracketSymbol;

import java.util.ArrayDeque;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Deque;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

/** An RNA structure encoded in dot-bracket format. */
public interface DotBracket {
  /** @return The list of dot-bracket symbols. */
  List symbols();

  /**
   * Combines strands which share a base pair into a new dot-bracket instance and returns a list of
   * those.
   *
   * @return The list of dot-bracket instances, each containing strands which only pair with each
   *     other.
   */
  List combineStrands();

  /**
   * Returns the index of a dot-bracket symbol according to some external source like PDB numbering.
   *
   * @param symbol Dot-bracket symbol for which the original index is sought.
   * @return An index which reflects the numbering in real structure (e.g. PDB).
   */
  default int originalIndex(final DotBracketSymbol symbol) {
    return symbol.index() + 1;
  }

  /** @return The list of strands. */
  default List strands() {
    return Collections.singletonList(ImmutableStrandView.of("", this, 0, structure().length()));
  }

  /** @return The sequence of nucleotides. */
  default String sequence() {
    return symbols().stream()
        .map(DotBracketSymbol::sequence)
        .map(c -> Character.toString(c))
        .collect(Collectors.joining());
  }

  /** @return The sequence of dots and brackets representing paired and unpaired residues. */
  default String structure() {
    return symbols().stream()
        .map(DotBracketSymbol::structure)
        .map(c -> Character.toString(c))
        .collect(Collectors.joining());
  }

  default Map pairs() {
    final String opening = "([{ parentheses =
        new DualHashBidiMap<>(
            IntStream.range(0, opening.length())
                .boxed()
                .collect(Collectors.toMap(opening::charAt, closing::charAt)));

    final Map> parenthesesStacks =
        parentheses.keySet().stream()
            .collect(Collectors.toMap(Function.identity(), ignored -> new ArrayDeque<>()));

    final Map result = new HashMap<>();

    for (final DotBracketSymbol symbol : symbols()) {
      final char structure = symbol.structure();

      if (parentheses.containsKey(structure)) {
        // catch opening '(', '[', etc.
        parenthesesStacks.get(structure).push(symbol);
      } else if (parentheses.containsValue(structure)) {
        // catch closing ')', ']', etc.
        final DotBracketSymbol pair = parenthesesStacks.get(parentheses.getKey(structure)).pop();
        result.put(symbol, pair);
        result.put(pair, symbol);
      }
    }

    return result;
  }

  /**
   * @return A string representation of this dot-bracket, where every strand is written out
   *     separately.
   */
  default String toStringWithStrands() {
    return strands().stream().map(String::valueOf).collect(Collectors.joining("\n"));
  }

  /** @return The number of nucleotides in this structure. */
  default int length() {
    return symbols().size();
  }

  /** @return True, if at least one symbol represents a missing residue. */
  default boolean containsMissing() {
    return symbols().stream().anyMatch(DotBracketSymbol::isMissing);
  }

  /** @return The list of missing symbols at 5' and 3' ends of all strands. */
  default List missingTerminal() {
    return strands().stream()
        .flatMap(strand -> Stream.of(strand.missingBegin(), strand.missingEnd()))
        .collect(Collectors.toList());
  }

  /** @return The list of missing symbols which are not at 5' or 3' ends of any strand. */
  default List missingInternal() {
    // collect all missing from beginning and ends of strands
    final Set missingTerminal =
        missingTerminal().stream()
            .map(TerminalMissing::symbols)
            .flatMap(Collection::stream)
            .collect(Collectors.toSet());

    // get all missing symbols which are internal
    return strands().stream()
        .flatMap(strand -> strand.symbols().stream())
        .filter(DotBracketSymbol::isMissing)
        .filter(dotBracketSymbol -> !missingTerminal.contains(dotBracketSymbol))
        .collect(Collectors.toList());
  }

  /** @return The pseudoknot order of this structure. */
  default int pseudoknotOrder() {
    return symbols().stream().map(DotBracketSymbol::order).max(Comparator.naturalOrder()).orElse(0);
  }

  /**
   * Finds a strand which contains the given symbol.
   *
   * @param symbol The symbol to look for.
   * @return The strand containing the symbol.
   */
  default Strand findStrand(final DotBracketSymbol symbol) {
    return strands().stream()
        .filter(strand -> strand.symbols().contains(symbol))
        .findFirst()
        .orElseThrow(
            () ->
                new IllegalArgumentException("Failed to find strand containing symbol: " + symbol));
  }

  /**
   * Creates a string representation of nucleotide sequence, where strands may be separated with
   * ampersand {@code &} or not.
   *
   * @param separateStrands If true, the result will contain ampersand between strands.
   * @return The string representation of nucleotide sequence.
   */
  default String sequence(final boolean separateStrands) {
    final StringBuilder builder = new StringBuilder();
    for (final Strand strand : strands()) {
      builder.append(strand.sequence());
      if (separateStrands) {
        builder.append('&');
      }
    }
    return builder.toString();
  }

  /**
   * Creates a string of dots and brackets which represents base pairing, where strands may be
   * separated with ampersand {@code &} or not.
   *
   * @param separateStrands If true, the result will contain ampersand between strands.
   * @return The string representation of this structure.
   */
  default String structure(final boolean separateStrands) {
    final StringBuilder builder = new StringBuilder();
    for (final Strand strand : strands()) {
      builder.append(strand.structure());
      if (separateStrands) {
        builder.append('&');
      }
    }
    return builder.toString();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy