All downloads are free. Search and download functionality uses the official Maven repository.

com.vladsch.flexmark.util.sequence.BasedSequence Maven / Gradle / Ivy

There is a newer version: 0.64.8
Show newest version
package com.vladsch.flexmark.util.sequence;

import com.vladsch.flexmark.util.data.DataHolder;
import com.vladsch.flexmark.util.data.DataKeyBase;
import com.vladsch.flexmark.util.misc.CharPredicate;
import com.vladsch.flexmark.util.misc.Pair;
import com.vladsch.flexmark.util.sequence.builder.IBasedSegmentBuilder;
import com.vladsch.flexmark.util.sequence.builder.SequenceBuilder;
import com.vladsch.flexmark.util.sequence.builder.tree.SegmentTree;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.util.ArrayList;
import java.util.List;

/**
 * A CharSequence that references original char sequence with offsets into original preserved.
 * 

* NOTE: '\0' changed to '\uFFFD' use {@link com.vladsch.flexmark.util.sequence.mappers.NullEncoder#decodeNull} mapper to get original null chars. *

* Since equals is used for comparison of sequences and strings by base sequence manager, a base sequence with NUL may not compare equal to * an equivalent unwrapped sequence because NUL chars are not converted. For Strings this is handled by using String.equals() for comparison. For * other CharacterSequence types the match will fail if original has NUL in it. *

* a subSequence() returns a sub-sequence from the original base sequence with corresponding offsets */ @SuppressWarnings("SameParameterValue") public interface BasedSequence extends IRichSequence, BasedOptionsHolder { BasedSequence NULL = new EmptyBasedSequence(); BasedSequence EMPTY = new EmptyBasedSequence(); BasedSequence EOL = CharSubSequence.of(SequenceUtils.EOL); BasedSequence SPACE = CharSubSequence.of(SequenceUtils.SPACE); List EMPTY_LIST = new ArrayList<>(); BasedSequence[] EMPTY_ARRAY = new BasedSequence[0]; BasedSequence[] EMPTY_SEGMENTS = new BasedSequence[0]; BasedSequence LINE_SEP = CharSubSequence.of(SequenceUtils.LINE_SEP); @NotNull static BasedSequence of(@Nullable CharSequence charSequence) { return BasedSequenceImpl.create(charSequence); } @NotNull @Deprecated static BasedSequence of(@Nullable CharSequence charSequence, int startIndex) { return of(charSequence).subSequence(startIndex); } @NotNull @Deprecated static BasedSequence of(@Nullable CharSequence charSequence, int startIndex, int endIndex) { return of(charSequence).subSequence(startIndex, endIndex); } @NotNull static BasedSequence ofSpaces(int count) { return of(RepeatedSequence.ofSpaces(count)); } @NotNull static BasedSequence repeatOf(char c, int count) { return of(RepeatedSequence.repeatOf(String.valueOf(c), 0, count)); } @NotNull static BasedSequence repeatOf(@NotNull CharSequence chars, int count) { return of(RepeatedSequence.repeatOf(chars, 0, chars.length() * count)); } @NotNull static BasedSequence repeatOf(@NotNull CharSequence chars, int startIndex, int endIndex) { return of(RepeatedSequence.repeatOf(chars, startIndex, endIndex)); } @SuppressWarnings("unchecked") @Override @NotNull SequenceBuilder getBuilder(); /** * Get the underlying object on which this sequence contents are based * * @return underlying object containing original text */ @NotNull Object getBase(); /** * Get the base sequence for the text * * @return base sequence */ @NotNull BasedSequence getBaseSequence(); 
/** * Get the start offset of this sequence into {@link #getBaseSequence()} and {@link #getBase()} original text source. * * @return start offset in original text */ int getStartOffset(); /** * Get the end offset of this sequence into {@link #getBaseSequence()} and {@link #getBase()} original text source. * * @return end offset in original text */ int getEndOffset(); /** * Get the offset of index in this sequence mapped to offset into {@link #getBaseSequence()} and {@link #getBase()} original text source. * NOTE: if the character at given index does not equal the corresponding character in the base sequence then this method should return -1 otherwise segmented based sequence will be created for original base character * * @param index index for which to get the offset in original source * @return offset of index of this sequence in original text */ int getIndexOffset(int index); /** * Add segments for this sequence, replacing out of base characters with strings * * @param builder builder */ void addSegments(@NotNull IBasedSegmentBuilder builder); /** * Get the segment tree for this sequence * * @return segment tree */ @NotNull SegmentTree getSegmentTree(); /** * Get the range of this sequence in original {@link #getBaseSequence()} and {@link #getBase()} original text source. * * @return Range of start offset and end offset */ @NotNull Range getSourceRange(); /** * Get a portion of this sequence * * @param startIndex offset from startIndex of this sequence * @param endIndex offset from startIndex of this sequence * @return based sequence which represents the requested range of this sequence. */ @NotNull @Override BasedSequence subSequence(int startIndex, int endIndex); /** * Get a portion of this sequence's base sequence *

* NOTE: this means that if this sequence applies modifications to the original sequence then these modifications are NOT be applied to the returned sequence. *

* NOTE: It should only be implemented in classes which provide base sequences such as {@link CharSubSequence} and {@link SubSequence} others use inherited implementation of {@link BasedSequenceImpl} * * @param startIndex offset from 0 of original sequence * @param endIndex offset from 0 of original sequence * @return based sequence whose contents reflect the selected portion */ @NotNull BasedSequence baseSubSequence(int startIndex, int endIndex); /** * Get a portion of the original sequence that this sequence is based on * * @param startIndex offset from 0 of original sequence * @return based sequence from startIndex to the endIndex */ @NotNull BasedSequence baseSubSequence(int startIndex); /** * Safe, if index out of range but based sequence has characters will return those, else returns '\0' *

* Allows peeking into preceding/following characters to the ones contained in this sequence * * @param index index in string * @return character or '\0' if index out of base sequence */ char safeBaseCharAt(int index); /** * Safe, if index out of range but based sequence has characters will return those, else returns '\0' *

* Allows peeking into preceding/following characters to the ones contained in this sequence * * @param index index in string * @param predicate character set predicate * @return true if character at index tests true */ boolean isBaseCharAt(int index, @NotNull CharPredicate predicate); /** * Get empty prefix to this sequence * * @return same as subSequence(0,0) */ @NotNull BasedSequence getEmptyPrefix(); /** * Get empty suffix to this sequence * * @return same as subSequence(length()) */ @NotNull BasedSequence getEmptySuffix(); /** * Get the unescaped string of this sequence content * * @return unescaped text */ @NotNull String unescape(); /** * Get the unescaped string of this sequence content without unescaping entities * * @return unescaped text */ @NotNull String unescapeNoEntities(); /** * Get the unescaped string of this sequence content * * @param textMapper replaced text mapper which will be uses to map unescaped index to original source index * @return unescaped text in based sequence */ @NotNull BasedSequence unescape(@NotNull ReplacedTextMapper textMapper); /** * replace any \r\n and \r by \n * * @param textMapper replaced text mapper which will be uses to map unescaped index to original source index * @return based sequence with only \n for line separators */ @NotNull BasedSequence normalizeEOL(@NotNull ReplacedTextMapper textMapper); /** * replace any \r\n and \r by \n, append terminating EOL if one is not present * * @param textMapper replaced text mapper which will be uses to map unescaped index to original source index * @return based sequence with only \n for line separators and terminated by \n */ @NotNull BasedSequence normalizeEndWithEOL(@NotNull ReplacedTextMapper textMapper); /** * Test if the given sequence is a continuation of this sequence in original source text * * @param other sequence to test * @return true if the given sequence is a continuation of this one in the original text */ boolean isContinuedBy(@NotNull BasedSequence other); /** 
* Test if this sequence is a continuation of the given sequence in original source text * * @param other sequence to test * @return true if this sequence is a continuation of the given sequence in original source text */ boolean isContinuationOf(@NotNull BasedSequence other); /** * Splice the given sequence to the end of this one and return a BasedSequence of the result. * Does not copy anything, creates a new based sequence of the original text but one that spans * characters of this sequence and other * * @param other sequence to append to end of this one * @return based sequence that contains the span from start of this sequence and end of other *

* assertion will fail if the other sequence is not a continuation of this one */ @NotNull BasedSequence spliceAtEnd(@NotNull BasedSequence other); /** * start/end offset based containment, not textual * * @param other based sequence from the same base * @return true if other is contained in this */ boolean containsAllOf(@NotNull BasedSequence other); /** * start/end offset based containment, not textual * * @param other based sequence from the same base * @return true if other is contained in this */ boolean containsSomeOf(@NotNull BasedSequence other); /** * Get the prefix part of this from other, start/end offset based containment, not textual * * @param other based sequence from the same base * @return prefix part of this as compared to other, start/end offset based, not content */ @NotNull BasedSequence prefixOf(@NotNull BasedSequence other); /** * Get the suffix part of this from other, start/end offset based containment, not textual * * @param other based sequence from the same base * @return suffix part of this as compared to other, start/end offset based, not content */ @NotNull BasedSequence suffixOf(@NotNull BasedSequence other); /** * start/end offset based intersection, not textual * * @param other based sequence from the same parent * @return sequence which is the intersection of the range of this and other */ @NotNull BasedSequence intersect(@NotNull BasedSequence other); /** * Extend this based sequence to include characters from underlying based sequence * * @param charSet set of characters to include * @param maxCount maximum extra characters to include * @return sequence which */ @NotNull BasedSequence extendByAny(@NotNull CharPredicate charSet, int maxCount); @NotNull BasedSequence extendByAny(@NotNull CharPredicate charSet); @NotNull BasedSequence extendByOneOfAny(@NotNull CharPredicate charSet); /** * Extend this based sequence to include characters from underlying based sequence not in character set * * @param charSet set of characters to 
include * @param maxCount maximum extra characters to include * @return sequence which */ @NotNull BasedSequence extendByAnyNot(@NotNull CharPredicate charSet, int maxCount); @NotNull BasedSequence extendByAnyNot(@NotNull CharPredicate charSet); @NotNull BasedSequence extendByOneOfAnyNot(@NotNull CharPredicate charSet); @NotNull @Deprecated default BasedSequence extendToAny(@NotNull CharPredicate charSet, int maxCount) { return extendByAnyNot(charSet, maxCount); } @NotNull @Deprecated default BasedSequence extendToAny(@NotNull CharPredicate charSet) { return extendByAnyNot(charSet); } /** * Extend in contained based sequence * * @param eolChars characters to consider as EOL, note {@link #eolStartLength(int)} {@link #eolEndLength(int)} should report length of EOL found if length > 1 * @param includeEol if to include the eol in the string * @return resulting sequence after extension. If already spanning the line then this sequence is returned. * if the last character of this sequence are found in eolChars then no extension will be performed since it already includes the line end */ @NotNull BasedSequence extendToEndOfLine(@NotNull CharPredicate eolChars, boolean includeEol); @NotNull BasedSequence extendToEndOfLine(@NotNull CharPredicate eolChars); @NotNull BasedSequence extendToEndOfLine(boolean includeEol); @NotNull BasedSequence extendToEndOfLine(); /** * Extend in contained based sequence * * @param eolChars characters to consider as EOL, note {@link #eolStartLength(int)} {@link #eolEndLength(int)} should report length of EOL found if length > 1 * @param includeEol if to include the eol in the string * @return resulting sequence after extension. If already spanning the line then this sequence is returned. 
* if the first character of this sequence are found in eolChars then no extension will be performed since it already includes the line end */ @NotNull BasedSequence extendToStartOfLine(@NotNull CharPredicate eolChars, boolean includeEol); @NotNull BasedSequence extendToStartOfLine(@NotNull CharPredicate eolChars); @NotNull BasedSequence extendToStartOfLine(boolean includeEol); @NotNull BasedSequence extendToStartOfLine(); /** * Extend this based sequence to include characters from underlying based sequence * taking tab expansion to 4th spaces into account * * @param maxColumns maximum columns to include, default {@link Integer#MAX_VALUE} * @return sequence which */ @NotNull BasedSequence prefixWithIndent(int maxColumns); @NotNull BasedSequence prefixWithIndent(); /* These are convenience methods returning coordinates in Base Sequence of this sequence */ @NotNull Pair baseLineColumnAtIndex(int index); @NotNull Range baseLineRangeAtIndex(int index); int baseEndOfLine(int index); int baseEndOfLineAnyEOL(int index); int baseStartOfLine(int index); int baseStartOfLineAnyEOL(int index); int baseColumnAtIndex(int index); @NotNull Pair baseLineColumnAtStart(); @NotNull Pair baseLineColumnAtEnd(); int baseEndOfLine(); int baseEndOfLineAnyEOL(); int baseStartOfLine(); int baseStartOfLineAnyEOL(); @NotNull Range baseLineRangeAtStart(); @NotNull Range baseLineRangeAtEnd(); int baseColumnAtEnd(); int baseColumnAtStart(); class EmptyBasedSequence extends BasedSequenceImpl { public EmptyBasedSequence() { super(0); } @Override public int getOptionFlags() { return 0; } @Override public boolean allOptions(int options) { return false; } @Override public boolean anyOptions(int options) { return false; } @Override public T getOption(DataKeyBase dataKey) { return dataKey.get(null); } @Override public @Nullable DataHolder getOptions() { return null; } @Override public int length() { return 0; } @Override public char charAt(int index) { throw new StringIndexOutOfBoundsException("EMPTY 
sequence has no characters"); } @Override public int getIndexOffset(int index) { SequenceUtils.validateIndexInclusiveEnd(index, length()); return 0; } @NotNull @Override public BasedSequence subSequence(int i, int i1) { SequenceUtils.validateStartEnd(i, i1, length()); return this; } @NotNull @Override public BasedSequence baseSubSequence(int startIndex, int endIndex) { return subSequence(startIndex, endIndex); } @NotNull @Override public BasedSequence getBaseSequence() { return this; } @NotNull @Override public BasedSequence getBase() { return this; } @Override public int getStartOffset() { return 0; } @Override public int getEndOffset() { return 0; } @NotNull @Override public Range getSourceRange() { return Range.NULL; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy