// src.java.net.htmlparser.jericho.ParseText
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.1
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
package net.htmlparser.jericho;
/**
 * The text of a {@linkplain Source source} document in the form in which it is parsed.
 * <p>
 * Normally only users who wish to create custom tag types need this interface.
 * <p>
 * Parse text is the complete text of the source document converted to lower case, in which every
 * {@linkplain Segment#ignoreWhenParsing() ignored} segment has been replaced by space characters.
 * Keeping the text in lower case makes case insensitive parsing as efficient as possible.
 * <p>
 * Many of the methods declared here mirror those of {@code java.lang.String}, except that the
 * various {@code indexOf} and {@code lastIndexOf} methods have overloads taking an additional
 * {@code breakAtIndex} parameter. That parameter confines a search to a specified segment of the
 * text, which is not possible with the ordinary {@code String} class.
 * <p>
 * Instances of {@code ParseText} are obtained from the {@link Source#getParseText()} method.
 */
public interface ParseText extends CharSequence {

	/**
	 * Value passed as the {@code breakAtIndex} argument of certain methods to request that the
	 * search carry on to the start or end of the parse text.
	 */
	int NO_BREAK = -1;

	/**
	 * Gets the character located at the given index.
	 *
	 * @param index  the index of the character.
	 * @return the character at the given index, which is always in lower case.
	 */
	char charAt(int index);

	/**
	 * Tests whether the given string occurs in this parse text at the given position.
	 * <p>
	 * The equivalent operation on a string is
	 * {@code java.lang.String.startsWith(String prefix, int toffset)}.
	 *
	 * @param str  a string.
	 * @param pos  the position (index) in this parse text at which to look for the string.
	 * @return {@code true} if the string is present at that position, otherwise {@code false}.
	 */
	boolean containsAt(String str, int pos);

	/**
	 * Searches forwards from {@code fromIndex} for the first occurrence of a character.
	 * <p>
	 * A result of {@code -1} means the character was not found.
	 *
	 * @param searchChar  a character.
	 * @param fromIndex  the index to start the search from.
	 * @return the index in this parse text of the first occurrence of the character within the
	 *         searched range, or {@code -1} if there is none.
	 */
	int indexOf(char searchChar, int fromIndex);

	/**
	 * Searches forwards from {@code fromIndex} for the first occurrence of a character,
	 * abandoning the search at {@code breakAtIndex}.
	 * <p>
	 * The position given by {@code breakAtIndex} is itself excluded from the search.
	 * <p>
	 * Pass {@link #NO_BREAK ParseText.NO_BREAK} as the {@code breakAtIndex} to let the search run
	 * to the end of the text.
	 * <p>
	 * A result of {@code -1} means the character was not found.
	 *
	 * @param searchChar  a character.
	 * @param fromIndex  the index to start the search from.
	 * @param breakAtIndex  the index at which to break off the search, or {@link #NO_BREAK} if
	 *        the search is to continue to the end of the text.
	 * @return the index in this parse text of the first occurrence of the character within the
	 *         searched range, or {@code -1} if there is none.
	 */
	int indexOf(char searchChar, int fromIndex, int breakAtIndex);

	/**
	 * Searches forwards from {@code fromIndex} for the first occurrence of a string.
	 * <p>
	 * A result of {@code -1} means the string was not found.
	 *
	 * @param searchString  a string.
	 * @param fromIndex  the index to start the search from.
	 * @return the index in this parse text of the first occurrence of the string within the
	 *         searched range, or {@code -1} if there is none.
	 */
	int indexOf(String searchString, int fromIndex);

	/**
	 * Searches forwards from {@code fromIndex} for the first occurrence of a string,
	 * abandoning the search at {@code breakAtIndex}.
	 * <p>
	 * The position given by {@code breakAtIndex} is itself excluded from the search.
	 * <p>
	 * Pass {@link #NO_BREAK ParseText.NO_BREAK} as the {@code breakAtIndex} to let the search run
	 * to the end of the text.
	 * <p>
	 * A result of {@code -1} means the string was not found.
	 *
	 * @param searchString  a string.
	 * @param fromIndex  the index to start the search from.
	 * @param breakAtIndex  the index at which to break off the search, or {@link #NO_BREAK} if
	 *        the search is to continue to the end of the text.
	 * @return the index in this parse text of the first occurrence of the string within the
	 *         searched range, or {@code -1} if there is none.
	 */
	int indexOf(String searchString, int fromIndex, int breakAtIndex);

	/**
	 * Searches backwards from {@code fromIndex} for the last occurrence of a character.
	 * <p>
	 * A result of {@code -1} means the character was not found.
	 *
	 * @param searchChar  a character.
	 * @param fromIndex  the index to start the search from.
	 * @return the index in this parse text of the last occurrence of the character within the
	 *         searched range, or {@code -1} if there is none.
	 */
	int lastIndexOf(char searchChar, int fromIndex);

	/**
	 * Searches backwards from {@code fromIndex} for the last occurrence of a character,
	 * abandoning the search at {@code breakAtIndex}.
	 * <p>
	 * The position given by {@code breakAtIndex} is itself excluded from the search.
	 * <p>
	 * Pass {@link #NO_BREAK ParseText.NO_BREAK} as the {@code breakAtIndex} to let the search run
	 * to the start of the text.
	 * <p>
	 * A result of {@code -1} means the character was not found.
	 *
	 * @param searchChar  a character.
	 * @param fromIndex  the index to start the search from.
	 * @param breakAtIndex  the index at which to break off the search, or {@link #NO_BREAK} if
	 *        the search is to continue to the start of the text.
	 * @return the index in this parse text of the last occurrence of the character within the
	 *         searched range, or {@code -1} if there is none.
	 */
	int lastIndexOf(char searchChar, int fromIndex, int breakAtIndex);

	/**
	 * Searches backwards from {@code fromIndex} for the last occurrence of a string.
	 * <p>
	 * A result of {@code -1} means the string was not found.
	 *
	 * @param searchString  a string.
	 * @param fromIndex  the index to start the search from.
	 * @return the index in this parse text of the last occurrence of the string within the
	 *         searched range, or {@code -1} if there is none.
	 */
	int lastIndexOf(String searchString, int fromIndex);

	/**
	 * Searches backwards from {@code fromIndex} for the last occurrence of a string,
	 * abandoning the search at {@code breakAtIndex}.
	 * <p>
	 * The position given by {@code breakAtIndex} is itself excluded from the search.
	 * <p>
	 * Pass {@link #NO_BREAK ParseText.NO_BREAK} as the {@code breakAtIndex} to let the search run
	 * to the start of the text.
	 * <p>
	 * A result of {@code -1} means the string was not found.
	 *
	 * @param searchString  a string.
	 * @param fromIndex  the index to start the search from.
	 * @param breakAtIndex  the index at which to break off the search, or {@link #NO_BREAK} if
	 *        the search is to continue to the start of the text.
	 * @return the index in this parse text of the last occurrence of the string within the
	 *         searched range, or {@code -1} if there is none.
	 */
	int lastIndexOf(String searchString, int fromIndex, int breakAtIndex);

	/**
	 * Gets the length of the parse text.
	 *
	 * @return the length of the parse text.
	 */
	int length();

	/**
	 * Creates a new character sequence that is a subsequence of this one.
	 *
	 * @param begin  the begin position, inclusive.
	 * @param end  the end position, exclusive.
	 * @return a new character sequence that is a subsequence of this sequence.
	 */
	CharSequence subSequence(int begin, int end);

	/**
	 * Gets the content of the parse text as a {@code String}.
	 *
	 * @return the content of the parse text as a {@code String}.
	 */
	String toString();
}