All downloads are FREE. The search and download functionality uses the official Maven repository.

net.htmlparser.jericho.CharSequenceParseText Maven / Gradle / Ivy

Go to download

Jericho HTML Parser is a Java library that allows analysis and manipulation of parts of an HTML document, including server-side tags, while reproducing verbatim any unrecognised or invalid HTML.

The newest version!
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.4
// Copyright (C) 2004-2013 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// 3. The Apache License version 2.0,
// included in this distribution in the file licence-apache-2.0.html
// or available at http://www.apache.org/licenses/LICENSE-2.0.html
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

class CharSequenceParseText implements ParseText {
	private final CharSequence charSequence;

	/**
	 * Creates a parse text backed by the given character sequence.
	 * The sequence is stored by reference; no copy is made here.
	 *
	 * @param charSequence the underlying text that all lookups in this object read from.
	 */
	CharSequenceParseText(final CharSequence charSequence) {
		this.charSequence = charSequence;
	}

	/**
	 * Returns the character at the given index, with the ASCII letters {@code 'A'}-{@code 'Z'}
	 * folded to lower case (see {@code internalCharAt}).
	 *
	 * @param index position in the underlying character sequence.
	 * @return the (possibly lower-cased) character at {@code index}.
	 * @throws IndexOutOfBoundsException if the character read equals {@code StreamedText.END_OF_STREAM}
	 *         and {@code atEndOfStream()} returns true.
	 */
	public final char charAt(final int index) {
		final char result=internalCharAt(index);
		// Compare against the sentinel first: it is a cheap test, so atEndOfStream()
		// is only consulted in the rare case where the sentinel value actually appears.
		if (result==StreamedText.END_OF_STREAM) {
			if (atEndOfStream()) throw new IndexOutOfBoundsException();
		}
		return result;
	}

	/**
	 * Reads the character at the given index from the underlying sequence and converts
	 * the ASCII letters {@code 'A'}-{@code 'Z'} to lower case. All other characters are
	 * returned unchanged. No end-of-stream check is performed here.
	 *
	 * @param index position in the underlying character sequence.
	 * @return the character at {@code index}, lower-cased if it is an ASCII capital letter.
	 */
	private char internalCharAt(final int index) {
		final char raw=charSequence.charAt(index);
		if (raw<'A' || raw>'Z') return raw;
		// Flipping bit 0x20 maps an ASCII upper-case letter onto its lower-case counterpart.
		return (char)(raw ^ 0x20);
	}

	public final boolean containsAt(final String str, final int pos) {
		for (int i=0; igetEnd() ? getEnd() : breakAtIndex);
		for (int i=(fromIndex<0 ? 0 : fromIndex); ilastPossibleBreakAtIndex) ? lastPossibleBreakAtIndex : breakAtIndex;
		outerLoop: for (int i=(fromIndex<0 ? 0 : fromIndex); igetEnd() ? getEnd() : fromIndex); i>breakAtIndex; i--)
			if (internalCharAt(i)==searchChar) return i; // no need to check for end of stream because we're searching backwards
		return -1;
	}

	/**
	 * Searches backwards for {@code searchString} starting at {@code fromIndex},
	 * without a break position.
	 * Delegates to the three-argument overload, passing {@code NO_BREAK} as the break index.
	 *
	 * @param searchString the text to look for.
	 * @param fromIndex the index at which the backward search starts.
	 * @return whatever the three-argument overload returns: the start index of a match, or -1 if none is found.
	 */
	public final int lastIndexOf(final String searchString, final int fromIndex) {
		final int breakAtIndex=NO_BREAK;
		return lastIndexOf(searchString,fromIndex,breakAtIndex);
	}

	/**
	 * Searches backwards for {@code searchString}, starting with a match beginning at
	 * {@code fromIndex} and giving up once the candidate start index is no longer greater
	 * than {@code breakAtIndex}.
	 * <p>
	 * The text is read through {@code internalCharAt}, which lower-cases ASCII capitals,
	 * while {@code searchString} is compared as supplied. Capital ASCII letters in
	 * {@code searchString} therefore never match.
	 *
	 * @param searchString the text to look for; a single-character string is handed to the
	 *        {@code char} overload, and an empty string makes the method return {@code fromIndex}.
	 * @param fromIndex the highest start index to consider; it is lowered to the last position
	 *        at which {@code searchString} could still fit before {@code getEnd()}.
	 * @param breakAtIndex exclusive lower bound for the start index of a match.
	 * @return the start index of the match found, or -1 if there is none.
	 */
	public final int lastIndexOf(final String searchString, int fromIndex, final int breakAtIndex) {
		final int searchLength=searchString.length();
		if (searchLength==1) return lastIndexOf(searchString.charAt(0),fromIndex,breakAtIndex);
		if (searchLength==0) return fromIndex;
		final int lastValidStart=getEnd()-searchLength;
		if (breakAtIndex>lastValidStart) return -1;
		if (fromIndex>lastValidStart) fromIndex=lastValidStart;
		final int tailOffset=searchLength-1;
		final char tailChar=searchString.charAt(tailOffset);
		for (int start=fromIndex; start>breakAtIndex; start--) {
			// The final character is tested first as a quick filter.
			// No end of stream check is needed because the search runs backwards.
			if (internalCharAt(start+tailOffset)!=tailChar) continue;
			// Verify the remaining characters from right to left.
			int j=tailOffset-1;
			while (j>=0 && searchString.charAt(j)==internalCharAt(start+j)) j--;
			if (j<0) return start;
		}
		return -1;
	}

	/**
	 * Returns the length of the underlying character sequence.
	 *
	 * @return the value of {@code charSequence.length()}.
	 */
	public final int length() {
		return this.charSequence.length();
	}

	/**
	 * Returns the requested range as a new sequence by delegating to {@code substring(begin,end)}.
	 *
	 * @param begin start index of the range, passed through to {@code substring}.
	 * @param end end index of the range, passed through to {@code substring}.
	 * @return the string produced by {@code substring(begin,end)}.
	 */
	public final CharSequence subSequence(final int begin, final int end) {
		// Efficiency is not a concern here: this method is not used anywhere internally.
		final String text=substring(begin,end);
		return text;
	}

	/**
	 * Returns the string form of the underlying character sequence.
	 * Unlike {@code charAt}, no case conversion is applied here.
	 *
	 * @return the value of {@code charSequence.toString()}.
	 */
	@Override
	public final String toString() {
		return this.charSequence.toString();
	}

	/**
	 * Returns the index just past the last character available for searching.
	 * In this class that is the length of the underlying character sequence.
	 *
	 * @return the length of the underlying character sequence.
	 */
	protected int getEnd() {
		// length() is final in this class and returns charSequence.length().
		return length();
	}

	/**
	 * Returns the requested range of the underlying character sequence as a lower-case string.
	 * <p>
	 * The conversion uses {@code Locale.ROOT} so that the result does not depend on the JVM's
	 * default locale. With the no-argument {@code toLowerCase()}, a Turkish or Azeri default
	 * locale turns {@code 'I'} into a dotless {@code 'ı'}, which would disagree with the
	 * ASCII case folding done by {@code internalCharAt}.
	 *
	 * @param begin start index (inclusive), as accepted by {@code CharSequence.subSequence}.
	 * @param end end index (exclusive), as accepted by {@code CharSequence.subSequence}.
	 * @return the lower-cased text of the given range.
	 */
	protected String substring(final int begin, final int end) {
		return charSequence.subSequence(begin,end).toString().toLowerCase(java.util.Locale.ROOT);
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy