// org.htmlparser.lexer.PageIndex Maven / Gradle / Ivy
// HTMLParser Library $Name: v1_5 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageIndex.java,v $
// $Author: derrickoswald $
// $Date: 2005/05/15 11:49:04 $
// $Revision: 1.18 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
package org.htmlparser.lexer;
import java.io.Serializable;
import org.htmlparser.util.sort.Ordered;
import org.htmlparser.util.sort.Sort;
import org.htmlparser.util.sort.Sortable;
/**
 * A sorted array of integers, the positions of the first characters of each line.
 * To facilitate processing the first element should be maintained at position 0.
 * Facilities to add, remove, search and determine row and column are provided.
 * This class provides similar functionality to a Vector but
 * does not incur the overhead of an Integer object per element.
 */
public class PageIndex
    implements
        Serializable,
        Sortable
{
    /**
     * Starting increment for allocations.
     * Used as the initial capacity of an index created without elements.
     */
    protected static final int mStartIncrement = 100;

    /**
     * Increment for allocations.
     * Doubled every time the backing array is reallocated.
     */
    protected int mIncrement;

    /**
     * The number of valid elements.
     */
    protected int mCount;

    /**
     * The elements.
     * Only the first <code>mCount</code> entries are valid.
     */
    protected int[] mIndices;

    /**
     * The page associated with this index.
     */
    protected Page mPage;

    /**
     * Create an empty index.
     * @param page The page associated with this index.
     */
    public PageIndex (Page page)
    {
        mPage = page;
        // Size the array from the constant: mIncrement is still zero at this
        // point, so using it here would allocate a zero-length array.
        mIndices = new int[mStartIncrement];
        mCount = 0;
        mIncrement = mStartIncrement * 2;
    }

    /**
     * Create an index with the one element given.
     * @param page The page associated with this index.
     * @param cursor The single element for the new index.
     */
    public PageIndex (Page page, int cursor)
    {
        this (page);
        mIndices[0] = cursor;
        mCount = 1;
    }

    /**
     * Create an index with the elements given.
     * The array is used directly as the backing store (it is not copied),
     * so later additions and removals may modify it.
     * @param page The page associated with this index.
     * @param cursors The initial elements of the index.
     * NOTE: The list must be sorted in ascending order.
     */
    public PageIndex (Page page, int[] cursors)
    {
        mPage = page;
        mIndices = cursors;
        mCount = cursors.length;
        // The array starts out full, so the first insertion reallocates;
        // make sure there is a non-zero increment to grow by.
        mIncrement = mStartIncrement * 2;
    }

    /**
     * Get this index's page.
     * @return The page associated with this index.
     */
    public Page getPage ()
    {
        return (mPage);
    }

    /**
     * Get the count of elements.
     * @return The number of valid elements.
     */
    public int size ()
    {
        return (mCount);
    }

    /**
     * Get the capacity for elements without reallocation.
     * @return The number of spaces for elements.
     */
    public int capacity ()
    {
        return (mIndices.length);
    }

    /**
     * Add an element to the list
     * @param cursor The element to add.
     * @return The position at which the element was inserted or
     * the index of the existing element if it is a duplicate.
     */
    public int add (Cursor cursor)
    {
        int position;
        int last;
        int ret;

        position = cursor.getPosition ();
        if (0 == mCount)
        {
            ret = 0;
            insertElementAt (position, ret);
        }
        else
        {
            last = mIndices[mCount - 1];
            if (position == last)
                // duplicate of the final element, nothing to insert
                ret = mCount - 1;
            else if (position > last)
            {
                // common case: append without searching
                ret = mCount;
                insertElementAt (position, ret);
            }
            else
            {
                // find where it goes
                ret = Sort.bsearch (this, cursor);
                // insert, but not twice
                if (!((ret < size ()) && (position == mIndices[ret])))
                    insertElementAt (position, ret);
            }
        }

        return (ret);
    }

    /**
     * Add an element to the list
     * @param cursor The element to add.
     * @return The position at which the element was inserted or
     * the index of the existing element if it is a duplicate.
     */
    public int add (int cursor)
    {
        return (add (new Cursor (getPage (), cursor)));
    }

    /**
     * Remove an element from the list
     * Does nothing if the element is not present.
     * @param cursor The element to remove.
     */
    public void remove (Cursor cursor)
    {
        int i;

        // find it
        i = Sort.bsearch (this, cursor);

        // remove, but only on an exact match
        if ((i < size ()) && (cursor.getPosition () == mIndices[i]))
            removeElementAt (i);
    }

    /**
     * Remove an element from the list
     * Does nothing if the element is not present.
     * @param cursor The element to remove.
     */
    public void remove (int cursor)
    {
        remove (new Cursor (getPage (), cursor));
    }

    /**
     * Get an element from the list.
     * @param index The index of the element to get.
     * @return The element.
     * @exception IndexOutOfBoundsException If the index is negative or
     * not less than the number of valid elements.
     */
    public int elementAt (int index)
    {
        if (index >= mCount) // negative index is handled by array.. below
            throw new IndexOutOfBoundsException ("index " + index + " beyond current limit");
        else
            return (mIndices[index]);
    }

    /**
     * Get the line number for a cursor.
     * @param cursor The character offset into the page.
     * @return The line number the character is in.
     */
    public int row (Cursor cursor)
    {
        int ret;

        ret = Sort.bsearch (this, cursor);
        // handle line transition, the search returns the index if it matches
        // exactly one of the line end positions, so we advance one line if
        // it's equal to the offset at the row index, since that position is
        // actually the beginning of the next line
        if ((ret < mCount) && (cursor.getPosition () == mIndices[ret]))
            ret++;

        return (ret);
    }

    /**
     * Get the line number for a position.
     * @param cursor The character offset into the page.
     * @return The line number the character is in.
     */
    public int row (int cursor)
    {
        return (row (new Cursor (getPage (), cursor)));
    }

    /**
     * Get the column number for a cursor.
     * @param cursor The character offset into the page.
     * @return The character offset into the line this cursor is on.
     */
    public int column (Cursor cursor)
    {
        int row;
        int previous;

        row = row (cursor);
        if (0 != row)
            // offset of the first character of the line the cursor is on
            previous = this.elementAt (row - 1);
        else
            previous = 0;

        return (cursor.getPosition () - previous);
    }

    /**
     * Get the column number for a position.
     * @param cursor The character offset into the page.
     * @return The character offset into the line this cursor is on.
     */
    public int column (int cursor)
    {
        return (column (new Cursor (getPage (), cursor)));
    }

    /**
     * Get the elements as an array of int.
     * @return A new array containing the elements,
     * i.e. a snapshot of the index.
     */
    public int[] get ()
    {
        int[] ret = new int[size ()];
        System.arraycopy (mIndices, 0, ret, 0, size ());

        return (ret);
    }

    /**
     * Binary search for the element.
     * @param cursor The element to search for.
     * @return The index at which the element was found or is to be inserted.
     */
    protected int bsearch (int cursor)
    {
        return (Sort.bsearch (this, new Cursor (getPage (), cursor)));
    }

    /**
     * Binary search for the element.
     * @param cursor The element to search for.
     * @param first The index to start at.
     * @param last The index to stop at.
     * @return The index at which the element was found or is to be inserted.
     */
    protected int bsearch (int cursor, int first, int last)
    {
        return (Sort.bsearch (this, new Cursor (getPage (), cursor), first, last));
    }

    /**
     * Inserts an element into the list.
     * The index must be a value greater than or equal to 0 and less than
     * or equal to the current size of the array.
     * @param cursor The element to insert.
     * @param index The index in the list to insert it at.
     */
    protected void insertElementAt (int cursor, int index)
    {
        if ((index >= capacity ()) || (size () == capacity ()))
        {   // allocate more space
            // always grow by at least one slot, even if the increment is
            // zero, so the shifted copy below can never overrun the new array
            int grown = capacity () + Math.max (mIncrement, 1);
            int[] new_values = new int[Math.max (grown, index + 1)];
            mIncrement *= 2;
            if (index < size ())
            {
                // copy and shift up in two pieces
                System.arraycopy (mIndices, 0, new_values, 0, index);
                System.arraycopy (mIndices, index, new_values, index + 1, size () - index);
            }
            else
                System.arraycopy (mIndices, 0, new_values, 0, size ());
            mIndices = new_values;
        }
        else if (index < size ())
            // shift up only the valid elements
            System.arraycopy (mIndices, index, mIndices, index + 1, size () - index);
        mIndices[index] = cursor;
        mCount++;
    }

    /**
     * Remove an element from the list.
     * @param index The index of the item to remove.
     * @exception IndexOutOfBoundsException If the index does not refer
     * to a valid element.
     */
    protected void removeElementAt (int index)
    {
        if ((index < 0) || (index >= size ()))
            throw new IndexOutOfBoundsException ("index " + index + " beyond current limit");
        // shift the valid elements above the removed one down by one
        System.arraycopy (mIndices, index + 1, mIndices, index, size () - (index + 1));
        // clear the slot vacated by the shift
        mIndices[size () - 1] = 0;
        mCount--;
    }

    //
    // Sortable interface
    //

    /**
     * Returns the first index of the Sortable.
     * @return The index of the first element.
     */
    public int first ()
    {
        return (0);
    }

    /**
     * Returns the last index of the Sortable.
     * @return The index of the last element.
     * If this were an array object this would be (object.length - 1).
     * For an empty index this will return -1.
     */
    public int last ()
    {
        return (mCount - 1);
    }

    /**
     * Fetch the object at the given index.
     * @param index The item number to get.
     * @param reuse If this argument is not null, it is an object
     * acquired from a previous fetch that is no longer needed and
     * may be returned as the result if it makes more sense to alter
     * and return it than to fetch or create a new element. That is, the
     * reuse object is garbage and may be used to avoid allocating a new
     * object if that would normally be the strategy.
     * @return The Ordered object at that index.
     */
    public Ordered fetch (int index, Ordered reuse)
    {
        Cursor ret;

        if (null != reuse)
        {
            // recycle the supplied cursor rather than allocating a new one
            ret = (Cursor)reuse;
            ret.mPosition = mIndices[index];
            ret.mPage = getPage (); // redundant
        }
        else
            ret = new Cursor (getPage (), mIndices[index]);

        return (ret);
    }

    /**
     * Swaps the elements at the given indices.
     * @param i One index.
     * @param j The other index.
     */
    public void swap (int i, int j)
    {
        int temp = mIndices[i];
        mIndices[i] = mIndices[j];
        mIndices[j] = temp;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy