All downloads are free. The search and download functionalities use the official Maven repository.

fqlite.base.RecoveryTask Maven / Gradle / Ivy

package fqlite.base;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.BitSet;
import java.util.LinkedList;
import java.util.List;

import fqlite.descriptor.AbstractDescriptor;
import fqlite.descriptor.TableDescriptor;
import fqlite.pattern.SerialTypeMatcher;
import fqlite.types.CarverTypes;
import fqlite.util.Auxiliary;

/**
 * This class represents a recovery task. 
 * Since FQLite supports concurrent search 
 * in databases this class implements the interface Runnable. 
 * 
 * @author pawlaszc
 *
 */
public class RecoveryTask extends Base implements Runnable {

	public int pagesize;
	public long offset;
	public ByteBuffer buffer;
	public BitSet visit;
	private List tables = new LinkedList();
	public int pagenumber;
    private Job job;
	private Auxiliary ct;
    private StringBuffer firstcol = new StringBuffer(); 
    private boolean freeList = false;
    
	/**
	 * Constructor method.
	 * 
	 * @param ct auxiliary object.
	 * @param job the job object.
	 * @param offset the offset
	 * @param pagenumber the page number
	 * @param pagesize the page size
	 * @param freeList if it is a free list
	 * @param tables tables recovered in job
	 * 
	 * @throws IOException if an I/O error occurs.
	 */
	public RecoveryTask(Auxiliary ct, Job job, long offset, int pagenumber, int pagesize, boolean freeList, List tables) throws IOException {
		
		
		if (job.file.size() < offset)
			throw new IOException("offset is out of bounds");
		
		this.job = job;
		this.pagesize = pagesize;
		this.offset = offset;
		this.pagenumber = pagenumber;
		this.ct = ct;
		this.freeList = freeList;
		this.tables = tables;
		this.visit = new BitSet(pagesize);
	}

	/**
	 * This method called to recover regular data records startRegion a database page.
	 * 
	 * @return 0 if successful, -1 otherwise.
	 * @throws IOException io exception
	 */
	public int recover() throws IOException {

		boolean withoutROWID = false;
		
		try {
			
			debug("Offset in recover()::", offset);
			/* read the db page into buffer */
			buffer = job.readPageWithOffset(offset, pagesize);
			if (buffer == null) {
			    return -1;
			}
			byte pageType = buffer.get();

			// offset 0
			buffer.position(0);

			/* check type of the page by reading the first byte */
			int type = Auxiliary.getPageType(pageType);

			/* mark bytes as visited */
			visit.set(0, 2);

			/************** component page was dropped ******************/

			/*
			 * Tricky thing, since a zero page type has normally two possible reasons: 
			 * 
			 * reason 1:
			 * 
			 * It is a dropped page. 
			 * We have to carve for deleted cells but without cell pointers, cause this list
			 * is dropped too or is damaged. 
			 * 
			 * reason 2: 
			 * 
			 * It is an overflow page -> skip it!
			 */
			if (type == 0) {
				
				/* if page was dropped - because of a DROP TABLE command - first 8 Bytes are zero-bytes */
				buffer.position(0);
				Integer checksum = buffer.getInt();
				/* was page dropped ? */
				if (checksum == 0)
				{
					info(" DROPPED PAGE !!!");
					/* no overflow page -> carve for data records - we do our best! ;-)*/
					carve(null);
				}
				/* otherwise it seems to be a overflow page - however, that is not 100% save !!! */
				
				/* we have to leave in any case */
				return 0;
			}

			/************** skip unkown page types ******************/

			// no leaf page -> skip this page
			if (type < 0) {
				info("No Data page. ", pagenumber);
				return -1;
			} else if (type == 12) {
				info("Internal Table page ", pagenumber);
				return -1;
			} else if (type == 10) {
				info("Index leaf page ", pagenumber);	
				// note: WITHOUT ROWID tables are saved here.
				withoutROWID=true;
			} else {
				info("Data page ", pagenumber, " Offset: ", offset);

			}

			/************** regular leaf page with data ******************/

			//boolean freeblocks = false;
			if (type == 8)
			{
				// offset 1-2 let us find the first free block offset for carving
				byte fboffset[] = new byte[2];
				buffer.position(1);
				buffer.get(fboffset);
				//ByteBuffer fboff = ByteBuffer.wrap(fboffset);
				//int ffb = Auxiliary.TwoByteBuffertoInt(fboff);

				// Note: The two-byte integer at offset 1 of the page gives the start of the first freeblock 
				// on the page, or is zero if there are no freeblocks.
				//if (ffb > 0)
				//	freeblocks = true;  
				// A freeblock marks an area between 2 normal cells (that was removed for example)
				// every byte before the cell content region (on offset 5) is no part of the freeblock!!! 
			}	
				
			int ccrstart = job.ps;

			// found Data-Page - determine number of cell pointers at offset 3-4 of this page
			byte cpn[] = new byte[2];
			buffer.position(3);
			buffer.get(cpn);
			
			// get start pointer for the cell content region
			byte ccr[] = new byte[2];
			buffer.position(5);
			buffer.get(ccr);
			
			ByteBuffer contentregionstart = ByteBuffer.wrap(ccr);
			ccrstart = Auxiliary.TwoByteBuffertoInt(contentregionstart);
			
			/* mark as visited */
			visit.set(2, 8);

			ByteBuffer size = ByteBuffer.wrap(cpn);
			int cp = Auxiliary.TwoByteBuffertoInt(size);

			debug(" number of cells: ", cp, " type of page ",  type);
			job.numberofcells.addAndGet(cp);
			if (0 == cp)
				debug(" Page seems to be dropped. No cell entries.");

			int headerend = 8 + (cp * 2);
			visit.set(0, headerend);
			//System.out.println("headerend:" + headerend);

			/***************************************************************
			 * STEP 2:
			 * 
			 * Cell pointer array scan (if possible)
			 * 
			 ***************************************************************/
			int last = 0;

			/* go on with the cell pointer array */
			for (int i = 0; i < cp; i++) {

				// address of the next cell pointer
				byte pointer[] = new byte[2];
				if (type == 5)
					buffer.position(12+2*i);
				else
					buffer.position(8+2*i);
				buffer.get(pointer);
				ByteBuffer celladdr = ByteBuffer.wrap(pointer);
				int celloff = Auxiliary.TwoByteBuffertoInt(celladdr);

				if (last > 0) {
					if (celloff == last) {
						continue;
					}
				}
				last = celloff;
					
				SqliteInternalRow row = null;
				
				if (celloff < buffer.limit() - 20) {
				    row = ct.readRecord(celloff, buffer, pagenumber, visit, type, Integer.MAX_VALUE, firstcol,withoutROWID,-1);
				}
								
				// add new line to output
				if (null != row) {
					
					int p;
					String tableName = row.getTableName();
					if ((p = tableName.indexOf("_node;")) > 0)
					{
						String tbln = tableName.substring(0, p);
						
						if (job.virtualTables.containsKey(tbln))
						{
							TableDescriptor tds = job.virtualTables.get(tbln);
							
							/* we use the xxx_node shadow component to construct the 
							 * virtual component
							 */
							//String BLOB = tableName.substring(p);
							//System.out.println(BLOB);
							
							/* skip the first information -> go directly to the 5th element
							 * of the data record line, i.e. go to the BLOB with the row data
							 */
							//int pp = Auxiliary.findNthOccur(rc, ';', 4);
							//String data = rc.substring(pp+1);

							String data = row.getRowData().get(1).toString();
						
							/* transform String data into an byte array */
							byte[] binary = Auxiliary.decode(data);
							ByteBuffer bf = ByteBuffer.wrap(binary);
                            
							/* skip the first to bytes */
							bf.getShort();
							/* first get the total number of entries for this rtree branch */
                            int entries = bf.getShort();
					
                            /* create a new line for every data row */ 
                            while(entries>0)
                            {
                                SqliteInternalRow vrow = new SqliteInternalRow();
                                vrow.setTableName(tbln);
                                vrow.setRecordType("VT");
                                vrow.setOffset(0);
    					
                            	// The first column is always a 64-bit signed integer primary key.
                            	long primarykey = bf.getLong();
                                vrow.append(new SqliteElementData(primarykey, job.db_encoding));
                            	
                            	//Each R*Tree indices is a virtual component with an odd number of columns between 3 and 11
                            	//The other columns are pairs, one pair per dimension, containing the minimum and maximum values for that dimension, respectively.
                            	int number = tds.columnnames.size() - 1;
                            	
                            	while (number > 0)
                            	{	
	                            	float rv = bf.getFloat();
	                                vrow.append(new SqliteElementData(rv, job.db_encoding));
	                            	number--;
                            	}


	                            //vrow.append("\n");
                                //job.ll.add(vrow.toString());
                                job.addRow(vrow);

    							info(vrow.toString());

    							entries--;

                            }	


						}

					}
					
					/* if record resides inside a free page -> add a flag char to document this */
					if(freeList)
					{    
					   row.setRecordType(Global.FREELIST_ENTRY + row.getRecordType());
					   
					}
					job.addRow(row);
				}

			} // end of for - cell pointer

			
			/***************************************************************
			 * STEP 3:
			 * 
			 * Scan unallocated space between header and  the cell
			 * content region 
			 * 
			 ***************************************************************/
			
			/* before we go to the free blocks an gaps let us first check the area between the header and 
			   the start byte of the cell content region */
			
			buffer.position(headerend);
			
			/* 	Although we have already reached the official end of the cell pointer array, 
			 *  there may be more pointers startRegion deleted records. They do not belong to the
			 *  official content region. We have to skip them, before we can search for more 
			 *  artifacts in the unallocated space. 
			 */
			
			byte garbage[] = new byte[2];
			
			int garbageoffset = -1;
			do
			{
				
				buffer.get(garbage);
				ByteBuffer ignore = ByteBuffer.wrap(garbage);
				garbageoffset = Auxiliary.TwoByteBuffertoInt(ignore);
				//System.out.println("garbage bytes " + buffer.position());
			} while (buffer.position() < pagesize && garbageoffset > 0);
			
			
			/*  Now, skip all zeros - no information to recover just empty space */
			byte zerob = 0;
			while(buffer.position() < pagesize && zerob == 0)
			{
				zerob = buffer.get();
			}
			
			/* mark the region startRegion the end of page header till end of zero space as visited */
			visit.set(headerend,buffer.position());
			
			/* go back one byte */
			buffer.position(buffer.position()-1);
		
			//System.out.println("First none-zero Byte " + zerob);
			
			//System.out.println("Cell Content Region start offset : " + ccrstart);
			//System.out.println("First none zero byte in unallocated space : " + buffer.position());
			
			/* only if there is a significant number of bytes in the unallocated area, evaluate it more closely. */
			if (ccrstart - buffer.position() > 3)
			{
				/* try to read record as usual */
				SqliteInternalRow row;
				
				/* Tricky thing : data record could be partly overwritten with a new data record!!!  */
				/* We should read until the end of the unallocated area and not above! */
				row = ct.readRecord(buffer.position(), buffer, pagenumber, visit, type, ccrstart - buffer.position(),firstcol,withoutROWID,-1);
				
				// add new line to output
				if (null != row) { // && rc.length() > 0) {
					
					//int idx = rc.indexOf(";");
					//rc = rc.substring(0, idx) + ";" + Global.DELETED_RECORD_IN_PAGE  + rc.substring(idx+1);
					row.setRecordType(Global.DELETED_RECORD_IN_PAGE + row.getRecordType());
					
					//if (job.doublicates.add(rc.hashCode()))
					//job.ll.add(rc);
					job.addRow(row);
				}
				
			}
			
			
			/***************************************************************
			 * STEP 4:
			 * 
			 * if there are still gaps, go for it and carve it  
			 * 
			 ***************************************************************/
			
			/* now we are ready to carve the rest of the page */
			carve(null);
			
		} catch (IOException err) {
			throw err;
		}

		return 0;
	}

	/**
	 * Quick lookup. Does a given hex-String starts with Zeros?
	 * @param s the String to check
	 * @return true, if zero bytes could be found
	 */
	static boolean allCharactersZero(String s) {
		if (!s.startsWith("0000"))
			return false;

		int n = s.length();
		for (int i = 1; i < n; i++)
			if (s.charAt(i) != s.charAt(0))
				return false;

		return true;
	}

	/**
	 * Check the BitSet for gaps, i.e. regions we still have to carve.
	 * 
	 * @return the gaps found
	 */
	public LinkedList findGaps() {
		LinkedList  gaps = new LinkedList();

		int from = 0;

		/* are there any regions left in the page ? */
		for (int i = 0; i < pagesize; i++) {

			if (!visit.get(i)) {
				from = i;

				int to = i;

				while (!visit.get(++i) && i < (pagesize - 1)) {
					to++;
				}

				if (to - from >= 4) {

					/* check for zero bytes */
					boolean isNull = false;
					if (buffer.get(from) == 0) {
						isNull = true;
						for (int index = from; index < to; index++) {
							if (0 != buffer.get(index))
								isNull = false;
						}
					}
					// skip NULL-Byte areas - mark as visited
					if (isNull)
						visit.set(from, to);
					else {
						Gap g = new Gap(from, to);
						if (!gaps.contains(g))
						debug("ohne match : ", (job.ps * (pagenumber - 1) + from), " - ",
								(job.ps * (pagenumber - 1) + to), " Bytes");
						gaps.add(g);
					}
				}
				from = i;

			}

		} // end of finding gaps in BitSet
	
		
		return gaps;
	}

	/**
	 * This method is called to carve a data page for records.
	 * 
	 * @param crv the carver
	 */
	public void carve(Carver crv) {

		Carver c = crv;
		
		if (null == c)
			/* no type could be found in the first byte */
			/* Maybe the whole page was drop because of a drop component command ? */
			/* start carving on the complete page */
			c = new Carver(job, buffer, visit, pagenumber);

		//Matcher mat = null;
		// boolean match = false;

		/* try to get component schema for the current page, if possible */
		TableDescriptor tdesc = null;
		if (job.pages.length > pagenumber)
		{
			AbstractDescriptor ad = job.pages[pagenumber]; 
			if (ad instanceof TableDescriptor)
					tdesc = (TableDescriptor)ad;
		}
			
		List tab;
		debug(" tables :: ", tables.size());

		if (null != tdesc) {
			/* there is a schema for this page */
			tab = new LinkedList();
			tab.add(tdesc);
			debug(" added tdsec ");
		} else {
			warning(" No component description!");
			tab = tables;
		}
		
		List gaps = findGaps();

		info("gaps.size()", gaps.size());
		if (gaps.size() == 0)
		{
			debug("no gaps anymore. Stopp search");
			return;
		}	
		
		/* try out all component schema(s) */
		for (int n = 0; n < tab.size(); n++) {
			tdesc = tab.get(n);
			debug("pagenumber :: ", pagenumber, " component size :: ", tab.size());
			debug("n " + n);
			//TableDescriptor tdb = tab.get(n);
		
			/* access pattern for a particular component */
			String tablename = tab.get(n).tblname;
			if (tablename.startsWith("__UNASSIGNED"))
				continue;
			/* create matcher object for constrain check */
			SerialTypeMatcher stm = new SerialTypeMatcher(buffer);

			gaps = findGaps();
			
			for (int a = 0; a < gaps.size(); a++) {
			
				Gap next = gaps.get(a);

				
				if (next.to - next.from > 5)
					/* do we have at least one match ? */
					if (c.carve(next.from+4,next.to, stm, CarverTypes.NORMAL, tab.get(n),firstcol) != Global.CARVING_ERROR) {
						debug("*****************************  STEP NORMAL finished with matches");
						
					}
			}
				
			gaps = findGaps();
			
			for (int a = 0; a < gaps.size(); a++) {
				
				Gap next = gaps.get(a);
				
 				if (c.carve(next.from+4,next.to, stm, CarverTypes.COLUMNSONLY, tab.get(n),firstcol) != Global.CARVING_ERROR) {
					debug("*****************************  STEP COLUMNSONLY finished with matches");
					
				}
			}
			
			gaps = findGaps();
			
			
			for (int a = 0; a < gaps.size(); a++) {
				
				Gap next = gaps.get(a);
				
				
				if (c.carve(next.from+4,next.to, stm, CarverTypes.FIRSTCOLUMNMISSING, tab.get(n),firstcol) != Global.CARVING_ERROR) {
					debug("*****************************  STEP FIRSTCOLUMNMISSING finished with matches");
					
				}
			
			}
			
			
		
			/**
			 * When a record deletion occurs, the first 2 bytes of the cell are set to the
			 * offset value of next free block and latter 2 bytes covers the length of the
			 * current free block. Because of this, the first 4 bytes of a deleted cell
			 * differ startRegion the normal data. Accordingly, we need a different approach to
			 * recover the data records.
			 * 
			 * In most cases, at least the header length information is overwritten. Boyond
			 * this, sometimes, also the first column type field is overwritten too.
			 * 
			 * We have to cases:
			 * 
			 * (1) only the first column of the header is missing, but the rest of the
			 * header is intact.
			 * 
			 * (2) both header length field plus first column are overwritten.
			 * 
			 * [cell size | rowid | header size | header bytes | payload ]
			 * 
			 * for a deleted cell is looks maybe like this
			 * 
			 * [offset of next free block | length of the current free block | ]
			 */

			/* There are still gaps? */
			gaps = findGaps();
			
			for (int a = 0; a < gaps.size(); a++) {
				
				
				Gap next = gaps.get(a);
			
				/* one last try with 4+1 instead of 4 Bytes */
				c.carve(next.from+4+1,next.to, stm, CarverTypes.FIRSTCOLUMNMISSING, tab.get(n),firstcol); 
				
			}
			

		} // end of tables ( component fingerprint )

	}

	@Override
	public void run() {
		try {
			runSingleThread();
		} catch (Exception e) {
			err("Error in RecoveryTask: ", e);
		}
	}

	public void runSingleThread() throws IOException {
		
		try
		{
			recover();
			/* if task has finished, decrement this counter to inform the main-thread */
			//System.out.println("task finished" );
		}
		finally
		{
			//System.out.println("task finally" );
			job.runningTasks.decrementAndGet();
		}
		
		
	}
}








© 2015 - 2025 Weber Informatics LLC | Privacy Policy