// com.olapdb.obase.data.index.Indexer (Maven / Gradle / Ivy)
// The newest version!
package com.olapdb.obase.data.index;
import com.olapdb.obase.data.Bytez;
import com.olapdb.obase.data.Entity;
import com.olapdb.obase.utils.Obase;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.util.*;
import java.util.stream.Stream;
public class Indexer {
/** Table resolved through {@code Obase.getTable} for the entity class given to the constructor. */
private final Table table;
/** Exact-match conditions registered via {@code addTag}; iterated as {@code Idx} by {@code list()}. */
private final List<Idx> searchTags = new Vector<>();
/** Range conditions registered via {@code addScope}; iterated as {@code Scope} by {@code list()}. */
private final List<Scope> searchScopes = new Vector<>();
/**
 * Creates an indexer bound to the table that {@code Obase.getTable} returns for the
 * given entity class.
 *
 * @param entityClass entity class whose table the index operations will target
 */
public Indexer(Class entityClass) {
    table = Obase.getTable(entityClass);
}
/**
 * Creates an indexer for the runtime class of the given entity by delegating to the
 * class-based constructor.
 *
 * @param entity entity instance whose class selects the table; must not be {@code null}
 */
public Indexer(Entity entity) {
    this(entity.getClass());
}
/**
 * Deletes every {@code Idc} registered for the given table, along with the data removed by
 * {@code Idc.deleteIndexes()} and {@code Idc.deleteIdxes()}.
 *
 * <p>The {@code Idc} entries are fetched in pages of up to 1000, each page is cleaned up
 * through a {@code Lunnar} task runner, and the loop ends when a page comes back empty.
 * Does nothing when {@code table} is {@code null}.
 *
 * @param table table whose index definitions are to be destroyed; may be {@code null}
 */
public static void destroyTableIndex(Table table) {
    if (table == null) {
        return;
    }
    byte[] startRow = null;
    while (true) {
        // Typed list: the for-each below needs the element type to compile.
        List<Idc> idcs = Idc.getTableIdcs(table, startRow, 1000);
        if (idcs.isEmpty()) {
            return;
        }
        Lunnar lunnar = new Lunnar();
        for (final Idc idc : idcs) {
            lunnar.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        idc.deleteIndexes();
                        idc.deleteIdxes();
                    } catch (Exception e) {
                        // Best effort: report the failure and still remove the Idc below.
                        e.printStackTrace();
                    }
                    idc.delete();
                }
            });
            // The next page is requested starting from the last row handled in this one.
            startRow = idc.getRow();
            // NOTE(review): this deletes the Idc on the calling thread while the task submitted
            // above may still be running and will call idc.delete() again. It is kept as-is
            // because it may be what guarantees the paging loop makes progress; confirm
            // whether the duplicate delete is intended.
            idc.delete();
        }
        lunnar.waitForComplete();
    }
}
/**
 * Deletes the index data and the {@code Idc} entry for one column of this indexer's table.
 *
 * <p>Nothing happens when the table is {@code null} or when {@code Idc.needConnect()}
 * reports {@code true} for the column.
 *
 * @param column name of the indexed column
 * @throws Exception if deleting the index data fails
 */
public void destroyColumnIndex(String column) throws Exception {
    if (table != null) {
        Idc columnIdc = Idc.getInstance(table, column);
        if (!columnIdc.needConnect()) {
            columnIdc.deleteIndexes();
            columnIdc.deleteIdxes();
            columnIdc.delete();
        }
    }
}
/**
 * Reads the reference value of the row counter kept for the given table.
 *
 * @param table table whose counter is read
 * @return the counter's current reference value
 */
public static long getRowCount(Table table) {
    Idx counter = tableRowCounter(table);
    return counter.getReference();
}
/**
 * Sets the row counter of the given table to {@code count}, connecting the counter
 * afterwards when {@code needConnect()} reports that this is required.
 *
 * @param table table whose counter is written
 * @param count new counter value
 */
public static void setRowCount(Table table, long count) {
    final Idx counter = tableRowCounter(table);
    counter.setReference(count);
    if (!counter.needConnect()) {
        return;
    }
    counter.connect();
}
/**
 * Adds {@code num} to the row counter of the given table while holding the monitor of
 * {@code table}, connecting the counter when {@code needConnect()} requires it.
 *
 * @param table table whose counter is incremented; used as the lock object
 * @param num amount passed to {@code incReference}
 * @return the counter's reference value after the increment
 */
public static long incRowCount(Table table, int num) {
    synchronized (table) {
        final Idx counter = tableRowCounter(table);
        counter.incReference(num);
        if (counter.needConnect()) {
            counter.connect();
        }
        long updated = counter.getReference();
        return updated;
    }
}
/**
 * Calls {@code decReference()} on the row counter of the given table while holding the
 * monitor of {@code table}, connecting the counter when {@code needConnect()} requires it.
 *
 * @param table table whose counter is decremented; used as the lock object
 * @return the counter's reference value after the decrement
 */
public static long decRowCount(Table table) {
    synchronized (table) {
        final Idx counter = tableRowCounter(table);
        counter.decReference();
        if (counter.needConnect()) {
            counter.connect();
        }
        long updated = counter.getReference();
        return updated;
    }
}
/**
 * Refreshes the stored total in every index entry of the table's {@code "word"} column
 * from the {@code wordCount} attribute of the entity row the entry points to.
 *
 * <p>Each index entry's data holds two ints: the count at offset 0 and the total at
 * offset 4. For every entry the entity row key is cut out of the index row key, the row's
 * {@code wordCount} cell is fetched, and the data is rewritten when the totals differ.
 * Progress is printed to standard output every 100 entries. Returns immediately when
 * {@code needConnect()} is {@code true} for the {@code "word"} Idc.
 *
 * @param table entity table that owns the {@code "word"} index
 * @throws Exception if scanning the index table or reading the entity table fails
 */
public static void updateTableWordCount(Table table) throws Exception {
    Idc idc = Idc.getInstance(table, "word");
    if (idc.needConnect()) {
        return;
    }
    // Column coordinates are the same for every row, so build them once.
    final byte[] family = Bytez.from(Obase.FAMILY_ATTR);
    final byte[] qualifier = Bytez.from("wordCount");
    Scan scan = new Scan(Bytez.from(idc.getId()), Bytez.from(idc.getId()+1));
    // try-with-resources: the scanner must be closed to release its resources.
    try (ResultScanner rs = Obase.getIndexTable().getScanner(scan)) {
        int finish = 0;
        for (Result r : rs) {
            byte[] irow = r.getRow();
            Index index = new Index(r);
            int disp = 4+2; // idc 4 bytes + string.len 2 bytes
            disp += Bytez.toShort(irow, 4); // skip the value, whose length is stored at offset 4
            byte[] row = Bytez.copy(irow, disp, irow.length-disp);
            int oldtotal = Bytez.toInt(index.getData(), 4);
            int count = Bytez.toInt(index.getData());
            String info = count+"/" + oldtotal + "--->";
            // Look up the entity row's current wordCount.
            Get get = new Get(row);
            get.addColumn(family, qualifier);
            Result wcr = table.get(get);
            Cell c = wcr.getColumnLatestCell(family, qualifier);
            if (c != null) {
                int total = Bytez.toInt(CellUtil.cloneValue(c));
                if (total != oldtotal) {
                    index.setData(Bytez.add(Bytez.from(count), Bytez.from(total)));
                }
                info += "--->"+count+"/"+total;
            }
            finish++;
            if (finish%100 == 0) {
                System.out.println(finish + "\t" + info);
            }
        }
    }
}
/**
 * Variant of {@code updateTableWordCount} that caches each entity row's {@code wordCount}
 * so a row referenced by several index entries is fetched only once.
 *
 * <p>Only rows that have a {@code wordCount} cell are cached; a row without one is looked
 * up again for every entry and its total defaults to 1. An entry's data (count at offset 0,
 * total at offset 4) is rewritten when the stored total differs. Progress is printed to
 * standard output every 100 entries. Returns immediately when {@code needConnect()} is
 * {@code true} for the {@code "word"} Idc.
 *
 * @param table entity table that owns the {@code "word"} index
 * @throws Exception if scanning the index table or reading the entity table fails
 */
public static void updateWordCountFast(Table table) throws Exception {
    Idc idc = Idc.getInstance(table, "word");
    if (idc.needConnect()) {
        return;
    }
    // byte[] keys need an explicit comparator; the value type lets getOrDefault unbox to int.
    Map<byte[], Integer> cache = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    // Column coordinates are the same for every row, so build them once.
    final byte[] family = Bytez.from(Obase.FAMILY_ATTR);
    final byte[] qualifier = Bytez.from("wordCount");
    Scan scan = new Scan(Bytez.from(idc.getId()), Bytez.from(idc.getId()+1));
    // try-with-resources: the scanner must be closed to release its resources.
    try (ResultScanner rs = Obase.getIndexTable().getScanner(scan)) {
        int finish = 0;
        for (Result r : rs) {
            byte[] irow = r.getRow();
            Index index = new Index(r);
            int disp = 4+2; // idc 4 bytes + string.len 2 bytes
            disp += Bytez.toShort(irow, 4); // skip the value, whose length is stored at offset 4
            byte[] row = Bytez.copy(irow, disp, irow.length-disp);
            int oldtotal = Bytez.toInt(index.getData(), 4);
            int count = Bytez.toInt(index.getData());
            String info = count+"/" + oldtotal + "--->";
            // Fetch the entity row's wordCount unless it is already cached.
            if (!cache.containsKey(row)) {
                Get get = new Get(row);
                get.addColumn(family, qualifier);
                Result wcr = table.get(get);
                Cell c = wcr.getColumnLatestCell(family, qualifier);
                if (c != null) {
                    cache.put(row, Bytez.toInt(CellUtil.cloneValue(c)));
                }
            }
            int total = cache.getOrDefault(row, 1);
            if (total != oldtotal) {
                index.setData(Bytez.add(Bytez.from(count), Bytez.from(total)));
            }
            info += "--->"+count+"/"+total;
            finish++;
            if (finish%100 == 0) {
                System.out.println(finish + "\t" + info);
            }
        }
    }
}
/**
 * Builds the {@code Idx} used as the row counter of a table. Its key is the int 0
 * followed by the bytes of the table name.
 *
 * @param table table whose counter is wanted
 * @return the counter {@code Idx} for that table
 */
private static Idx tableRowCounter(Table table) {
    return new Idx(
            Bytez.add(
                    Bytez.from(0),
                    table.getName().toBytes()));
}
/**
 * Reads the data of the index entry for (column, value, row) and interprets it as an int.
 *
 * @param row entity row key
 * @param column indexed column name
 * @param value indexed value
 * @return the int decoded from the entry's data, or 0 when the arguments fail
 *         {@code checkIndex} or when {@code needConnect()} is {@code true} for the
 *         column's Idc or for the entry
 * @throws Exception declared by the original signature
 */
public int getIndexCount(byte[] row, String column, Value value) throws Exception {
    if (checkIndex(row, column, value)) {
        Idc idc = Idc.getInstance(table, column);
        if (!idc.needConnect()) {
            Index entry = new Index(idc, value, row);
            if (!entry.needConnect()) {
                return Bytez.toInt(entry.getData());
            }
        }
    }
    return 0;
}
/**
 * Returns the raw data of the index entry for (column, value, row).
 *
 * @param row entity row key
 * @param column indexed column name
 * @param value indexed value
 * @return the entry's data, or {@code null} when the arguments fail {@code checkIndex} or
 *         when {@code needConnect()} is {@code true} for the column's Idc or for the entry
 * @throws Exception declared by the original signature
 */
public byte[] getIndexData(byte[] row, String column, Value value) throws Exception {
    if (checkIndex(row, column, value)) {
        Idc idc = Idc.getInstance(table, column);
        if (!idc.needConnect()) {
            Index entry = new Index(idc, value, row);
            if (!entry.needConnect()) {
                return entry.getData();
            }
        }
    }
    return null;
}
/**
 * Stores {@code data} in the index entry for (column, value, row) and connects the entry.
 *
 * <p>Nothing is written when the arguments fail {@code checkIndex} or when
 * {@code needConnect()} is {@code true} for the column's Idc.
 *
 * @param row entity row key
 * @param column indexed column name
 * @param value indexed value
 * @param data payload to store in the entry
 */
public void setIndexData(byte[] row, String column, Value value, byte[] data) {
    if (checkIndex(row, column, value)) {
        Idc idc = Idc.getInstance(table, column);
        if (!idc.needConnect()) {
            Index entry = new Index(idc, value, row);
            entry.setData(data);
            entry.connect();
        }
    }
}
/**
 * Replaces the index entry for {@code value} with one for {@code value2}, storing no data
 * in the new entry.
 *
 * @param row entity row key
 * @param column indexed column name
 * @param value value whose entry is removed
 * @param value2 value whose entry is added
 */
public void replaceIndex(byte[] row, String column, Value value, Value value2) {
    final byte[] noData = null;
    replaceIndex(row, column, value, value2, noData);
}
/**
 * Replaces the index entry for {@code value} with one for {@code value2} that carries
 * {@code data}, then calls {@code submit()}.
 *
 * <p>Nothing is done when both values are the same reference or are non-null and equal
 * according to {@code Value.equal}. Any exception raised during the replacement is
 * printed and not rethrown.
 *
 * @param row entity row key
 * @param column indexed column name
 * @param value value whose entry is removed
 * @param value2 value whose entry is added
 * @param data payload for the new entry; may be {@code null}
 */
public void replaceIndex(byte[] row, String column, Value value, Value value2, byte[] data) {
    if (value == value2 || (value != null && value2 != null && value.equal(value2))) {
        return;
    }
    try {
        removeIndex(row, column, value);
        addIndex(row, column, value2, data);
        submit();
    } catch (Exception failure) {
        failure.printStackTrace();
    }
}
/**
 * Registers an exact-match search condition on {@code column} for use by {@code list()}.
 *
 * <p>A {@code null} or empty value is ignored. An exception raised while building the
 * condition is printed and not rethrown.
 *
 * @param column indexed column name
 * @param value value the column must match
 * @return this indexer, for chaining
 */
public Indexer addTag(String column, Value value) {
    // The original guard compared the Value with a String, which can only match if
    // Value.equals special-cases strings. The comparison against Value.from("") mirrors
    // checkIndex; the original check is kept so any existing match still applies.
    if (value == null || value.equals("") || value.equals(Value.from(""))) {
        return this;
    }
    Idc idc = Idc.getInstance(table, column);
    try {
        searchTags.add(new Idx(idc, value));
    } catch (Exception e) {
        e.printStackTrace();
    }
    return this;
}
/**
 * Registers a range search condition on {@code column} for use by {@code list()}.
 *
 * <p>The call is ignored when either bound is {@code null}. An exception raised while
 * building the condition is printed and not rethrown.
 *
 * @param column indexed column name
 * @param start first bound of the range
 * @param stop second bound of the range
 * @return this indexer, for chaining
 */
public Indexer addScope(String column, Value start, Value stop) {
    if (start != null && stop != null) {
        Idc idc = Idc.getInstance(table, column);
        try {
            searchScopes.add(new Scope(idc, start, stop));
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
    return this;
}
/**
 * Adds an index entry for (column, value, row) with no data attached.
 *
 * @param row entity row key
 * @param column indexed column name
 * @param value indexed value
 * @return this indexer, for chaining
 */
public Indexer addIndex(byte[] row, String column, Value value) {
    final byte[] noData = null;
    return addIndex(row, column, value, noData);
}
/**
 * Adds {@code count} to the int stored in the index entry for (column, value, row).
 *
 * <p>The column's Idc is connected first when needed. If the entry needs connecting after
 * the update, it is connected and the value's {@code Idx} reference is incremented by 1.
 * If the entry's data then decodes to 0, the entry is deleted and the {@code Idx} reference
 * is decremented. Any exception is printed and not rethrown.
 *
 * @param row entity row key
 * @param column indexed column name
 * @param value indexed value
 * @param count amount added to the stored int; may be negative
 * @return this indexer, for chaining
 */
public Indexer adjustIndex(byte[] row, String column, Value value, int count) {
    // NOTE: unlike addIndex/removeIndex, the arguments are not validated with checkIndex here.
    try {
        Idc idc = Idc.getInstance(table, column);
        if (idc.needConnect()) {
            idc.connect();
        }
        Idx idx = new Idx(idc, value);
        Index entry = new Index(idc, value, row);
        int adjusted = Bytez.toInt(entry.getData()) + count;
        entry.setData(Bytez.from(adjusted));
        if (entry.needConnect()) {
            entry.connect();
            idx.incReference(1);
        }
        // The data is read again after the optional connect, as in the original flow.
        boolean exhausted = Bytez.toInt(entry.getData()) == 0;
        if (exhausted) {
            entry.delete();
            idx.decReference();
        }
    } catch (Exception failure) {
        failure.printStackTrace();
    }
    return this;
}
/**
 * Streams the contents of the {@code Idx} for an exact value of an indexed column.
 *
 * @param entityClass entity class whose table is searched
 * @param column indexed column name
 * @param value value to look up
 * @return the stream produced by {@code Idx.stream()} for that column and value
 */
public static Stream find(Class entityClass, String column, Value value) {
    final Idc columnIdc = Idc.getInstance(Obase.getTable(entityClass), column);
    return new Idx(columnIdc, value).stream();
}
/**
 * Streams the index entries of a column whose index rows fall between the {@code Idx} rows
 * of {@code start} and {@code stop}, as bounded by {@code Scan.withStartRow} and
 * {@code Scan.withStopRow}.
 *
 * <p>Each result is an {@code IndexData} built from the index row with its leading bytes
 * removed, plus the entry's data.
 *
 * @param entityClass entity class whose table is searched
 * @param column indexed column name
 * @param start value giving the scan's start row
 * @param stop value giving the scan's stop row
 * @return stream of {@code IndexData}, one per scanned index entry
 */
public static Stream find(Class entityClass, String column, Value start, Value stop) {
    Idc idc = Idc.getInstance(Obase.getTable(entityClass), column);
    // Build the start row once; it is both the scan bound and the prefix-length reference.
    final byte[] startRow = new Idx(idc, start).getRow();
    final byte[] stopRow = new Idx(idc, stop).getRow();
    Scan scan = new Scan().withStartRow(startRow).withStopRow(stopRow);
    // NOTE(review): the prefix to strip is taken to be as long as the start Idx row. If Idx
    // rows vary in length with the value, entries for other values would be cut at the wrong
    // offset - confirm against the Idx row layout.
    final int prefixLength = startRow.length;
    return Index.stream(scan).map(e -> {
        byte[] indexRow = e.getRow();
        return new IndexData(
                Bytes.copy(indexRow, prefixLength, indexRow.length - prefixLength),
                e.getData());
    });
}
/**
 * Adds an index entry for (column, value, row) carrying {@code data}.
 *
 * <p>The call is ignored when the arguments fail {@code checkIndex}. The column's Idc is
 * connected first when needed. If the entry needs connecting, it is connected and the
 * value's {@code Idx} is connected as well; an exception from that last step is printed
 * and not rethrown.
 *
 * @param row entity row key
 * @param column indexed column name
 * @param value indexed value
 * @param data payload stored in the entry; may be {@code null}
 * @return this indexer, for chaining
 */
public Indexer addIndex(byte[] row, String column, Value value, byte[] data) {
    if (checkIndex(row, column, value)) {
        Idc idc = Idc.getInstance(table, column);
        if (idc.needConnect()) {
            idc.connect();
        }
        Index entry = new Index(idc, value, row);
        entry.setData(data);
        if (entry.needConnect()) {
            entry.connect();
            try {
                new Idx(idc, value).connect();
            } catch (Exception failure) {
                failure.printStackTrace();
            }
        }
    }
    return this;
}
/**
 * Deletes the index entry for (column, value, row).
 *
 * <p>Nothing is deleted when the arguments fail {@code checkIndex} or when
 * {@code needConnect()} is {@code true} for the column's Idc or for the entry.
 *
 * @param row entity row key
 * @param column indexed column name
 * @param value indexed value
 * @return this indexer, for chaining
 */
public Indexer removeIndex(byte[] row, String column, Value value) {
    if (!checkIndex(row, column, value)) {
        return this;
    }
    Idc idc = Idc.getInstance(table, column);
    if (idc.needConnect()) {
        return this;
    }
    Index entry = new Index(idc, value, row);
    if (!entry.needConnect()) {
        entry.delete();
    }
    return this;
}
/**
 * Runs the search described by the registered tags and scopes and returns one
 * {@code Elite} per row that satisfies all of them.
 *
 * <p>The tags are sorted by their natural ordering, then applied in turn: the first
 * condition produces the candidate rows and each later tag or scope filters the survivors.
 * Every match found along the way is attached to the row's {@code Elite}. An empty list is
 * returned when no condition is registered, when a condition is unusable (a tag or a
 * scope's Idc reports {@code needConnect()}, or a scope's bounds are missing or differ in
 * type), or when any stage leaves no candidates. Timing and match counts are printed when
 * {@code Obase.DebugIndex} is set.
 *
 * @return the {@code Elite} objects of the rows left after the last condition, in the
 *         order of that condition's results
 */
public List list() {
    submit();
    // byte[] keys need an explicit comparator; typed so elites.get(...) yields an Elite.
    Map<byte[], Elite> elites = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    if (searchTags.isEmpty() && searchScopes.isEmpty()) {
        return new Vector<>();
    }
    for (Scope scope : searchScopes) {
        if (scope.idc.needConnect() || scope.start == null || scope.stop == null
                || scope.start.getType() != scope.stop.getType()) {
            return new Vector<>();
        }
    }
    for (Idx idx : searchTags) {
        if (idx.needConnect()) {
            return new Vector<>();
        }
    }
    Collections.sort(searchTags);
    long start = System.currentTimeMillis();
    List<Find> finds = null;
    for (Idx idx : searchTags) {
        finds = (finds == null) ? idx.getIndexRows() : idx.filterIndexRows(finds);
        if (Obase.DebugIndex) {
            System.out.println("time = " + (System.currentTimeMillis()-start));
        }
        if (finds == null || finds.isEmpty()) {
            return new Vector<>();
        }
        collectMatches(elites, finds);
        if (Obase.DebugIndex) {
            System.out.println("time = " + (System.currentTimeMillis()-start));
            System.out.println("find" + idx.getName() +"; reference = " + idx.getReference() + "; matches " +finds.size());
        }
    }
    for (Scope scope : searchScopes) {
        finds = (finds == null) ? scope.getIndexRows() : scope.filterIndexRows(finds);
        if (Obase.DebugIndex) {
            System.out.println("time = " + (System.currentTimeMillis()-start));
        }
        if (finds == null || finds.isEmpty()) {
            return new Vector<>();
        }
        collectMatches(elites, finds);
        if (Obase.DebugIndex) {
            System.out.println("time = " + (System.currentTimeMillis()-start));
            System.out.println("find scope matches " +finds.size());
        }
    }
    // Only rows that survived the last stage are returned; the map may hold rows that an
    // earlier stage matched and a later one filtered out.
    List<Elite> rets = new Vector<>();
    for (Find m : finds) {
        rets.add(elites.get(m.getRow()));
    }
    return rets;
}

/** Attaches each find to the Elite of its row, creating the Elite on first sight of the row. */
private static void collectMatches(Map<byte[], Elite> elites, List<Find> finds) {
    for (Find find : finds) {
        elites.computeIfAbsent(find.getRow(), row -> new Elite(row)).addMatch(find);
    }
}
/**
 * Hook called by {@code replaceIndex} and {@code list()}. The body is empty, so calling
 * it has no effect.
 */
public void submit() {
    // No work is performed here.
}
/**
 * Validates the arguments of an index operation.
 *
 * @param row entity row key
 * @param column indexed column name
 * @param label indexed value
 * @return {@code true} only when all three arguments are non-null, the column name is not
 *         the empty string and the value does not equal {@code Value.from("")}
 */
private boolean checkIndex(byte[] row, String column, Value label) {
    boolean allPresent = row != null && column != null && label != null;
    return allPresent
            && !column.equals("")
            && !label.equals(Value.from(""));
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy