com.scudata.dm.cursor.FileCursor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of esproc Show documentation
Show all versions of esproc Show documentation
SPL(Structured Process Language) A programming language specially for structured data computing.
package com.scudata.dm.cursor;
import java.io.IOException;
import java.io.InputStream;
import com.scudata.common.MessageManager;
import com.scudata.common.RQException;
import com.scudata.dm.BaseRecord;
import com.scudata.dm.Context;
import com.scudata.dm.DataStruct;
import com.scudata.dm.FileObject;
import com.scudata.dm.KeyWord;
import com.scudata.dm.LineImporter;
import com.scudata.dm.Sequence;
import com.scudata.dm.Table;
import com.scudata.resources.EngineMessage;
import com.scudata.util.Variant;
/**
* ?ļ??α꣬???ڶ?ȡ?ı??ļ?
* @author WangXiaoJun
*
*/
public class FileCursor extends ICursor {
private FileObject fileObject; // ?ļ?????
private LineImporter importer; // ?ı??????࣬???ڰ??ı????ж????ֶ?????
private DataStruct ds; // ?ļ???Ӧ?????ݽṹ
private long start; // ??ȡ????ʼλ?ã?Ҫ????ͷȥβ?????????ڲ??ж??ļ?
private long end = -1; // ??ȡ?Ľ???λ?ã?Ҫ????ͷȥβ?????????ڲ??ж??ļ?
private String []selFields; // ѡ???ֶ???????
private byte []types; // ?ֶ?????
private String []fmts; // ?ֶ?ֵ??ʽ??????????ʱ??
private int []selIndex; // ѡ???ֶ???Դ?ṹ?е????
private DataStruct selDs; // ????????ݽṹ
private String opt; // ѡ??
private byte [] colSeparator; // ?зָ??
private boolean isTitle; // ?ļ??Ƿ??б??⣬????н???Ϊ?ṹ??
private boolean isDeleteFile; // ??????Ƿ?ɾ???ļ?
private boolean isSingleField; // ?Ƿص?????ɵ?????
private boolean isSequenceMember; // ?Ƿ???????ɵ?????
private int sigleFieldIndex; // ????ʱ???ֶ?????
private boolean isExist = true; // ?ֶ??Ƿ????ļ???
private boolean isEnd = false;
private boolean optimize = true; // ide??Ҫ?ã?parseʱ?Ƿ????ж??ܲ???ת????һ????¼??????
/**
* ????һ???ı??ļ????α?
* @param fileObject ?ı??ļ?
* @param segSeq ?κţ???1??ʼ????
* @param segCount ?ֶ???
* @param s ?зָ???
* @param opt ѡ?? t????һ??Ϊ???⣬b?????????ļ???c??д?ɶ??ŷָ???csv?ļ?
* s????????ֶΣ????ɵ??ֶδ????ɵ??????i???????ֻ??1??ʱ???س?????
* q??????ֶδ????????????Ȱ??룬???????ⲿ?֣?k???????????????˵Ŀհ???ȱʡ???Զ???trim
* e??Fi???ļ??в?????ʱ??????null??ȱʡ??????
* @param ctx
*/
public FileCursor(FileObject fileObject, int segSeq, int segCount,
String s, String opt, Context ctx) {
this(fileObject, segSeq, segCount, null, null, s, opt, ctx);
}
/**
* ????һ???ı??ļ????α?
* @param fileObject ?ı??ļ?
* @param segSeq ?κţ???1??ʼ????
* @param segCount ?ֶ???
* @param fields ѡ???ֶ???????
* @param types ѡ???ֶ????????飨?ɿգ???????com.scudata.common.Types
* @param s ?зָ???
* @param opt ѡ?? t????һ??Ϊ???⣬b?????????ļ???c??д?ɶ??ŷָ???csv?ļ?
* s????????ֶΣ????ɵ??ֶδ????ɵ??????i???????ֻ??1??ʱ???س?????
* q??????ֶδ????????????Ȱ??룬???????ⲿ?֣?k???????????????˵Ŀհ???ȱʡ???Զ???trim
* e??Fi???ļ??в?????ʱ??????null??ȱʡ??????
* @param ctx
*/
public FileCursor(FileObject fileObject, int segSeq, int segCount,
String []fields, byte []types, String s, String opt, Context ctx) {
if (segCount > 1) {
if (segSeq < 1 || segSeq > segCount) {
MessageManager mm = EngineMessage.get();
throw new RQException(segSeq + mm.getMessage("function.invalidParam"));
}
long size = fileObject.size();
long blockSize = size / segCount;
if (segSeq == segCount) {
end = size;
start = blockSize * (segSeq - 1);
} else {
end = blockSize * segSeq;
start = blockSize * (segSeq - 1);
}
}
this.fileObject = fileObject;
this.types = types;
this.opt = opt;
this.ctx = ctx;
if (fields != null) {
selFields = new String[fields.length];
System.arraycopy(fields, 0, selFields, 0, fields.length);
}
boolean isCsv = false;
if (opt != null) {
if (opt.indexOf('t') != -1) isTitle = true;
if (opt.indexOf('c') != -1) isCsv = true;
if (opt.indexOf('i') != -1) isSingleField = true;
if (opt.indexOf('e') != -1) isExist = false;
if (opt.indexOf('x') != -1) {
isDeleteFile = true;
if (ctx != null) ctx.addResource(this);
}
if (opt.indexOf('w') != -1) isSequenceMember = true;
}
if (s != null && s.length() > 0) {
String charset = fileObject.getCharset();
try {
colSeparator = s.getBytes(charset);
} catch (Exception e) {
throw new RQException(e.getMessage(), e);
}
} else if (isCsv) {
colSeparator = new byte[]{(byte)','};
} else {
colSeparator = FileObject.COL_SEPARATOR;
}
}
/**
* ???ö??ļ?????ʼλ??
* @param startPos ??ʼλ?ã???????ͷȥβ????
*/
public void setStart(long start) {
this.start = start;
}
/**
* ???ö??ļ??Ľ???λ??
* @param endPos ????λ?ã???????ͷȥβ????
*/
public void setEnd(long end) {
this.end = end;
}
/**
* ????????ʱ???ֶεĸ?ʽ
* @param fmts ????ʱ???ʽ????
*/
public void setFormats(String []fmts) {
this.fmts = fmts;
}
/**
* ȡ?ļ??α??Ӧ???ļ?????
* @return FileObject
*/
public FileObject getFileObject() {
return fileObject;
}
/**
* ȡ?α??ѡ??
* @return String
*/
public String getOption() {
return opt;
}
private LineImporter open() {
if (importer != null) {
return importer;
} else if (fileObject == null || isEnd) {
return null;
}
if (!isDeleteFile && ctx != null) {
ctx.addResource(this);
}
InputStream in = null;
String []selFields = null;
if (this.selFields != null) {
// ?????ֶ???????ֹû???ֶ???ʱ??#1ȡ????????reset????ȡ???????Ҳ????ֶ?
selFields = new String[this.selFields.length];
System.arraycopy(this.selFields, 0, selFields, 0, selFields.length);
}
try {
in = fileObject.getBlockInputStream();
String charset = fileObject.getCharset();
importer = new LineImporter(in, charset, colSeparator, opt);
if (isTitle) {
// ??һ???DZ???
Object []line = importer.readFirstLine();
if (line == null) {
return null;
}
int fcount = line.length;
String []fieldNames = new String[fcount];
for (int f = 0; f < fcount; ++f) {
fieldNames[f] = Variant.toString(line[f]);
}
ds = new DataStruct(fieldNames);
if (selFields != null) {
if (isSingleField) isSingleField = selFields.length == 1;
int maxSeq = 0;
int []index = new int[fcount];
for (int i = 0; i < fcount; ++i) {
index[i] = -1;
}
for (int i = 0, count = selFields.length; i < count; ++i) {
int q = ds.getFieldIndex(selFields[i]);
if (q >= 0) {
if (index[q] != -1) {
MessageManager mm = EngineMessage.get();
throw new RQException(selFields[i] + mm.getMessage("ds.colNameRepeat"));
}
index[q] = i;
selFields[i] = ds.getFieldName(q);
sigleFieldIndex = q;
if (q > maxSeq) {
maxSeq = q;
}
} else if (isExist) {
MessageManager mm = EngineMessage.get();
throw new RQException(selFields[i] + mm.getMessage("ds.fieldNotExist"));
}
}
this.selDs = new DataStruct(selFields);
setDataStruct(selDs);
maxSeq++;
if (maxSeq < fcount) {
int []tmp = new int[maxSeq];
System.arraycopy(index, 0, tmp, 0, maxSeq);
index = tmp;
}
this.selIndex = index;
importer.setColSelectIndex(index);
if (optimize) {
byte []colTypes = new byte[maxSeq];
String []colFormats = new String[maxSeq];
if (types != null) {
for (int i = 0; i < maxSeq; ++i) {
if (index[i] != -1) {
colTypes[i] = types[index[i]];
}
}
}
if (fmts != null) {
for (int i = 0; i < maxSeq; ++i) {
if (index[i] != -1) {
colFormats[i] = fmts[index[i]];
}
}
}
importer.setColTypes(colTypes, colFormats);
}
} else {
setDataStruct(ds);
if (isSingleField && fcount != 1) {
isSingleField = false;
}
if (optimize) {
byte []colTypes = new byte[fcount];
importer.setColTypes(colTypes, fmts);
}
}
} else {
if (selFields != null) {
if (isSingleField) {
isSingleField = selFields.length == 1;
}
int fcount = 0;
for (int i = 0, count = selFields.length; i < count; ++i) {
if (KeyWord.isFieldId(selFields[i])) {
int f = KeyWord.getFiledId(selFields[i]);
if (f > fcount) {
fcount = f;
}
} else if (isExist) {
MessageManager mm = EngineMessage.get();
throw new RQException(selFields[i] + mm.getMessage("ds.fieldNotExist"));
}
}
int []index = new int[fcount];
for (int i = 0; i < fcount; ++i) {
index[i] = -1;
}
String[] fieldNames = new String[fcount];
ds = new DataStruct(fieldNames);
for (int i = 0, count = selFields.length; i < count; ++i) {
int q = ds.getFieldIndex(selFields[i]);
if (q >= 0) {
if (index[q] != -1) {
MessageManager mm = EngineMessage.get();
throw new RQException(selFields[i] + mm.getMessage("ds.colNameRepeat"));
}
index[q] = i;
selFields[i] = ds.getFieldName(q);
sigleFieldIndex = q;
}
}
this.selDs = new DataStruct(selFields);
this.selIndex = index;
importer.setColSelectIndex(index);
if (optimize) {
byte []colTypes = new byte[fcount];
String []colFormats = new String[fcount];
if (types != null) {
for (int i = 0; i < fcount; ++i) {
if (index[i] != -1) {
colTypes[i] = types[index[i]];
}
}
}
if (fmts != null) {
for (int i = 0; i < fcount; ++i) {
if (index[i] != -1) {
colFormats[i] = fmts[index[i]];
}
}
}
importer.setColTypes(colTypes, colFormats);
}
}
}
importer.seek(start);
if (end != -1 && importer.getCurrentPosition() > end) {
return null;
}
return importer;
} catch (Exception e) {
// importer?????????п??ܳ??쳣
if (in != null && importer == null) {
try {
in.close();
} catch (IOException ie) {
}
}
close();
if (e instanceof RQException) {
throw (RQException)e;
} else {
throw new RQException(e.getMessage(), e);
}
}
}
// ȡ?????ֶ?
private Sequence fetchAll(LineImporter importer, int n) throws IOException {
Object []line;
long end = this.end;
int initSize = n > INITSIZE ? INITSIZE : n;
if (isSequenceMember) {
Sequence seq = new Sequence(initSize);
for (int i = 0; i < n; ++i) {
if (end != -1 && importer.getCurrentPosition() > end) {
break;
}
line = importer.readLine();
if (line == null) {
break;
} else {
seq.add(new Sequence(line));
}
}
if (seq.length() != 0) {
return seq;
} else {
return null;
}
}
if (end > 0 && importer.getCurrentPosition() > end) {
return null;
}
int fcount;
if (ds == null) {
// ?״ζ???û?б???
line = importer.readFirstLine();
if (line == null) {
return null;
}
fcount = line.length;
String []fieldNames = new String[fcount];
ds = new DataStruct(fieldNames);
if (isSingleField && fcount != 1) {
isSingleField = false;
}
if (optimize) {
byte []colTypes = new byte[fcount];
for (int i = 0; i < fcount; ++i) {
colTypes[i] = Variant.getObjectType(line[i]);
}
importer.setColTypes(colTypes, fmts);
}
} else {
fcount = ds.getFieldCount();
line = importer.readLine();
if (line == null) {
return null;
}
}
if (isSingleField) {
Sequence seq = new Sequence(initSize);
seq.add(line[0]);
for (int i = 1; i < n; ++i) {
if (end != -1 && importer.getCurrentPosition() > end) {
break;
}
line = importer.readLine();
if (line == null) {
break;
}
seq.add(line[0]);
}
return seq;
} else {
Table table = new Table(ds, initSize);
BaseRecord r = table.newLast();
int curLen = line.length;
if (curLen > fcount) curLen = fcount;
for (int f = 0; f < curLen; ++f) {
r.setNormalFieldValue(f, line[f]);
}
for (int i = 1; i < n; ++i) {
if (end != -1 && importer.getCurrentPosition() > end) {
break;
}
line = importer.readLine();
if (line == null) {
break;
}
r = table.newLast();
curLen = line.length;
if (curLen > fcount) curLen = fcount;
for (int f = 0; f < curLen; ++f) {
r.setNormalFieldValue(f, line[f]);
}
}
return table;
}
}
// ??ѡ???ֶ?ʱ??ȡ??
private Sequence fetchFields(LineImporter importer, int n) throws IOException {
Object []line;
long end = this.end;
int initSize = n > INITSIZE ? INITSIZE : n;
int []selIndex = this.selIndex;
if (isSingleField) {
int index = this.sigleFieldIndex;
Sequence seq = new Sequence(initSize);
for (int i = 0; i < n; ++i) {
if (end != -1 && importer.getCurrentPosition() > end) {
break;
}
line = importer.readLine();
if (line == null) {
break;
}
if (index < line.length) {
seq.add(line[index]);
} else {
seq.add(null);
}
}
if (seq.length() != 0) {
return seq;
} else {
return null;
}
} else {
int curLen;
BaseRecord r;
Table table = new Table(selDs, initSize);
for (int i = 0; i < n; ++i) {
if (end != -1 && importer.getCurrentPosition() > end) {
break;
}
line = importer.readLine();
if (line == null) {
break;
}
r = table.newLast();
curLen = line.length;
for (int f = 0; f < curLen; ++f) {
if (selIndex[f] != -1) r.setNormalFieldValue(selIndex[f], line[f]);
}
}
if (table.length() != 0) {
return table;
} else {
return null;
}
}
}
/**
* ??ȡָ???????????ݷ???
* @param n ????
* @return Sequence
*/
protected Sequence get(int n) {
if (n < 1) return null;
LineImporter importer = open();
if (importer == null) return null;
try {
if (selFields == null) {
return fetchAll(importer, n);
} else {
return fetchFields(importer, n);
}
} catch (IOException e) {
close();
throw new RQException(e.getMessage(), e);
}
}
/**
* ????ָ????????????
* @param n ????
* @return long ʵ????????????
*/
protected long skipOver(long n) {
if (n < 1) return 0;
LineImporter importer = open();
if (importer == null) return 0;
try {
long end = this.end;
for (long i = 0; i < n; ++i) {
if (end != -1 && importer.getCurrentPosition() > end) {
break;
}
if (!importer.skipLine()) {
return i;
}
}
} catch (IOException e) {
close();
throw new RQException(e.getMessage(), e);
}
return n;
}
/**
* ?ر??α?
*/
public synchronized void close() {
super.close();
if (fileObject != null) {
isEnd = true;
if (importer != null) {
if (ctx != null) ctx.removeResource(this);
try {
importer.close();
} catch (IOException e) {
}
}
if (isDeleteFile) {
fileObject.delete();
fileObject = null;
}
importer = null;
ds = null;
selDs = null;
}
}
protected void finalize() throws Throwable {
close();
}
/**
* ?????α?
* @return ?????Ƿ?ɹ???true???α???Դ?ͷ????ȡ????false???????Դ?ͷ????ȡ??
*/
public boolean reset() {
close();
if (fileObject != null) {
isEnd = false;
return true;
} else {
return false;
}
}
/**
* ?Ƿ???ֶ?parse???Ż????Ż?ʱ???Ȱ???һ????¼???ֶ?????ת
* @param optimize true???Ż???false?????Ż?
*/
public void setOptimize(boolean optimize) {
this.optimize = optimize;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy