io.github.eb4j.pdic.PdicInfoCache Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pdic4j Show documentation
Show all versions of pdic4j Show documentation
PDIC access library for java
package io.github.eb4j.pdic;
import com.ibm.icu.charset.CharsetICU;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.lang.ref.WeakReference;
import java.util.WeakHashMap;
/**
* @author wak (Apache-2.0)
* @author Hiroshi Miura
*/
class PdicInfoCache {
private final boolean mFix;
private final int mBlockSize;
private final RandomAccessFile mFile;
private final int mStart;
private final int mSize;
private final WeakHashMap> mMap = new WeakHashMap<>();
private byte[] mFixedBuffer;
PdicInfoCache(final RandomAccessFile file, final int start, final int size) {
mFile = file;
mStart = start;
mSize = size;
if (mSize < 1024 * 512) {
mFix = true;
mBlockSize = mSize;
} else {
mFix = false;
mBlockSize = 1024;
}
}
byte[] getSegment(final int segment) {
byte[] segmentdata = null;
if (mFix) {
if (mFixedBuffer == null) {
mFixedBuffer = new byte[mSize];
try {
mFile.seek(mStart);
if (mFile.read(mFixedBuffer, 0, mSize) >= 0) {
return mFixedBuffer;
}
} catch (IOException ignored) {
}
}
}
WeakReference ref = mMap.get(segment);
if (ref != null) {
segmentdata = ref.get();
}
if (segmentdata == null) {
segmentdata = new byte[mBlockSize];
try {
mFile.seek(mStart + (long) segment * mBlockSize);
int len = mFile.read(segmentdata, 0, mBlockSize);
if (len == mBlockSize || len == mSize % mBlockSize) {
mMap.put(segment, new WeakReference<>(segmentdata));
} else {
return null;
}
} catch (IOException e) {
return null;
}
}
return segmentdata;
}
public int getShort(final int ptr) {
int segment = ptr / mBlockSize;
int address = ptr % mBlockSize;
byte[] segmentdata = getSegment(segment++);
int dat = 0;
if (segmentdata != null) {
int b = 0;
b = segmentdata[address++];
b &= 0xFF;
dat |= b;
if (address >= mBlockSize) {
address %= mBlockSize;
segmentdata = getSegment(segment);
}
b = segmentdata[address];
b &= 0xFF;
dat |= (b << 8);
}
return dat;
}
public int getInt(final int ptr) {
int segment = ptr / mBlockSize;
int address = ptr % mBlockSize;
byte[] segmentdata = getSegment(segment++);
int dat = 0;
if (segmentdata != null) {
int b = 0;
b = segmentdata[address++];
b &= 0xFF;
dat |= b;
if (address >= mBlockSize) {
address %= mBlockSize;
segmentdata = getSegment(segment++);
}
b = segmentdata[address++];
b &= 0xFF;
dat |= (b << 8);
if (address >= mBlockSize) {
address %= mBlockSize;
segmentdata = getSegment(segment++);
}
b = segmentdata[address++];
b &= 0xFF;
dat |= (b << 16);
if (address >= mBlockSize) {
address %= mBlockSize;
segmentdata = getSegment(segment);
}
b = segmentdata[address];
b &= 0x7F;
dat |= (b << 24);
}
return dat;
}
@SuppressWarnings("finalparameters")
private static int compareArrayAsUnsigned(byte[] aa, int pa, int la, byte[] ab, int pb, int lb) {
while (la-- > 0) {
short sa = aa[pa++];
if (lb-- > 0) {
short sb = ab[pb++];
if (sa != sb) {
sa &= 0xFF;
sb &= 0xFF;
return (sa - sb);
}
} else {
return 1;
}
}
if (lb > 0) {
short sb = ab[pb];
if (sb == 0x09) { // 比較対象の'\t'は'\0'とみなす
return 0;
}
return -1;
}
return 0;
}
/**
*
* @param aa
* @param pa
* @param la
* @param ptr
* @param len
* @return
*/
@SuppressWarnings("finalparameters")
public int compare(final byte[] aa, final int pa, final int la, final int ptr, final int len) {
int segment = ptr / mBlockSize;
int address = ptr % mBlockSize;
byte[] segmentdata = getSegment(segment++);
if (segmentdata == null) {
return -1;
}
if (len < 0) {
return 1;
}
if (address + len < mBlockSize) {
PdicInfo.decodetoCharBuffer(CharsetICU.forNameICU("BOCU-1"), segmentdata, address, len);
return compareArrayAsUnsigned(aa, pa, la, segmentdata, address, len);
} else {
int lena = mBlockSize - address;
int leno = Math.min(la, lena);
int ret = compareArrayAsUnsigned(aa, pa, leno, segmentdata, address, lena);
PdicInfo.decodetoCharBuffer(CharsetICU.forNameICU("BOCU-1"), segmentdata, address, lena);
if (ret != 0) {
return ret;
}
if (la < lena) {
return -1;
}
address = 0;
segmentdata = getSegment(segment);
PdicInfo.decodetoCharBuffer(CharsetICU.forNameICU("BOCU-1"), segmentdata, address, len - lena);
return compareArrayAsUnsigned(aa, pa + lena, la - lena, segmentdata, address, len - lena);
}
}
/**
* Create index of words.
* @param blockbits
* @param nindex
* @param indexPtr
* @return true when success, otherwise false.
*/
public boolean createIndex(final int blockbits, final int nindex, final int[] indexPtr) {
// インデックスの先頭から見出し語のポインタを拾っていく
int blocksize = 64 * 1024;
int[] params = new int[]{0, 0, nindex, blocksize, blockbits, 1, 0};
boolean hasNext = true;
for (int i = 0; hasNext; i++) {
hasNext = countIndexWords(params, getSegment(i), indexPtr);
}
indexPtr[params[0]] = params[1] + blockbits; // ターミネータを入れておく
return true;
}
private boolean countIndexWords(final int[] params, final byte[] buff, final int[] indexPtr) {
int curidx = params[0];
int curptr = params[1];
int max = params[2];
int buffmax = params[3];
int blockbits = params[4];
int found = params[5];
int ignore = params[6];
int i = 0;
for (; i < buffmax && curidx < max; i++) {
if (ignore > 0) {
ignore--;
} else if (found != 0) {
int ptr = curptr + i + blockbits; // ブロック番号サイズポインタを進める
indexPtr[curidx++] = ptr; // 見出し語部分のポインタを保存
ignore = blockbits - 1;
found = 0;
} else if (buff[i] == 0) {
found = 1;
}
}
params[0] = curidx;
params[1] = curptr + i;
params[5] = found;
params[6] = ignore;
return curidx < max;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy