// io.github.eb4j.pdic.IndexCache, from the pdic4j artifact (PDIC access library for Java).
/*
* PDIC4j, a PDIC dictionary access library.
* Copyright (C) 2022 Hiroshi Miura.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package io.github.eb4j.pdic;
import com.ibm.icu.charset.CharsetICU;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.lang.ref.WeakReference;
import java.util.WeakHashMap;
/**
 * Read-through cache over the index region of a PDIC dictionary file.
 *
 * <p>The region starts at byte {@code start} of the file and is {@code size} bytes long.
 * Regions smaller than 512 KiB are loaded once into a single buffer ("fixed" mode);
 * larger regions are read on demand in 1024-byte segments that are kept through weak
 * references, so the garbage collector may drop them at any time.
 *
 * @author wak (Apache-2.0)
 * @author Hiroshi Miura
 */
class IndexCache {
    /** Regions smaller than this many bytes are loaded in one piece. */
    private static final int FIXED_MODE_LIMIT = 1024 * 512;
    /** Segment length in bytes used for regions that are not loaded in one piece. */
    private static final int SEGMENT_SIZE = 1024;
    /** A tab in the stored entry is treated as the end of the entry when comparing. */
    private static final int TAB = 0x09;

    private final boolean fix;
    private final int segmentBlockSize;
    private final RandomAccessFile randomAccessFile;
    private final int start;
    private final int size;
    // NOTE(review): WeakHashMap also holds its boxed Integer keys weakly, so entries may
    // disappear even while the byte[] is still reachable - confirm this is intended.
    private final WeakHashMap<Integer, WeakReference<byte[]>> mMap = new WeakHashMap<>();
    private byte[] fixedBuffer;

    /**
     * Creates a cache for the given region of a file.
     *
     * @param file  file to read from; it is not closed by this class.
     * @param start offset in bytes of the region from the beginning of the file.
     * @param size  length of the region in bytes.
     */
    IndexCache(final RandomAccessFile file, final int start, final int size) {
        randomAccessFile = file;
        this.start = start;
        this.size = size;
        if (this.size < FIXED_MODE_LIMIT) {
            fix = true;
            // Never 0, so that "ptr / segmentBlockSize" cannot divide by zero for an empty region.
            segmentBlockSize = Math.max(this.size, 1);
        } else {
            fix = false;
            segmentBlockSize = SEGMENT_SIZE;
        }
    }

    /**
     * Returns the bytes of one segment of the region.
     *
     * <p>In fixed mode the only segment is segment 0, which is the whole region. Otherwise
     * the returned array always has the full segment length; the last segment is
     * zero-padded when the file ends before the segment does.
     *
     * @param segment zero-based segment number.
     * @return the segment data, or {@code null} when the segment lies outside the region
     *         or could not be read.
     */
    byte[] getSegment(final int segment) {
        if (segment < 0) {
            return null;
        }
        if (fix) {
            return segment == 0 ? loadFixedBuffer() : null;
        }
        final WeakReference<byte[]> ref = mMap.get(segment);
        final byte[] cached = ref == null ? null : ref.get();
        if (cached != null) {
            return cached;
        }
        final long offset = (long) segment * segmentBlockSize;
        if (offset >= size) {
            return null;
        }
        // The last segment of the region may be shorter than a full segment.
        final int expected = (int) Math.min(segmentBlockSize, size - offset);
        final byte[] data = new byte[segmentBlockSize];
        try {
            if (readAt(start + offset, data) < expected) {
                return null;
            }
        } catch (IOException e) {
            return null;
        }
        mMap.put(segment, new WeakReference<>(data));
        return data;
    }

    /**
     * Loads the whole region on first use and returns the same buffer afterwards.
     *
     * @return the region, or {@code null} when it is empty or nothing could be read.
     */
    private byte[] loadFixedBuffer() {
        if (fixedBuffer == null && size > 0) {
            final byte[] data = new byte[size];
            try {
                // A partially readable region is accepted; unread bytes stay zero.
                if (readAt(start, data) > 0) {
                    fixedBuffer = data;
                }
            } catch (IOException ignored) {
                // fixedBuffer stays null, so the caller sees "unavailable" and a later call retries.
            }
        }
        return fixedBuffer;
    }

    /**
     * Reads from the file until the buffer is full or the end of file is reached.
     *
     * @return number of bytes actually read.
     */
    private int readAt(final long position, final byte[] buffer) throws IOException {
        randomAccessFile.seek(position);
        int total = 0;
        while (total < buffer.length) {
            final int n = randomAccessFile.read(buffer, total, buffer.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }

    /**
     * Reads {@code count} bytes starting at {@code ptr} as a little-endian value,
     * following the data into the next segment when necessary.
     *
     * @return the value, or 0 when any of the bytes is unavailable.
     */
    private int readLittleEndian(final int ptr, final int count) {
        if (ptr < 0) {
            return 0;
        }
        int segment = ptr / segmentBlockSize;
        int address = ptr % segmentBlockSize;
        byte[] data = getSegment(segment);
        int value = 0;
        for (int i = 0; i < count; i++) {
            if (address >= segmentBlockSize) {
                address = 0;
                data = getSegment(++segment);
            }
            if (data == null) {
                return 0;
            }
            value |= (data[address++] & 0xFF) << (8 * i);
        }
        return value;
    }

    /**
     * Reads an unsigned 16-bit little-endian value.
     *
     * @param ptr offset of the value inside the region.
     * @return the value (0..65535), or 0 when the data is unavailable.
     */
    public int getShort(final int ptr) {
        return readLittleEndian(ptr, 2);
    }

    /**
     * Reads a 32-bit little-endian value with the top bit cleared.
     *
     * @param ptr offset of the value inside the region.
     * @return the non-negative value, or 0 when the data is unavailable.
     */
    public int getInt(final int ptr) {
        return readLittleEndian(ptr, 4) & 0x7FFFFFFF;
    }

    /**
     * Compares {@code la} bytes of {@code aa} with {@code lb} bytes of {@code ab} as unsigned values.
     *
     * <p>When {@code aa} is exhausted first, the next byte of {@code ab} decides: a tab is
     * regarded as the end of {@code ab} (result 0), anything else makes {@code aa} smaller.
     *
     * @return negative, zero or positive when {@code aa} is less than, equal to or greater than {@code ab}.
     */
    private static int compareArrayAsUnsigned(final byte[] aa, final int pa, final int la,
                                              final byte[] ab, final int pb, final int lb) {
        int i = 0;
        for (; i < la; i++) {
            if (i >= lb) {
                return 1;
            }
            final int diff = (aa[pa + i] & 0xFF) - (ab[pb + i] & 0xFF);
            if (diff != 0) {
                return diff;
            }
        }
        if (i < lb) {
            return ab[pb + i] == TAB ? 0 : -1;
        }
        return 0;
    }

    /**
     * Compares a key with an entry stored in the region, byte by byte as unsigned values.
     *
     * <p>The entry may span segment boundaries. A tab in the entry directly after the
     * matched key is regarded as the end of the entry.
     *
     * @param aa  array holding the key.
     * @param pa  offset of the key in {@code aa}.
     * @param la  length of the key in bytes.
     * @param ptr offset of the entry inside the region.
     * @param len length of the entry in bytes.
     * @return negative, zero or positive when the key is less than, equal to or greater
     *         than the entry; -1 when the entry data is unavailable and 1 when
     *         {@code len} is negative.
     */
    public int compare(final byte[] aa, final int pa, final int la, final int ptr, final int len) {
        // NOTE(review): the previous version decoded each compared range as BOCU-1 via
        // Utils.decodetoCharBuffer and discarded the result; that dead work was removed.
        // Confirm the helper has no side effects.
        if (ptr < 0) {
            return -1;
        }
        int segment = ptr / segmentBlockSize;
        int offset = ptr % segmentBlockSize;
        byte[] data = getSegment(segment);
        if (data == null) {
            return -1;
        }
        if (len < 0) {
            return 1;
        }
        int keyPos = pa;
        int keyLeft = la;
        int entryLeft = len;
        while (true) {
            final int chunk = Math.min(entryLeft, segmentBlockSize - offset);
            if (chunk == entryLeft || keyLeft < chunk) {
                // The result is decided inside this segment: either the entry ends here
                // or the key does.
                return compareArrayAsUnsigned(aa, keyPos, keyLeft, data, offset, chunk);
            }
            final int partial = compareArrayAsUnsigned(aa, keyPos, chunk, data, offset, chunk);
            if (partial != 0) {
                return partial;
            }
            keyPos += chunk;
            keyLeft -= chunk;
            entryLeft -= chunk;
            offset = 0;
            data = getSegment(++segment);
            if (data == null) {
                return -1;
            }
        }
    }

    /**
     * Create index of words.
     *
     * <p>Walks the region from its beginning and stores, for each entry, the offset of
     * its headword (the entry start plus the size of the block number). An entry ends
     * at a zero byte. After the last stored pointer a terminator value is written.
     *
     * @param blockBits size in bytes of the block number that precedes each headword.
     * @param nIndex    number of entries to collect.
     * @param indexPtr  receives the headword offsets followed by the terminator; should
     *                  have room for {@code nIndex + 1} values.
     * @return true when success, otherwise false.
     */
    public boolean createIndex(final int blockBits, final int nIndex, final int[] indexPtr) {
        final int wanted = Math.min(nIndex, indexPtr.length);
        int count = 0;               // entries stored so far
        int position = 0;            // bytes of the region consumed so far
        int skip = 0;                // remaining bytes of the current block number
        boolean atEntryStart = true;
        for (int segment = 0; count < wanted; segment++) {
            final byte[] data = getSegment(segment);
            if (data == null) {
                break;
            }
            // Never scan past the end of the region (the last segment is zero-padded).
            final int limit = (int) Math.min(data.length, (long) size - position);
            if (limit <= 0) {
                break;
            }
            for (int i = 0; i < limit && count < wanted; i++, position++) {
                if (skip > 0) {
                    skip--;
                } else if (atEntryStart) {
                    // Step over the block number and save the pointer to the headword.
                    indexPtr[count++] = position + blockBits;
                    skip = blockBits - 1;
                    atEntryStart = false;
                } else if (data[i] == 0) {
                    atEntryStart = true;
                }
            }
        }
        if (count < indexPtr.length) {
            indexPtr[count] = position + blockBits; // put a terminator
        }
        return count >= nIndex;
    }
}
// Hosting-site page footer (not part of the source): © 2015 - 2024 Weber Informatics LLC | Privacy Policy