All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.mayabot.nlp.collection.dat.DoubleArrayTrieStringIntMap Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/*
 * Copyright 2018 mayabot.com authors. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * DoubleArrayTrieMap: Java implementation of Darts (Double-ARray Trie System)
 * 

*

* Copyright(C) 2001-2007 Taku Kudo <[email protected]>
* Copyright(C) 2009 MURAWAKI Yugo <[email protected]> * Copyright(C) 2012 KOMIYA Atsushi <[email protected]> *

*

*

* The contents of this file may be used under the terms of either of the GNU * Lesser General Public License Version 2.1 or later (the "LGPL"), or the BSD * License (the "BSD"). *

*/ /* * 源代码参考和部分引用来自 https://github.com/hankcs/HanLP https://github.com/NLPchina/ansj_seg */ package com.mayabot.nlp.collection.dat; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.TreeMap; import static com.mayabot.nlp.utils.DataInOutputUtils.readIntArray; import static com.mayabot.nlp.utils.DataInOutputUtils.writeIntArray; /** * 【String int】的map * 这是一个定制化的String,Int的DATMap的实现,采用int来存储,比Integer对内存的要求更低。 * * @author jimichan */ public class DoubleArrayTrieStringIntMap { private int[] values; private DoubleArrayTrie dat; /** * 从IO里面恢复 * * @param in * @throws IOException */ public DoubleArrayTrieStringIntMap( DataInput in) throws IOException { DoubleArrayTrie dat = new DoubleArrayTrie(in); int[] values = readIntArray(in); this.dat = dat; this.values = values; } /** * @param dat * @param values */ public DoubleArrayTrieStringIntMap(DoubleArrayTrie dat, int[] values) { this.values = values; this.dat = dat; } /** * @param keys 一定是字典有序 * @param values */ public DoubleArrayTrieStringIntMap(ArrayList keys, int[] values) { this(new DoubleArrayTrie(keys), values); } public DoubleArrayTrieStringIntMap(TreeMap map) { ArrayList keys = new ArrayList<>(map.keySet()); int[] values = new int[map.size()]; Collection values1 = map.values(); int c = 0; for(Integer integer : values1){ values[c++] = integer.intValue(); } this.dat = new DoubleArrayTrie(keys); this.values = values; } public void save(DataOutput out) throws IOException { dat.write(out); writeIntArray(values, out); } /** * DAT的搜索器 * * @param text 带计算的文本 * @param offset 文本中的偏移量 * @return DATMapMatcherInt */ public DATMapMatcherInt match(String text, int offset) { return new DATMapMatcherInt(dat.matcher(text, offset)); } /** * DAT的搜索器 * * @param text 带计算的文本 * @param offset 文本中的偏移量 * @return DATMapMatcherInt */ public DATMapLongMatcherInt matchLong(char[] text, int offset) { return new DATMapLongMatcherInt(dat.matcherLong(text, offset)); } /** * DAT的搜索器 * * @param text 带计算的文本 * @param offset 文本中的偏移量 * @return DATMapMatcherInt */ public DATMapLongMatcherInt matchLong(String text, int offset) { return new DATMapLongMatcherInt(dat.matcherLong(text, offset)); } /** * DAT的搜索器 * * @param text 带计算的文本 * @param offset 文本中的偏移量 * @return DATMapMatcherInt */ public DATMapMatcherInt match(char[] text, int offset) { return new DATMapMatcherInt(dat.matcher(text, offset)); } public class DATMapMatcherInt { DATMatcher datMater; public DATMapMatcherInt(DATMatcher datMater) { this.datMater = datMater; } public boolean next() { return datMater.next(); } public int getBegin() { return datMater.getBegin(); } public int getLength() { return datMater.getLength(); } public int getValue() { int index = datMater.getIndex(); if (index == -1) { return -1; } else { return values[index]; } } public int getIndex() { return datMater.getIndex(); } } public class DATMapLongMatcherInt { DATLongMatcher datMater; public DATMapLongMatcherInt(DATLongMatcher datMater) { this.datMater = datMater; } public boolean next() { return datMater.next(); } public int getBegin() { return datMater.getBegin(); } public int getLength() { return datMater.getLength(); } public int getValue() { int index = datMater.getIndex(); if (index == -1) { return -1; } else { return values[index]; } } public int getIndex() { return datMater.getIndex(); } } /** * 树叶子节点个数 * * @return DATMapMatcherInt */ public int size() { return values.length; } /** * 精确匹配 * * @param key 键 * @return 值 */ public int indexOf(CharSequence key) { return dat.indexOf(key, 0, 0, 0); } public int indexOf(CharSequence key, int pos, int len, int nodePos) { return dat.indexOf(key, pos, len, nodePos); } /** * 精确查询 * * @param chars 键的char数组 * @param pos char数组的起始位置 * @param len 键的长度 * 开始查找的位置(本参数允许从非根节点查询) * @return 查到的节点代表的value ID,负数表示不存在 */ public int indexOf(char[] chars, int pos, int len) { return dat.indexOf(chars, pos, len, 0); } /** * 精确查询 * * @param keyChars 键的char数组 * @param pos char数组的起始位置 * @param len 键的长度 * @param nodePos 开始查找的位置(本参数允许从非根节点查询) * @return 查到的节点代表的value ID,负数表示不存在 */ public int indexOf(char[] keyChars, int pos, int len, int nodePos) { return dat.indexOf(keyChars, pos, len, nodePos); } public int indexOf(char ch) { return dat.indexOf(ch); } /** * 精确查询 * * @param key 键 * @return 值 */ public int get(CharSequence key) { int index = indexOf(key); if (index >= 0) { return getValueAt(index); } return -1; } public int get(CharSequence key, int offset, int length) { int index = indexOf(key, offset, length, 0); if (index >= 0) { return getValueAt(index); } return -1; } public int get(char[] key) { int index = indexOf(key, 0, key.length, 0); if (index >= 0) { return getValueAt(index); } return -1; } public int get(char[] key, int offset, int len) { int index = indexOf(key, offset, len, 0); if (index >= 0) { return getValueAt(index); } return -1; } /** * 获取index对应的值 * * @param index * @return DATMapMatcherInt */ public int getValueAt(int index) { return values[index]; } /** * @param key * @return true or false */ public boolean containsKey(String key) { return indexOf(key) >= 0; } /** * @param key * @return true or false */ public boolean containsKey(char key) { return indexOf(key) >= 0; } /** * 更新某个键对应的值 * * @param key 键 * @param value 值 * @return 是否成功(失败的原因是没有这个键) */ public boolean set(String key, int value) { int index = indexOf(key); if (index >= 0) { values[index] = value; return true; } return false; } /** * 从值数组中提取下标为index的值
* 注意为了效率,此处不进行参数校验 * * @param index 下标 * @return 值 */ public int get(int index) { return values[index]; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy