// Source file: com.mayabot.nlp.segment.IterableMode.kt
package com.mayabot.nlp.segment
import java.util.*
/**
 * Selects how words that carry subwords are presented when iterating terms.
 */
enum class WordTermIterableMode {

    /**
     * Do not output subwords.
     */
    TOP,

    /**
     * Output both the merged word and its subwords.
     *
     * Example: 北京大学 is output as 北京大学, 北京, 大学.
     */
    Overlap,

    /**
     * Output only the subwords.
     */
    ATOM
}
/**
 * Iterable wrapper whose iterators are [OverlapIterator] instances built on top of [wrap].
 *
 * @property wrap the underlying iterable; a new iterator is requested from it on every [iterator] call
 */
class OverlapIterable(val wrap: Iterable) : Iterable {

    /** Wraps a fresh iterator of [wrap] in an [OverlapIterator]. */
    override fun iterator(): Iterator = OverlapIterator(wrap.iterator())
}
/**
 * Iterator that emits every term of [from] and, right after a term that has subwords,
 * the subwords of that term.
 *
 * Example: for the terms 北京大学 / 的 / 学生, where 北京大学 has the subwords 北京 and 大学,
 * the output order is 北京大学, 北京, 大学, 的, 学生.
 *
 * Side effect: `posInc` of the first subword of each such term is set to 0.
 *
 * NOTE(review): the element type is not visible in this listing; elements are expected to
 * expose `hasSubword()`, `subword` and a writable `posInc` — confirm against the term class.
 *
 * @property from the source iterator being expanded
 */
class OverlapIterator(val from: Iterator) : AbstractIterator() {

    /** Queue of subwords not yet emitted; reset to null once it is found empty on a later call. */
    var buffer: LinkedList? = null

    override fun computeNext() {
        // Drain subwords queued by the previous term before touching the source again.
        val pending = buffer
        if (pending != null) {
            if (!pending.isEmpty()) {
                setNext(pending.poll()!!)
                return
            }
            buffer = null
        }

        if (!from.hasNext()) {
            done()
            return
        }

        // The term itself is emitted first; its subwords follow on subsequent calls.
        val term = from.next()
        setNext(term)
        if (term.hasSubword()) {
            val subwords = LinkedList(term.subword)
            // The first subword gets a position increment of 0.
            // https://lucene.apache.org/core/8_1_0/core/org/apache/lucene/analysis/package-summary.html#package.description
            subwords.first.posInc = 0
            buffer = subwords
        }
    }
}
/**
 * Iterable wrapper whose iterators are [AtomIterator] instances built on top of [wrap].
 *
 * @property wrap the underlying iterable; a new iterator is requested from it on every [iterator] call
 */
class AtomIterable(val wrap: Iterable) : Iterable {

    /** Wraps a fresh iterator of [wrap] in an [AtomIterator]. */
    override fun iterator(): Iterator = AtomIterator(wrap.iterator())
}
/**
 * Iterator that replaces every term of [from] that has subwords with those subwords;
 * terms without subwords are passed through unchanged.
 *
 * NOTE(review): the element type is not visible in this listing; elements are expected to
 * expose `hasSubword()` and `subword` — confirm against the term class.
 *
 * @property from the source iterator being flattened
 */
class AtomIterator(val from: Iterator) : AbstractIterator() {

    /** Queue of subwords not yet emitted; reset to null once it is found empty on a later call. */
    var buffer: LinkedList? = null

    override fun computeNext() {
        // Drain subwords queued by the previous term before touching the source again.
        val pending = buffer
        if (pending != null) {
            if (!pending.isEmpty()) {
                setNext(pending.poll()!!)
                return
            }
            buffer = null
        }

        if (!from.hasNext()) {
            done()
            return
        }

        val current = from.next()
        if (!current.hasSubword()) {
            setNext(current)
            return
        }

        // Queue the subwords, then emit the first one immediately in place of the term itself.
        val parts = LinkedList(current.subword)
        buffer = parts
        setNext(parts.poll()!!)
    }
}
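// Disabled draft of GraphIterable / GraphIterator, kept for reference.
// The commented-out logic below mirrors OverlapIterator.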
//class GraphIterable(val wrap: Iterable) : Iterable {
//
// override fun iterator(): Iterator {
// return GraphIterator(wrap.iterator())
// }
//
//
//}
//
//
//class GraphIterator(val from: Iterator) : AbstractIterator() {
// var buffer: LinkedList? = null
// override fun computeNext() {
// val b = buffer
// if (b != null) {
// if (b.isEmpty()) {
// buffer = null
// } else {
// setNext(b.poll()!!)
// return
// }
// }
//
// if (from.hasNext()) {
// val next = from.next()
// setNext(next)
// if (next.hasSubword()) {
// buffer = LinkedList(next.subword).apply {
// //The first subword gets a position increment of 0 (see the Lucene link on the next line)
// //https://lucene.apache.org/core/8_1_0/core/org/apache/lucene/analysis/package-summary.html#package.description
// first.posInc = 0
// }
// }
// } else {
// done()
// }
// }
//
//}