All downloads are free. Search and download functionality uses the official Maven repository.

com.mayabot.nlp.segment.lexer.crf.FeatureTemplate.kt Maven / Gradle / Ivy

/*
 * Copyright 2018 mayabot.com authors. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.mayabot.nlp.segment.lexer.crf

import java.util.regex.Pattern

/**
 * Holds one parsed [FeatureTemplate] for every raw template string it is given.
 *
 * @param templates raw template strings; each one is handed to the [FeatureTemplate]
 *   constructor, in the order supplied
 * @author jimichan
 */
class FeatureTemplateGroup(templates: List<String>) {

    /** Number of template strings passed to the constructor. */
    val size: Int = templates.size

    /** Parsed templates, in the same order as the input strings. */
    val list: ArrayList<FeatureTemplate> =
        templates.mapTo(ArrayList<FeatureTemplate>(templates.size)) { FeatureTemplate(it) }

    companion object {
        /**
         * Eight marker strings `_B-1` .. `_B-8`.
         * NOTE(review): presumably stand-ins for positions before the start of a sequence —
         * confirm against the callers, which are not visible in this file.
         */
        val BOS = arrayOf("_B-1", "_B-2", "_B-3", "_B-4", "_B-5", "_B-6", "_B-7", "_B-8")

        /**
         * Eight marker strings `_B+1` .. `_B+8`.
         * NOTE(review): presumably stand-ins for positions past the end of a sequence —
         * confirm against the callers, which are not visible in this file.
         */
        val EOS = arrayOf("_B+1", "_B+2", "_B+3", "_B+4", "_B+5", "_B+6", "_B+7", "_B+8")
    }

}


// U6:%x[1,0]/%x[2,0]
/**
 * A single feature template split into literal text pieces and `%x[offset,col]` macros.
 *
 * Parsing walks the template from left to right: text found in front of a macro becomes
 * a string element and every macro becomes an offset element. Text that follows the last
 * macro is not captured. Once parsing is done the first element is moved to the end of
 * [list], so `U6:%x[1,0]/%x[2,0]` is stored as `%x[1,0]`, `/`, `%x[2,0]`, `:U6`.
 *
 * A template that contains no macro at all produces an empty [list].
 *
 * @param template the raw template string
 * @throws NumberFormatException if a macro has an empty (or bare `-`) offset or an empty
 *   column, which the macro pattern permits
 * @author jimichan
 */
class FeatureTemplate(template: String) {

    /** Parsed elements, already rotated so that the originally first element is last. */
    val list = ArrayList<FeatureTemplateElement>(10)

    init {
        val matcher = MACRO_PATTERN.matcher(template)
        var literalStart = 0
        while (matcher.find()) {
            val macroStart = matcher.start()
            // Anything between the previous macro (or the start) and this macro is literal text.
            if (macroStart > literalStart) {
                list.add(FeatureTemplateElement(template.substring(literalStart, macroStart)))
            }

            list.add(FeatureTemplateElement(matcher.group(1).toInt(), matcher.group(2).toInt()))

            literalStart = matcher.end()
        }

        // Rotate the leading element to the tail. Guarded so that a template without any
        // macro no longer fails with IndexOutOfBoundsException on the empty list.
        if (list.isNotEmpty()) {
            list.add(list.removeAt(0))
        }
    }

    /** Concatenates the string form of every element in [list], in stored order. */
    override fun toString(): String {
        return list.joinToString(separator = "")
    }

    private companion object {
        /** Matches `%x[offset,col]`; compiled once and shared by all instances. */
        private val MACRO_PATTERN: Pattern = Pattern.compile("%x\\[(-?\\d*),(\\d*)]")
    }
}

/**
 * Discriminates the two kinds of element a feature template can contain.
 */
enum class FeatureTemplateElementType {
    /** A piece of literal text. */
    String,

    /** An `%x[offset,col]` macro. */
    Offset
}

/**
 * One element of a feature template: either a piece of literal text or an
 * `%x[offset,col]` macro. Which one it is is recorded in [type].
 */
class FeatureTemplateElement {

    /** Literal text; stays empty for macro elements. */
    var value: String = ""

    /** First number of the macro; stays 0 for text elements. */
    var offset: Int = 0

    /** Second number of the macro; stays 0 for text elements. */
    var col: Int = 0

    /** Element kind; text unless the macro constructor was used. */
    var type = FeatureTemplateElementType.String

    /**
     * Creates a text element.
     *
     * Text that starts with `U` and ends with `:` is stored with the colon moved to
     * the front (for example `U6:` becomes `:U6`); any other text is stored unchanged.
     */
    constructor(value: String) {
        val isColonTerminatedLabel = value.startsWith("U") && value.endsWith(":")
        this.value = if (isColonTerminatedLabel) ":" + value.removeSuffix(":") else value
    }

    /** Creates a macro element from the two numbers of `%x[offset,col]`. */
    constructor(offset: Int, col: Int) {
        this.type = FeatureTemplateElementType.Offset
        this.offset = offset
        this.col = col
    }

    /** Returns the literal text for a text element, or `%x[offset,col]` for a macro element. */
    override fun toString(): String = when (type) {
        FeatureTemplateElementType.String -> value
        FeatureTemplateElementType.Offset -> "%x[$offset,$col]"
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy