All downloads are free. The search and download functionality uses the official Maven repository.

org.oewntk.sql.out.SerializeNIDs.kt Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2024. Bernard Bou.
 */
package org.oewntk.sql.out

import org.oewntk.model.CoreModel
import org.oewntk.model.Lex
import org.oewntk.model.Sense
import org.oewntk.model.Synset
import org.oewntk.sql.out.Lexes.makeCasedWordNIDs
import org.oewntk.sql.out.Lexes.makeMorphNIDs
import org.oewntk.sql.out.Lexes.makeWordNIDs
import org.oewntk.sql.out.Senses.makeSenseNIDs
import org.oewntk.sql.out.Synsets.makeSynsetNIDs
import java.io.*
import java.util.*

/**
 * Serialize ID to Numeric IDs maps.
 *
 * Each map is written with Java object serialization ([ObjectOutputStream]),
 * either to a caller-supplied stream or, via [serializeNIDs], to a set of
 * `.ser` files in an output directory.
 */
object SerializeNIDs {

    /** Prefix of the file names that hold id-to-nid maps. */
    const val NID_PREFIX: String = "nid_"

    /** Base name (without extension) of the sensekey to (word nid, synset nid) file. */
    private const val SENSEKEYS_WORDS_SYNSETS_FILE = "sensekeys_words_synsets"

    /**
     * Serialize words id-to-nid map
     *
     * @param os    output stream, closed on return
     * @param lexes lexes
     * @throws IOException io exception
     */
    @Throws(IOException::class)
    fun serializeWordNIDs(os: OutputStream, lexes: Collection<Lex>) {
        serialize(os, makeWordNIDs(lexes))
    }

    /**
     * Serialize cased words id-to-nid map
     *
     * @param os    output stream, closed on return
     * @param lexes lexes
     * @throws IOException io exception
     */
    @Throws(IOException::class)
    fun serializeCasedWordNIDs(os: OutputStream, lexes: Collection<Lex>) {
        serialize(os, makeCasedWordNIDs(lexes))
    }

    /**
     * Serialize morphs id-to-nid map
     *
     * @param os    output stream, closed on return
     * @param lexes lexes
     * @throws IOException io exception
     */
    @Throws(IOException::class)
    fun serializeMorphNIDs(os: OutputStream, lexes: Collection<Lex>) {
        serialize(os, makeMorphNIDs(lexes))
    }

    /**
     * Serialize pronunciations id-to-nid map
     *
     * @param os    output stream, closed on return
     * @param lexes lexes
     * @throws IOException io exception
     */
    @Throws(IOException::class)
    fun serializePronunciationNIDs(os: OutputStream, lexes: Collection<Lex>) {
        // NOTE(review): this builds the map with makeMorphNIDs, the very same call
        // serializeMorphNIDs makes, so the pronunciations file ends up holding the
        // morphs map. This looks like a copy-paste slip; if Lexes provides a
        // pronunciation-specific builder it should be called here instead - confirm.
        val pronunciationValueToNID = makeMorphNIDs(lexes)
        serialize(os, pronunciationValueToNID)
    }

    /**
     * Serialize senses id-to-nid map
     *
     * @param os     output stream, closed on return
     * @param senses senses
     * @throws IOException io exception
     */
    @Throws(IOException::class)
    private fun serializeSensesNIDs(os: OutputStream, senses: Collection<Sense>) {
        serialize(os, makeSenseNIDs(senses))
    }

    /**
     * Serialize synsets id-to-nid map
     *
     * @param os      output stream, closed on return
     * @param synsets synsets
     * @throws IOException io exception
     */
    @Throws(IOException::class)
    fun serializeSynsetNIDs(os: OutputStream, synsets: Collection<Synset>) {
        serialize(os, makeSynsetNIDs(synsets))
    }

    /**
     * Serialize object
     *
     * Wraps the stream in an [ObjectOutputStream] and writes a single object to it.
     * The wrapper is closed afterwards, which closes the underlying stream as well.
     *
     * @param os    output stream
     * @param thing object to write
     * @throws IOException io exception
     */
    @Throws(IOException::class)
    private fun serialize(os: OutputStream, thing: Any) {
        ObjectOutputStream(os).use { oos -> oos.writeObject(thing) }
    }

    /**
     * Serialize sensekey to wordnid-synsetnid
     * Does not use Kotlin pairs: each value is a [AbstractMap.SimpleEntry], so
     * the serialized data carries no reference to kotlin.Pair.
     *
     * @param os    output stream, closed on return
     * @param model model
     * @throws IOException io exception
     */
    @Throws(IOException::class)
    private fun serializeSensekeysWordsSynsetsNIDs(os: OutputStream, model: CoreModel) {
        val wordToNID = makeWordNIDs(model.lexes)
        val synsetIdToNID = makeSynsetNIDs(model.synsets)
        // (sensekey, (word nid, synset nid)), avoid kotlin.Pair dependency
        val sensekeyToNIDs = model.senses.associate { sense ->
            sense.senseKey to AbstractMap.SimpleEntry(wordToNID[sense.lCLemma], synsetIdToNID[sense.synsetId])
        }
        serialize(os, sensekeyToNIDs)
    }

    /**
     * Serialize sensekey to wordnid-synsetnid
     * Same as [serializeSensekeysWordsSynsetsNIDs] but each value is a Kotlin pair.
     *
     * @param os    output stream, closed on return
     * @param model model
     * @throws IOException io exception
     */
    @Throws(IOException::class)
    private fun kserializeSensekeysWordsSynsetsNIDs(os: OutputStream, model: CoreModel) {
        val wordToNID = makeWordNIDs(model.lexes)
        val synsetIdToNID = makeSynsetNIDs(model.synsets)
        // (sensekey, (word nid, synset nid))
        val sensekeyToNIDs = model.senses.associate { sense ->
            sense.senseKey to (wordToNID[sense.lCLemma] to synsetIdToNID[sense.synsetId])
        }
        serialize(os, sensekeyToNIDs)
    }

    /**
     * Open the file named baseName with a ".ser" extension in outDir, hand the
     * stream to the writer, and close it afterwards.
     *
     * @param outDir   output dir
     * @param baseName file name without the ".ser" extension
     * @param write    writer invoked with the open stream
     * @throws IOException io exception
     */
    @Throws(IOException::class)
    private fun writeSer(outDir: File, baseName: String, write: (OutputStream) -> Unit) {
        FileOutputStream(File(outDir, "$baseName.ser")).use { write(it) }
    }

    /**
     * Serialize all id-to-nid maps
     *
     * Writes one ".ser" file per map into the output dir: words, cased words,
     * morphs, pronunciations, senses and synsets (all prefixed with [NID_PREFIX]),
     * then the sensekey to (word nid, synset nid) map.
     *
     * @param model  model
     * @param outDir output dir
     * @throws IOException io exception
     */
    @Throws(IOException::class)
    fun serializeNIDs(model: CoreModel, outDir: File) {
        writeSer(outDir, "$NID_PREFIX${Names.WORDS.FILE}") { serializeWordNIDs(it, model.lexes) }
        writeSer(outDir, "$NID_PREFIX${Names.CASEDWORDS.FILE}") { serializeCasedWordNIDs(it, model.lexes) }
        writeSer(outDir, "$NID_PREFIX${Names.MORPHS.FILE}") { serializeMorphNIDs(it, model.lexes) }
        writeSer(outDir, "$NID_PREFIX${Names.PRONUNCIATIONS.FILE}") { serializePronunciationNIDs(it, model.lexes) }
        writeSer(outDir, "$NID_PREFIX${Names.SENSES.FILE}") { serializeSensesNIDs(it, model.senses) }
        writeSer(outDir, "$NID_PREFIX${Names.SYNSETS.FILE}") { serializeSynsetNIDs(it, model.synsets) }
        writeSer(outDir, SENSEKEYS_WORDS_SYNSETS_FILE) { serializeSensekeysWordsSynsetsNIDs(it, model) }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy