All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.oewntk.sql.out.CoreModelConsumer.kt Maven / Gradle / Ivy

/*
 * Copyright (c) 2021-2024. Bernard Bou.
 */
package org.oewntk.sql.out

import org.oewntk.model.*
import org.oewntk.sql.out.BuiltIn.generateAdjectivePositionTypes
import org.oewntk.sql.out.BuiltIn.generateDomains
import org.oewntk.sql.out.BuiltIn.generatePoses
import org.oewntk.sql.out.BuiltIn.generateRelationTypes
import org.oewntk.sql.out.Lexes.generateCasedWords
import org.oewntk.sql.out.Lexes.generateLexes
import org.oewntk.sql.out.Lexes.generateLexesMorphs
import org.oewntk.sql.out.Lexes.generateLexesPronunciations
import org.oewntk.sql.out.Lexes.generateMorphs
import org.oewntk.sql.out.Lexes.generatePronunciations
import org.oewntk.sql.out.Lexes.generateWords
import org.oewntk.sql.out.Senses.generateSenseRelations
import org.oewntk.sql.out.Senses.generateSenses
import org.oewntk.sql.out.Senses.generateSensesAdjPositions
import org.oewntk.sql.out.Senses.generateSensesSamples
import org.oewntk.sql.out.Senses.generateSensesVerbFrames
import org.oewntk.sql.out.Synsets.generateSynsetIlis
import org.oewntk.sql.out.Synsets.generateSynsetSamples
import org.oewntk.sql.out.Synsets.generateSynsetRelations
import org.oewntk.sql.out.Synsets.generateSynsets
import org.oewntk.sql.out.Synsets.generateSynsetUsages
import org.oewntk.sql.out.Synsets.generateSynsetWikidatas
import java.io.File
import java.io.FileNotFoundException
import java.io.FileOutputStream
import java.io.PrintStream
import java.nio.charset.StandardCharsets
import java.util.function.Consumer

/**
 * Main class that generates the core WN database in the SQL format.
 *
 * [accept] consumes the lexes, the synsets, the senses and the built-in tables, in that order, and writes
 * `.sql` files into [outDir]. The NID maps returned by the earlier steps are kept in properties because
 * the later steps receive them as arguments.
 *
 * NOTE(review): the `Map` and `Collection` declarations in this class carry no type arguments; they look
 * like they were lost when this listing was extracted. Restore them from the signatures of the
 * `generate*` functions before compiling.
 *
 * @property outDir output directory
 *
 * @author Bernard Bou
 * @see "https://sqlunet.sourceforge.net/schema.html"
 */
class CoreModelConsumer(
    private val outDir: File,
) : Consumer<CoreModel> {

    /**
     * Lex key to NID map, set when lexes are consumed
     */
    var lexKeyToNID: Map? = null

    /**
     * Word to NID map, set when lexes are consumed
     */
    var wordToNID: Map? = null

    /**
     * Cased word to NID map, set when lexes are consumed
     */
    private var casedWordToNID: Map? = null

    /**
     * Synset to NID map, set when synsets are consumed
     */
    var synsetIdToNID: Map? = null

    /**
     * Accept model
     *
     * A [FileNotFoundException] raised by any step is reported on [Tracing.psErr] and ends the run;
     * the steps that follow the failing one are not executed.
     *
     * @param model model
     * @throws IllegalStateException if the model has no senses-by-id map
     */
    override fun accept(model: CoreModel) {
        Tracing.psInfo.println("[CoreModel] ${model.source}")

        try {
            lexes(outDir, model.lexes)
            synsets(outDir, model.synsets)
            // checked at this point (not up front) so that lexes and synsets are written first, as before
            senses(outDir, model.senses, checkNotNull(model.sensesById) { "Model has no senses-by-id map" })
            builtins(outDir)
        } catch (e: FileNotFoundException) {
            e.printStackTrace(Tracing.psErr)
        }
    }

    /**
     * Consume lexes
     *
     * Publishes [wordToNID], [casedWordToNID] and [lexKeyToNID] as soon as each map has been generated.
     *
     * @param outDir out dir
     * @param lexes  lexes
     * @throws FileNotFoundException file not found exception
     */
    @Throws(FileNotFoundException::class)
    private fun lexes(outDir: File, lexes: Collection) {
        val words = sqlStream(outDir, Names.WORDS.FILE).use { ps -> generateWords(ps, lexes) }
        wordToNID = words

        val casedWords = sqlStream(outDir, Names.CASEDWORDS.FILE).use { ps -> generateCasedWords(ps, lexes, words) }
        casedWordToNID = casedWords

        val lexKeys = sqlStream(outDir, Names.LEXES.FILE).use { ps -> generateLexes(ps, lexes, words, casedWords) }
        lexKeyToNID = lexKeys

        // morphs and pronunciations are only needed locally, to build their respective link tables
        val morphs = sqlStream(outDir, Names.MORPHS.FILE).use { ps -> generateMorphs(ps, lexes) }
        sqlStream(outDir, Names.LEXES_MORPHS.FILE).use { ps ->
            generateLexesMorphs(ps, lexes, lexKeys, words, morphs)
        }

        val pronunciations = sqlStream(outDir, Names.PRONUNCIATIONS.FILE).use { ps -> generatePronunciations(ps, lexes) }
        sqlStream(outDir, Names.LEXES_PRONUNCIATIONS.FILE).use { ps ->
            generateLexesPronunciations(ps, lexes, lexKeys, words, pronunciations)
        }
    }

    /**
     * Consume synsets
     *
     * Publishes [synsetIdToNID] and starts the samples file afresh.
     *
     * @param outDir  out dir
     * @param synsets synsets
     * @throws FileNotFoundException file not found exception
     */
    @Throws(FileNotFoundException::class)
    private fun synsets(outDir: File, synsets: Collection) {
        val synsetIds = sqlStream(outDir, Names.SYNSETS.FILE).use { ps -> generateSynsets(ps, synsets) }
        synsetIdToNID = synsetIds

        // synsets are generated first, so do not append
        sqlStream(outDir, Names.SAMPLES.FILE, append = false).use { ps -> generateSynsetSamples(ps, synsets, synsetIds) }
        sqlStream(outDir, Names.USAGES.FILE).use { ps -> generateSynsetUsages(ps, synsets, synsetIds) }
        sqlStream(outDir, Names.SEMRELATIONS.FILE).use { ps -> generateSynsetRelations(ps, synsets, synsetIds) }
        sqlStream(outDir, Names.ILIS.FILE).use { ps -> generateSynsetIlis(ps, synsets, synsetIds) }
        sqlStream(outDir, Names.WIKIDATAS.FILE).use { ps -> generateSynsetWikidatas(ps, synsets, synsetIds) }
    }

    /**
     * Consume senses
     *
     * Requires the NID maps published by [lexes] and [synsets], and appends to the samples file.
     *
     * @param outDir     out dir
     * @param senses     senses
     * @param sensesById senses mapped by sensekeys
     * @throws FileNotFoundException file not found exception
     * @throws IllegalStateException if lexes or synsets have not been consumed yet
     */
    @Throws(FileNotFoundException::class)
    private fun senses(outDir: File, senses: Collection, sensesById: Map) {
        val synsetIds = checkNotNull(synsetIdToNID) { "Synsets must be consumed before senses" }
        val lexKeys = checkNotNull(lexKeyToNID) { "Lexes must be consumed before senses" }
        val words = checkNotNull(wordToNID) { "Lexes must be consumed before senses" }
        val casedWords = checkNotNull(casedWordToNID) { "Lexes must be consumed before senses" }

        sqlStream(outDir, Names.SENSES.FILE).use { ps ->
            generateSenses(ps, senses, synsetIds, lexKeys, words, casedWords)
        }
        sqlStream(outDir, Names.LEXRELATIONS.FILE).use { ps ->
            generateSenseRelations(ps, senses, sensesById, synsetIds, lexKeys, words)
        }
        // senses are generated second, so append
        sqlStream(outDir, Names.SAMPLES.FILE, append = true).use { ps ->
            generateSensesSamples(ps, senses, synsetIds, lexKeys, words)
        }
        sqlStream(outDir, Names.SENSES_VFRAMES.FILE).use { ps ->
            generateSensesVerbFrames(ps, senses, synsetIds, lexKeys, words)
        }
        sqlStream(outDir, Names.SENSES_ADJPOSITIONS.FILE).use { ps ->
            generateSensesAdjPositions(ps, senses, synsetIds, lexKeys, words)
        }
    }

    companion object {

        /**
         * Consume builtins
         *
         * @param outDir out dir
         * @throws FileNotFoundException file not found exception
         */
        @Throws(FileNotFoundException::class)
        fun builtins(outDir: File) {
            // NOTE(review): this is the only file named from a TABLE constant, every other one uses FILE;
            // confirm whether Names.DOMAINS.FILE was intended
            sqlStream(outDir, Names.DOMAINS.TABLE).use { ps -> generateDomains(ps) }
            sqlStream(outDir, Names.POSES.FILE).use { ps -> generatePoses(ps) }
            sqlStream(outDir, Names.ADJPOSITIONS.FILE).use { ps -> generateAdjectivePositionTypes(ps) }
            sqlStream(outDir, Names.RELS.FILE).use { ps -> generateRelationTypes(ps) }
        }

        /**
         * Make SQL filename
         *
         * Appends the `.sql` extension and traces the resulting name on [Tracing.psInfo].
         *
         * @param name name
         * @return filename
         */
        fun makeFilename(name: String): String {
            val fileName = "$name.sql"
            Tracing.psInfo.println(fileName)
            return fileName
        }

        /**
         * Open the SQL output stream for a table: auto-flushing, UTF-8 encoded, in [outDir]
         *
         * @param outDir out dir
         * @param name   base name, passed through [makeFilename]
         * @param append whether to append to an existing file instead of truncating it
         * @return print stream, to be closed by the caller
         * @throws FileNotFoundException if the file cannot be opened for writing
         */
        @Throws(FileNotFoundException::class)
        private fun sqlStream(outDir: File, name: String, append: Boolean = false): PrintStream =
            PrintStream(FileOutputStream(File(outDir, makeFilename(name)), append), true, StandardCharsets.UTF_8)
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy