/*
* Copyright (c) 2024. Bernard Bou.
*/
package org.oewntk.sql.out
import org.oewntk.model.*
import java.io.PrintStream
/**
* Process lexes
*/
@Suppress("KotlinConstantConditions")
object Lexes {
// lexes
/**
* Make lex-to-NID map
*/
fun makeLexesNIDs(
lexes: Collection<Lex>,
): Map<Key, Int> {
return lexes
.asSequence()
.map { Key.KeyLCP.of_t(it) }
.sorted()
.withIndex()
.associate { it.value to it.index + 1 } // map(of_t(lex), nid)
}
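// NIDs are dense and 1-based, assigned in the natural sort order of the keys:
// for three lexes whose keys sort as k1 < k2 < k3, the map is {k1=1, k2=2, k3=3}.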
/**
* Generate lexes table
*
* @param ps print stream
* @param lexes lexes
* @param wordToNID word-to-nid map
* @param casedwordToNID cased_word-to-nid map
* @return lex_key-to-nid map
*/
fun generateLexes(
ps: PrintStream,
lexes: Collection<Lex>,
wordToNID: Map<Lemma, Int>,
casedwordToNID: Map<Lemma, Int>,
): Map<Key, Int> {
// lex key to NID
val lexKeyToNID: Map<Key, Int> = makeLexesNIDs(lexes)
// insert map
val columns = listOf(
Names.LEXES.luid,
Names.LEXES.posid,
Names.LEXES.wordid,
Names.LEXES.casedwordid
).joinToString(",")
val toSqlRow = { lex: Lex ->
val word = lex.lCLemma
val wordNID = NIDMaps.lookupLC(wordToNID, word)
val casedWordNID = NIDMaps.lookupNullable(casedwordToNID, lex.lemma)
val type = lex.type
"'$type',$wordNID,$casedWordNID"
}
if (!Printers.WITH_COMMENT) {
Printers.printInsert(ps, Names.LEXES.TABLE, columns, lexes, lexKeyToNID, toSqlRow)
} else {
val toSqlRowWithComment = { lex: Lex -> toSqlRow.invoke(lex) to "${lex.type} '${lex.lemma}'" }
Printers.printInsertWithComment(ps, Names.LEXES.TABLE, columns, lexes, lexKeyToNID, toSqlRowWithComment)
}
return lexKeyToNID
}
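// Usage sketch (assumed calling pattern, not defined in this file): generateLexes
// consumes the NID maps produced by generateWords and generateCasedWords below,
// so a typical sequence against a print stream per table would be:
//
//     val wordToNID = generateWords(wordsPs, lexes)
//     val casedWordToNID = generateCasedWords(casedWordsPs, lexes, wordToNID)
//     val lexKeyToNID = generateLexes(lexesPs, lexes, wordToNID, casedWordToNID)
//
// where wordsPs, casedWordsPs and lexesPs are hypothetical per-table PrintStreams.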
// words
/**
* Make word-to-NID map
*
* @param lexes lexes
* @return word-to-nid map
*/
fun makeWordNIDs(lexes: Collection<Lex>): Map<Lemma, Int> {
// stream of words
val map = lexes
.asSequence()
.map(Lex::lCLemma)
.distinct()
.sorted()
.withIndex()
.associate { it.value to it.index + 1 }
assert(map.values.none { it == 0 })
return map
}
/**
* Generate words table
*
* @param ps print stream
* @param lexes lexes
* @return word-to-nid map
*/
fun generateWords(ps: PrintStream, lexes: Collection<Lex>): Map<Lemma, Int> {
// make word-to-nid map
val wordToNID = makeWordNIDs(lexes)
// insert map
val columns = listOf(
Names.WORDS.wordid,
Names.WORDS.word
).joinToString(",")
val toSqlRow = { lemma: Lemma -> "'${Utils.escape(lemma)}'" }
Printers.printInsert(ps, Names.WORDS.TABLE, columns, wordToNID, toSqlRow)
return wordToNID
}
// cased words
/**
* Make cased_word-to-NID map
*
* @param lexes lexes
* @return cased_word-to-nid map
*/
fun makeCasedWordNIDs(lexes: Collection<Lex>): Map<Lemma, Int> {
val map = lexes
.asSequence()
.filter(Lex::isCased)
.map { it.lemma }
.distinct()
.sorted()
.withIndex()
.associate { it.value to it.index + 1 }
assert(map.values.none { it == 0 })
return map
}
/**
* Generate cased word table
*
* @param ps print stream
* @param lexes lexes
* @param wordIdToNID word-to-nid map
* @return cased_word-to-nid map
*/
fun generateCasedWords(
ps: PrintStream,
lexes: Collection<Lex>,
wordIdToNID: Map<Lemma, Int>,
): Map<Lemma, Int> {
// make casedword-to-nid map
val casedWordToNID = makeCasedWordNIDs(lexes)
// insert map
val columns = listOf(
Names.CASEDWORDS.casedwordid,
Names.CASEDWORDS.casedword,
Names.CASEDWORDS.wordid
).joinToString(",")
val toSqlRow = { casedWord: Lemma ->
val nid = NIDMaps.lookupLC(wordIdToNID, casedWord.lowercase())
"'${Utils.escape(casedWord)}',$nid"
}
Printers.printInsert(ps, Names.CASEDWORDS.TABLE, columns, casedWordToNID, toSqlRow)
return casedWordToNID
}
// morphs
/**
* Make morph-to-NID map
*
* @param lexes lexes
* @return morph-to-nid map
*/
fun makeMorphNIDs(lexes: Collection<Lex>): Map<Morph, Int> {
return lexes
.asSequence()
.filter { it.forms != null && it.forms!!.isNotEmpty() }
.flatMap { it.forms!!.asSequence() }
.sorted()
.distinct()
.withIndex()
.associate { it.value to it.index + 1 }
}
/**
* Generate morphs table
*
* @param ps print stream
* @param lexes lexes
* @return morph-to-nid map
*/
fun generateMorphs(ps: PrintStream, lexes: Collection<Lex>): Map<Morph, Int> {
// make morph-to-nid map
val morphToNID = makeMorphNIDs(lexes)
// insert map
val columns = listOf(
Names.MORPHS.morphid,
Names.MORPHS.morph
).joinToString(",")
val toSqlRow = { morph: Morph -> "'${Utils.escape(morph)}'" }
Printers.printInsert(ps, Names.MORPHS.TABLE, columns, morphToNID, toSqlRow)
return morphToNID
}
/**
* Generate lexes-morphs mappings
*
* @param ps print stream
* @param lexes lexes
* @param lexKeyToNID lex_key-to-nid map
* @param wordToNID word-to-nid map
* @param morphToNID morph-to-nid map
*/
fun generateLexesMorphs(
ps: PrintStream,
lexes: Collection<Lex>,
lexKeyToNID: Map<Key, Int>,
wordToNID: Map<Lemma, Int>,
morphToNID: Map<Morph, Int>,
) {
// stream of lexes
val lexSeq = lexes
.asSequence()
.filter { it.forms != null && it.forms!!.isNotEmpty() }
.sortedBy { it.lemma }
// insert map
val columns = listOf(
Names.LEXES_MORPHS.morphid,
Names.LEXES_MORPHS.luid,
Names.LEXES_MORPHS.wordid,
Names.LEXES_MORPHS.posid
).joinToString(",")
val toSqlRows = { lex: Lex ->
val wordNID = NIDMaps.lookupLC(wordToNID, lex.lCLemma)
val lexNID = NIDMaps.lookup(lexKeyToNID, Key.KeyLCP.of_t(lex))
lex.forms!!
.map {
val morphNID = NIDMaps.lookup(morphToNID, it)
"$morphNID,$lexNID,$wordNID,'${lex.type}'"
}
}
if (!Printers.WITH_COMMENT) {
Printers.printInserts(ps, Names.LEXES_MORPHS.TABLE, columns, lexSeq, toSqlRows, false)
} else {
val toSqlRowsWithComments = { lex: Lex ->
val rows = toSqlRows.invoke(lex)
val comments = lex.forms!!
.asSequence()
.map { "'$it' '${lex.lemma}' ${lex.type}" }
rows
.asSequence()
.zip(comments)
}
Printers.printInsertsWithComment(ps, Names.LEXES_MORPHS.TABLE, columns, lexSeq, toSqlRowsWithComments, false)
}
}
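// Usage sketch (assumed calling pattern): lexes_morphs rows resolve NIDs from the
// lexes, words and morphs tables, so generateLexesMorphs is meant to run after
// generateLexes, generateWords and generateMorphs, e.g.:
//
//     val morphToNID = generateMorphs(morphsPs, lexes)
//     generateLexesMorphs(lexesMorphsPs, lexes, lexKeyToNID, wordToNID, morphToNID)
//
// morphsPs and lexesMorphsPs being hypothetical per-table PrintStreams.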
// pronunciations
/**
* Make pronunciation_value-to-NID map
*
* @param lexes lexes
* @return pronunciation-to-nid map
*/
fun makePronunciationNIDs(lexes: Collection<Lex>): Map<PronunciationValue, Int> {
return lexes
.asSequence()
.filter { it.pronunciations != null && it.pronunciations!!.isNotEmpty() }
.flatMap { it.pronunciations!!.asSequence() }
.map { it.value }
.sorted()
.distinct()
.withIndex()
.associate { it.value to it.index + 1 }
}
/**
* Generate pronunciations table
*
* @param ps print stream
* @param lexes lexes
* @return pronunciation_value-to-nid map
*/
fun generatePronunciations(ps: PrintStream, lexes: Collection<Lex>): Map<PronunciationValue, Int> {
// make pronunciation_value-to-nid map
val pronunciationValueToNID = makePronunciationNIDs(lexes)
// insert map
val columns = listOf(
Names.PRONUNCIATIONS.pronunciationid,
Names.PRONUNCIATIONS.pronunciation
).joinToString(",")
val toSqlRow = { pronunciationValue: PronunciationValue -> "'${Utils.escape(pronunciationValue)}'" }
Printers.printInsert(ps, Names.PRONUNCIATIONS.TABLE, columns, pronunciationValueToNID, toSqlRow)
return pronunciationValueToNID
}
/**
* Generate lexes-pronunciations mappings
*
* @param ps print stream
* @param lexes lexes
* @param lexKeyToNID lex_key-to-nid map
* @param wordToNID word-to-nid map
* @param pronunciationToNID pronunciation-to-nid map
*/
fun generateLexesPronunciations(
ps: PrintStream,
lexes: Collection<Lex>,
lexKeyToNID: Map<Key, Int>,
wordToNID: Map<Lemma, Int>,
pronunciationToNID: Map<PronunciationValue, Int>,
) {
// stream of lexes
val lexSeq = lexes
.asSequence()
.filter { it.pronunciations != null && it.pronunciations!!.isNotEmpty() }
.sortedBy { it.lemma }
// insert map
val columns = listOf(
Names.LEXES_PRONUNCIATIONS.pronunciationid,
Names.LEXES_PRONUNCIATIONS.variety,
Names.LEXES_PRONUNCIATIONS.luid,
Names.LEXES_PRONUNCIATIONS.wordid,
Names.LEXES_PRONUNCIATIONS.posid
).joinToString(",")
val toSqlRows = { lex: Lex ->
val wordNID = NIDMaps.lookupLC(wordToNID, lex.lCLemma)
val lexNID = NIDMaps.lookup(lexKeyToNID, Key.KeyLCP.of_t(lex))
lex.pronunciations!!
.map {
val variety = if (it.variety == null) "NULL" else "'${it.variety}'"
val pronunciationNID = NIDMaps.lookup(pronunciationToNID, it.value)
"$pronunciationNID,$variety,$lexNID,$wordNID,'${lex.type}'"
}
.toList()
}
if (!Printers.WITH_COMMENT) {
Printers.printInserts(ps, Names.LEXES_PRONUNCIATIONS.TABLE, columns, lexSeq, toSqlRows, false)
} else {
val toSqlRowsWithComments = { lex: Lex ->
val rows = toSqlRows.invoke(lex)
val comments = lex.pronunciations!!
.asSequence()
.map {
val variety = if (it.variety == null) "" else " [${it.variety}]"
"${it.value}$variety '${lex.lemma}' ${lex.type}"
}
rows
.asSequence()
.zip(comments)
}
Printers.printInsertsWithComment(ps, Names.LEXES_PRONUNCIATIONS.TABLE, columns, lexSeq, toSqlRowsWithComments, false)
}
}
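// Usage sketch (assumed calling pattern): as with morphs, lexes_pronunciations rows
// resolve NIDs generated earlier, so a typical sequence is:
//
//     val pronunciationToNID = generatePronunciations(pronunciationsPs, lexes)
//     generateLexesPronunciations(lexesPronunciationsPs, lexes, lexKeyToNID, wordToNID, pronunciationToNID)
//
// with pronunciationsPs and lexesPronunciationsPs as hypothetical per-table PrintStreams.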
}