stt-google-speech.20.9.1.source-code.GoogleSpeechClient.kt Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tock-stt-google-speech Show documentation
Show all versions of tock-stt-google-speech Show documentation
Google Speech STT implementation
/*
* Copyright (C) 2017/2020 e-voyageurs technologies
*
* Licensed under the Apache License, Version 2.0 (the "License")
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ai.tock.stt.google
import ai.tock.shared.error
import ai.tock.stt.STT
import com.google.cloud.speech.v1.RecognitionAudio
import com.google.cloud.speech.v1.RecognitionConfig
import com.google.cloud.speech.v1.SpeechClient
import com.google.protobuf.ByteString
import mu.KotlinLogging
import ws.schild.jave.AudioAttributes
import ws.schild.jave.Encoder
import ws.schild.jave.EncodingAttributes
import ws.schild.jave.MultimediaObject
import java.io.File
import java.nio.file.Files
import java.util.Locale
/**
 * Google Speech [STT] implementation.
 *
 * The incoming audio is first transcoded to single-channel FLAC with JAVE,
 * then sent to the Google Cloud Speech `recognize` endpoint.
 */
internal object GoogleSpeechClient : STT {

    private val logger = KotlinLogging.logger {}

    /**
     * Transcodes audio of an unknown format to single-channel FLAC.
     *
     * The bytes are written to a temporary file, encoded into a second
     * temporary file, and the encoded content is read back. Both temporary
     * files are deleted before returning, whether or not the encoding succeeded.
     *
     * @param sourceBytes the raw audio content, in any format the encoder can read
     * @return the FLAC-encoded audio content
     */
    private fun parseUnknown(sourceBytes: ByteArray): ByteArray {
        val monoAudio = AudioAttributes()
        monoAudio.setChannels(1)
        val flacAttributes = EncodingAttributes()
        flacAttributes.audioAttributes = monoAudio
        flacAttributes.format = "flac"

        // Each temp file gets its own try/finally so that the source file is
        // still removed when the creation of the target file fails.
        val sourceFile = File.createTempFile("tock-", ".unknown")
        try {
            val targetFile = File.createTempFile("tock-", ".flac")
            try {
                Files.write(sourceFile.toPath(), sourceBytes)
                Encoder().encode(MultimediaObject(sourceFile), targetFile, flacAttributes)
                return Files.readAllBytes(targetFile.toPath())
            } finally {
                targetFile.delete()
            }
        } finally {
            sourceFile.delete()
        }
    }

    /**
     * Transcribes the given audio with Google Cloud Speech.
     *
     * @param bytes the audio content; it is converted to FLAC before being sent
     * @param language the language spoken in the audio
     * @return the transcript of the first alternative of the first result,
     * or `null` when nothing was recognised or when any error occurred
     * (errors are logged, never propagated)
     */
    override fun parse(bytes: ByteArray, language: Locale): String? =
        try {
            SpeechClient.create().use { speechClient ->
                val config = RecognitionConfig.newBuilder()
                    .setEncoding(RecognitionConfig.AudioEncoding.FLAC)
                    // The Speech API documents languageCode as a BCP-47 tag ("en-US"),
                    // whereas Locale.toString() yields "en_US" for locales with a country.
                    .setLanguageCode(language.toLanguageTag())
                    .build()
                val audio = RecognitionAudio.newBuilder()
                    .setContent(ByteString.copyFrom(parseUnknown(bytes)))
                    .build()

                val response = speechClient.recognize(config, audio)
                logger.info { response }

                // An empty result list is a normal outcome (silence, unintelligible audio):
                // answer null directly instead of relying on an IndexOutOfBoundsException.
                response.resultsList.firstOrNull()
                    ?.alternativesList?.firstOrNull()
                    ?.transcript
            }
        } catch (e: Exception) {
            logger.error(e)
            null
        }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy