All Downloads are FREE. Search and download functionalities are using the official Maven repository.

stt-google-speech.23.3.4.source-code.GoogleSpeechClient.kt Maven / Gradle / Ivy

/*
 * Copyright (C) 2017/2021 e-voyageurs technologies
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ai.tock.stt.google

import ai.tock.shared.error
import ai.tock.stt.STT
import com.google.cloud.speech.v1.RecognitionAudio
import com.google.cloud.speech.v1.RecognitionConfig
import com.google.cloud.speech.v1.SpeechClient
import com.google.protobuf.ByteString
import mu.KotlinLogging
import ws.schild.jave.Encoder
import ws.schild.jave.MultimediaObject
import java.io.File
import java.nio.file.Files
import java.util.Locale
import ws.schild.jave.encode.AudioAttributes
import ws.schild.jave.encode.EncodingAttributes

/**
 * [STT] implementation that delegates speech recognition to the Google Cloud Speech API.
 *
 * Incoming audio is first transcoded to single-channel FLAC, then submitted
 * to a synchronous `recognize` call.
 */
internal object GoogleSpeechClient : STT {

    private val logger = KotlinLogging.logger {}

    /**
     * Transcodes audio of unknown format into single-channel FLAC.
     *
     * The bytes are written to a temporary file, encoded into a second temporary
     * file and read back. Both files are deleted before returning, whether the
     * encoding succeeded or not.
     *
     * @param sourceBytes the raw audio content, in any format the encoder can detect.
     * @return the FLAC encoded audio content.
     */
    private fun parseUnknown(sourceBytes: ByteArray): ByteArray {
        val audioAttributes = AudioAttributes()
        audioAttributes.setChannels(1)

        val encodingAttributes = EncodingAttributes()
        encodingAttributes.setAudioAttributes(audioAttributes)
        // FLAC is the *target* format: the input format is unknown by definition and
        // must be left to the encoder's auto-detection, so it is not forced here.
        encodingAttributes.setOutputFormat("flac")

        val sourceFile = File.createTempFile("tock-", ".unknown")
        try {
            // Created inside the outer try so the source file is still removed
            // if this second temp file cannot be created.
            val targetFile = File.createTempFile("tock-", ".flac")
            try {
                Files.write(sourceFile.toPath(), sourceBytes)
                Encoder().encode(
                    MultimediaObject(sourceFile),
                    targetFile,
                    encodingAttributes
                )
                return Files.readAllBytes(targetFile.toPath())
            } finally {
                targetFile.delete()
            }
        } finally {
            sourceFile.delete()
        }
    }

    /**
     * Recognizes the speech contained in [bytes].
     *
     * @param bytes the audio content, in any format supported by [parseUnknown].
     * @param language the spoken language, sent to the API as a BCP-47 language tag.
     * @return the transcript of the first alternative of the first result, or `null`
     * when nothing was recognized or when any error occurred (errors are logged).
     */
    override fun parse(bytes: ByteArray, language: Locale): String? =
        try {
            SpeechClient.create().use { speechClient ->
                val config = RecognitionConfig.newBuilder()
                    .setEncoding(RecognitionConfig.AudioEncoding.FLAC)
                    // toLanguageTag() yields the BCP-47 form ("en-US"),
                    // whereas toString() yields the underscore form ("en_US").
                    .setLanguageCode(language.toLanguageTag())
                    .build()
                val audio = RecognitionAudio.newBuilder()
                    .setContent(ByteString.copyFrom(parseUnknown(bytes)))
                    .build()
                val response = speechClient.recognize(config, audio)
                logger.info { response }
                // An empty result list (e.g. silence) is not an error: return null
                // without raising and logging an IndexOutOfBoundsException.
                response.resultsList.firstOrNull()
                    ?.alternativesList?.firstOrNull()
                    ?.transcript
            }
        } catch (e: Exception) {
            logger.error(e)
            null
        }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy