I'm working with the Vosk speech-to-text library and integrating it into a Python project.
Vosk Link : https://github.com/alphacep/vosk-api
I have this code:
import wave
import json
import subprocess
from vosk import Model, KaldiRecognizer, SetLogLevel
import Word as custom_Word
# Sample rate handed to the recognizer (presumably in Hz and matching the
# input audio -- confirm against the audio source).
sample_rate=16000
# Load the model from the path "model". This is the expensive step: it pulls
# a large file into memory, so it should run once per process.
model = Model("model")
# The recognizer is given the already-loaded model object, so further
# KaldiRecognizer instances can be built from this same `model` instead of
# calling Model("model") again.
# NOTE(review): confirm the native library permits sharing one model.
rec = KaldiRecognizer(model, sample_rate)
# Enable the recognizer's "words" option (see KaldiRecognizer.SetWords).
rec.SetWords(True)
model=Model("model")
This line loads a large file into memory. I want to load the model only once and have every other instance share it.
__init__.py
import os
import sys
from .vosk_cffi import ffi as _ffi
def open_dll():
    """Open the libvosk shared library located next to this module.

    The library file name is chosen from ``sys.platform``. On Windows the
    module directory is additionally prepended to ``PATH`` (and registered
    through ``os.add_dll_directory`` when that function exists) so that the
    library's own dependencies can be found as well.

    Returns:
        The library object returned by ``_ffi.dlopen``.

    Raises:
        TypeError: if ``sys.platform`` is not win32, linux or darwin.
    """
    library_names = {
        'win32': "libvosk.dll",
        'linux': "libvosk.so",
        'darwin': "libvosk.dyld",
    }
    module_dir = os.path.abspath(os.path.dirname(__file__))
    platform = sys.platform

    if platform not in library_names:
        raise TypeError("Unsupported platform")

    if platform == 'win32':
        # We want to load dependencies too
        os.environ["PATH"] = module_dir + os.pathsep + os.environ['PATH']
        if hasattr(os, 'add_dll_directory'):
            os.add_dll_directory(module_dir)

    library_path = os.path.join(module_dir, library_names[platform])
    return _ffi.dlopen(library_path)
# Open the native library once at import time; every wrapper in this module
# calls into it through this handle.
_c = open_dll()
#model=None
#@staticmethod
class Model(object):
    """Python owner of a native Vosk model handle.

    The handle is created with ``vosk_model_new`` in ``__init__`` and
    released with ``vosk_model_free`` when the object is finalized. Other
    wrappers (e.g. ``KaldiRecognizer``) only read ``_handle``, so one
    ``Model`` instance can be passed to several of them.
    """

    def __init__(self, model_path):
        """Create the native model from *model_path*.

        Args:
            model_path: path to the model as ``str``; it is passed to the
                native library UTF-8 encoded.

        Raises:
            Exception: if the native library returns a NULL handle.
        """
        super().__init__()
        # Define the attribute up front so __del__ is safe even when
        # construction fails part-way (bad argument type, NULL handle).
        self._handle = None
        handle = _c.vosk_model_new(model_path.encode('utf-8'))
        if handle == _ffi.NULL:
            raise Exception("Failed to create a model")
        self._handle = handle

    def __del__(self):
        """Release the native model, if one was successfully created."""
        handle = getattr(self, "_handle", None)
        if handle is None:
            return
        # Clear first so a second finalizer call cannot free twice.
        self._handle = None
        _c.vosk_model_free(handle)

    def vosk_model_find_word(self, word):
        """Return the native ``vosk_model_find_word`` result for *word*.

        The word is passed to the native call UTF-8 encoded.
        """
        return _c.vosk_model_find_word(self._handle, word.encode('utf-8'))
#def loadModel(self,model_path):
class SpkModel(object):
    """Python owner of a native Vosk speaker-model handle.

    The handle is created with ``vosk_spk_model_new`` and released with
    ``vosk_spk_model_free`` when the object is finalized.
    """

    def __init__(self, model_path):
        """Create the native speaker model from *model_path*.

        Args:
            model_path: path to the speaker model as ``str``; it is passed
                to the native library UTF-8 encoded.

        Raises:
            Exception: if the native library returns a NULL handle.
        """
        # Define the attribute up front so __del__ is safe even when
        # construction fails part-way.
        self._handle = None
        handle = _c.vosk_spk_model_new(model_path.encode('utf-8'))
        if handle == _ffi.NULL:
            raise Exception("Failed to create a speaker model")
        self._handle = handle

    def __del__(self):
        """Release the native speaker model, if one was created."""
        handle = getattr(self, "_handle", None)
        if handle is None:
            return
        # Clear first so a second finalizer call cannot free twice.
        self._handle = None
        _c.vosk_spk_model_free(handle)
class KaldiRecognizer(object):
    """Python owner of a native Vosk recognizer handle.

    Accepted constructor forms (positional only):

    * ``KaldiRecognizer(model, sample_rate)``
    * ``KaldiRecognizer(model, sample_rate, spk_model)`` where *spk_model*
      is a ``SpkModel``
    * ``KaldiRecognizer(model, sample_rate, grammar)`` where *grammar* is a
      ``str`` (passed to the native library UTF-8 encoded)

    Only ``model._handle`` is read from the model, so the same model object
    can be used to build several recognizers.
    """

    def __init__(self, *args):
        """Create the native recognizer; see the class docstring for forms.

        Raises:
            TypeError: if the arguments match none of the accepted forms.
            Exception: if the native library returns a NULL handle.
        """
        # Define the attribute up front so __del__ is safe even when
        # construction fails (unknown arguments, NULL handle).
        self._handle = None
        if len(args) == 2:
            handle = _c.vosk_recognizer_new(args[0]._handle, args[1])
        elif len(args) == 3 and isinstance(args[2], SpkModel):
            handle = _c.vosk_recognizer_new_spk(
                args[0]._handle, args[1], args[2]._handle)
        elif len(args) == 3 and isinstance(args[2], str):
            handle = _c.vosk_recognizer_new_grm(
                args[0]._handle, args[1], args[2].encode('utf-8'))
        else:
            raise TypeError("Unknown arguments")

        if handle == _ffi.NULL:
            raise Exception("Failed to create a recognizer")
        self._handle = handle

    def __del__(self):
        """Release the native recognizer, if one was created."""
        handle = getattr(self, "_handle", None)
        if handle is None:
            return
        # Clear first so a second finalizer call cannot free twice.
        self._handle = None
        _c.vosk_recognizer_free(handle)

    def SetMaxAlternatives(self, max_alternatives):
        """Pass *max_alternatives* to ``vosk_recognizer_set_max_alternatives``."""
        _c.vosk_recognizer_set_max_alternatives(self._handle, max_alternatives)

    def SetWords(self, enable_words):
        """Enable or disable the "words" option; truthiness is sent as 1/0."""
        _c.vosk_recognizer_set_words(self._handle, 1 if enable_words else 0)

    def SetSpkModel(self, spk_model):
        """Attach *spk_model* (an object exposing ``_handle``) to the recognizer."""
        _c.vosk_recognizer_set_spk_model(self._handle, spk_model._handle)

    def AcceptWaveform(self, data):
        """Feed a chunk of audio *data* to the recognizer.

        Returns:
            The non-negative status returned by the native call
            (NOTE(review): exact meaning is defined by the native API).

        Raises:
            Exception: if the native call returns a negative status.
        """
        res = _c.vosk_recognizer_accept_waveform(self._handle, data, len(data))
        if res < 0:
            raise Exception("Failed to process waveform")
        return res

    def Result(self):
        """Return the native ``vosk_recognizer_result`` string, UTF-8 decoded."""
        return _ffi.string(_c.vosk_recognizer_result(self._handle)).decode('utf-8')

    def PartialResult(self):
        """Return the native ``vosk_recognizer_partial_result`` string, UTF-8 decoded."""
        return _ffi.string(_c.vosk_recognizer_partial_result(self._handle)).decode('utf-8')

    def FinalResult(self):
        """Return the native ``vosk_recognizer_final_result`` string, UTF-8 decoded."""
        return _ffi.string(_c.vosk_recognizer_final_result(self._handle)).decode('utf-8')

    def Reset(self):
        """Call ``vosk_recognizer_reset`` and return its result."""
        return _c.vosk_recognizer_reset(self._handle)
def SetLogLevel(level):
    """Forward *level* to the native ``vosk_set_log_level`` and return its result."""
    outcome = _c.vosk_set_log_level(level)
    return outcome
def GpuInit():
    """Invoke the native ``vosk_gpu_init`` entry point; returns nothing."""
    _c.vosk_gpu_init()
    return None
def GpuThreadInit():
    """Invoke the native ``vosk_gpu_thread_init`` entry point; returns nothing."""
    _c.vosk_gpu_thread_init()
    return None