9 Commits

Author SHA1 Message Date
Dmitrium12 a2375e6daf modify project structure and add download_models.py 2024-05-01 13:35:00 +07:00
Dmitrium12 8504126d4a Merge pull request 'add_noisereduce' (#2) from add_noisereduce into master
Reviewed-on: #2
2024-05-01 10:38:58 +07:00
Dmitrium12 4dc4ffa58c micro fix 2024-04-30 08:46:16 +07:00
Dmitrium12 26b38811cc micro fix 2024-04-30 08:46:06 +07:00
Dmitrium12 1d63461e35 add testing noisereduce 2024-04-30 08:31:33 +07:00
Dmitrium12 5f1e2e67a4 add command and add its in ollama functions 2024-04-29 17:58:53 +07:00
Dmitrium12 c23b1d42bf add linter and test ollama in python 2024-04-29 15:31:01 +07:00
Dmitrium12 b2866d073c .gitignore 2024-01-09 22:10:57 +07:00
Dmitrium12 a889c68e40 starting 2024-01-09 22:10:13 +07:00
20 changed files with 67 additions and 9 deletions
+2 -2
View File
@@ -6,8 +6,8 @@ __pycache__/
*$py.class *$py.class
# Custom # Custom
model_small/ data/model_small/
model_large/ data/model_large/
# C extensions # C extensions
*.so *.so
+1 -1
View File
@@ -1,5 +1,5 @@
VA_ALIAS = ('джарвис',) VA_ALIAS = ('джарвис',)
VA_TBR = ('скажи', 'покажи', 'ответь', 'произнеси', 'расскажи', 'сколько', 'слушай') VA_TBR = ('скажи', 'покажи', 'ответь', 'произнеси', 'расскажи', 'сколько', 'слушай')
MODEL_NAME = "vosk-model-small-ru-0.22" # vosk-model-ru-0.42
MICROPHONE_INDEX = -1 MICROPHONE_INDEX = -1
PICOVOICE_TOKEN = "4xbwaZwZmSHeTiowFl5Rgqsc8CR4FKGV8YueJUlR4Zt2e1kB64IDcA==" PICOVOICE_TOKEN = "4xbwaZwZmSHeTiowFl5Rgqsc8CR4FKGV8YueJUlR4Zt2e1kB64IDcA=="
OPENAI_TOKEN = "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"
+1 -1
View File
@@ -1,4 +1,4 @@
from Jarvis import Jarvis from modules.Jarvis import Jarvis
def main(): def main():
+5 -4
View File
@@ -10,21 +10,22 @@ import yaml
from fuzzywuzzy import fuzz from fuzzywuzzy import fuzz
from pvrecorder import PvRecorder from pvrecorder import PvRecorder
import config from data import config
from utils import execute_cmd, play from utils import download_models, execute_cmd, play
class Jarvis: class Jarvis:
def __init__(self): def __init__(self):
download_models.install_vosk_model()
self.recorder = None self.recorder = None
self.CDIR = os.getcwd() self.CDIR = os.getcwd()
self.VA_CMD_LIST = yaml.safe_load(open('commands.yaml', encoding='utf8')) self.VA_CMD_LIST = yaml.safe_load(open('data/commands.yaml', encoding='utf8'))
self.porcupine = pvporcupine.create( self.porcupine = pvporcupine.create(
access_key=config.PICOVOICE_TOKEN, access_key=config.PICOVOICE_TOKEN,
keywords=['jarvis'], keywords=['jarvis'],
sensitivities=[1] sensitivities=[1]
) )
self.kaldi_rec = vosk.KaldiRecognizer(vosk.Model("model_large"), 16000) self.kaldi_rec = vosk.KaldiRecognizer(vosk.Model("data/model_small"), 16000)
def main(self): def main(self):
self.recorder = PvRecorder( self.recorder = PvRecorder(
+26
View File
@@ -0,0 +1,26 @@
import os
import sys
from data import config
def install_vosk_model():
    """Download the Vosk model into ``data/model_small`` if it is missing.

    The model counts as installed when ``data/model_small/README`` exists; in
    that case the function returns without touching the network. Otherwise
    ``{config.MODEL_NAME}.zip`` is fetched from alphacephei.com, unpacked, and
    the extracted ``config.MODEL_NAME`` folder is moved to
    ``data/model_small``.

    Only the standard library is used, so one code path serves every
    platform instead of separate wget/curl/PowerShell shell commands.
    Download or extraction failures are printed rather than raised.
    """
    # Function-scope imports keep this block self-contained: the file's
    # top-level imports only provide ``os``, ``sys`` and ``config``.
    import shutil
    import tempfile
    import urllib.request
    import zipfile

    target_dir = os.path.join("data", "model_small")
    if os.path.isfile(os.path.join(target_dir, "README")):
        # Already installed; nothing to do.
        return

    url = f"https://alphacephei.com/vosk/models/{config.MODEL_NAME}.zip"
    print(f"Vosk model not found in {target_dir}, downloading {url}")
    try:
        # Work in a scratch directory so the archive and any partially
        # extracted files are removed automatically, even on failure.
        with tempfile.TemporaryDirectory() as scratch_dir:
            archive_path = os.path.join(scratch_dir, f"{config.MODEL_NAME}.zip")
            with urllib.request.urlopen(url) as response, \
                    open(archive_path, "wb") as archive_file:
                shutil.copyfileobj(response, archive_file)

            with zipfile.ZipFile(archive_path) as archive:
                archive.extractall(scratch_dir)

            # The archive is expected to contain a single top-level folder
            # named after the model; that folder becomes data/model_small.
            os.makedirs(os.path.dirname(target_dir), exist_ok=True)
            shutil.move(os.path.join(scratch_dir, config.MODEL_NAME), target_dir)
    except (OSError, zipfile.BadZipFile) as error:
        # urllib's URLError/HTTPError and shutil.Error are OSError subclasses.
        print(f"Could not install Vosk model {config.MODEL_NAME}: {error}")
+1 -1
View File
@@ -15,7 +15,7 @@ def play(self, phrase, wait_done=True):
if wait_done: if wait_done:
self.recorder.stop() self.recorder.stop()
if filename: if filename:
wave_obj = sa.WaveObject.from_wave_file(f"{self.CDIR}/sound/{filename}") wave_obj = sa.WaveObject.from_wave_file(f"{self.CDIR}/data/sound/{filename}")
play_obj = wave_obj.play() play_obj = wave_obj.play()
if wait_done: if wait_done:
play_obj.wait_done() play_obj.wait_done()
Executable
+31
View File
@@ -0,0 +1,31 @@
import time
import sounddevice as sd
import torch
# Module-level configuration for Silero text-to-speech. Importing this module
# loads the model through torch.hub as a side effect (see below).
language = 'ru'
# Model identifier; handed to torch.hub.load through its ``speaker`` keyword.
model_id = 'ru_v3'
sample_rate = 48000 # sample rate passed to apply_tts and used to time playback
speaker = 'aidar' # voice name; options: aidar, baya, kseniya, xenia, random
# Forwarded unchanged to model.apply_tts in va_speak.
# NOTE(review): presumably automatic stress marks and "ё" restoration -
# confirm against the Silero documentation.
put_accent = True
put_yo = True
device = torch.device('cpu') # cpu or gpu (torch names the GPU device 'cuda')
# Sample phrase; va_speak does not reference it.
text = "Хауди Хо, друзья!!!"
# torch.hub.load returns a pair here; only the first element (the model) is kept.
model, _ = torch.hub.load(repo_or_dir='snakers4/silero-models',
model='silero_tts',
language=language,
speaker=model_id)
model.to(device)
def va_speak(what: str):
    """Synthesize ``what`` with the module-level TTS model and play it.

    Two dots are appended to the text before synthesis. Playback runs at
    1.05x the synthesis sample rate, and the call blocks for the clip length
    (computed at the unscaled sample rate) plus half a second before playback
    is stopped.
    """
    waveform = model.apply_tts(
        text=what + "..",
        speaker=speaker,
        sample_rate=sample_rate,
        put_accent=put_accent,
        put_yo=put_yo,
    )
    playback_rate = sample_rate * 1.05
    sd.play(waveform, playback_rate)

    # Wait out the clip (plus a 0.5 s margin) before stopping the stream.
    wait_seconds = (len(waveform) / sample_rate) + 0.5
    time.sleep(wait_seconds)
    sd.stop()