18 Commits

Author SHA1 Message Date
Dmitrium12 727376f495 fix download dataset 2024-05-18 19:56:30 +07:00
Dmitrium12 9b7ca85831 add test download dataset 2024-05-18 11:10:43 +07:00
Dmitrium12 197dc50529 vrem ton working 2024-05-05 19:56:01 +07:00
Dmitrium12 e6c7006f1f Merge pull request 'add comment in all function and class< and use .env file' (#5) from update_function_docs_fixes into master
Reviewed-on: #5
2024-05-05 10:30:17 +07:00
Dmitrium12 9d08a7eb85 add comment in all function and class< and use .env file 2024-05-05 10:28:21 +07:00
Dmitrium12 49946322bb Merge pull request 'add_home_assistant_connect' (#4) from add_home_assistant_connect into master
Reviewed-on: #4
2024-05-04 20:14:48 +07:00
Dmitrium12 aa639ffae9 add HomeAssistant get info on entity 2024-05-04 19:28:36 +07:00
Dmitrium12 3d12032942 add HomeAssistant module and use execute command 2024-05-02 12:55:27 +07:00
Dmitrium12 06b70afdce Merge pull request 'modify project structure and add download_models.py' (#3) from census_of_the_project_structure into master
Reviewed-on: #3
2024-05-01 14:15:59 +07:00
Dmitrium12 91cd9b02df modify project structure and add download_models.py 2024-05-01 13:35:00 +07:00
Dmitrium12 ba52d86754 Merge pull request 'add_noisereduce' (#2) from add_noisereduce into master
Reviewed-on: #2
2024-05-01 10:38:58 +07:00
Dmitrium12 4a0155413e micro fix 2024-04-30 08:46:16 +07:00
Dmitrium12 6a6efe8dd6 micro fix 2024-04-30 08:46:06 +07:00
Dmitrium12 08b6f95a67 add testing noisereduce 2024-04-30 08:31:33 +07:00
Dmitrium12 d2b60b53c4 add command and add its in ollama functions 2024-04-29 17:58:53 +07:00
Dmitrium12 a7abfe44b4 add linter and test ollama in python 2024-04-29 15:31:01 +07:00
Dmitrium12 3fb31e60ea .gitignore 2024-01-09 22:10:57 +07:00
Dmitrium12 c03811cb87 starting 2024-01-09 22:10:13 +07:00
29 changed files with 4003 additions and 117 deletions
+10
View File
@@ -0,0 +1,10 @@
VA_ALIAS='("джарвис",)'
VA_TBR='("скажи", "покажи", "ответь", "произнеси", "расскажи", "сколько", "слушай")'
VOSK_MODEL_NAME='vosk-model-small-ru-0.22' # vosk-model-ru-0.42
MICROPHONE_INDEX=-1
PICOVOICE_TOKEN='token'
# home assistant
HOME_ASSISTANT_URL='http://localhost:8123/api'
HOME_ASSISTANT_TOKEN=''
+5 -1
View File
@@ -6,7 +6,11 @@ __pycache__/
*$py.class *$py.class
# Custom # Custom
model_small/ data/model_small/
data/model_large/
data/v4_ru.pt
MyTTSDataset/
vocal.wav
# C extensions # C extensions
*.so *.so
-5
View File
@@ -1,5 +0,0 @@
VA_ALIAS = ('джарвис',)
VA_TBR = ('скажи', 'покажи', 'ответь', 'произнеси', 'расскажи', 'сколько', 'слушай')
MICROPHONE_INDEX = -1
PICOVOICE_TOKEN = "4xbwaZwZmSHeTiowFl5Rgqsc8CR4FKGV8YueJUlR4Zt2e1kB64IDcA=="
OPENAI_TOKEN = "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"
+7
View File
@@ -72,3 +72,10 @@ weather:
- возможен дождь сегодня? - возможен дождь сегодня?
- прогноз погоды на сегодня - прогноз погоды на сегодня
- погода - погода
home_assistant_execute:
- включи телевизор
- выключи телевизор
- начни уборку
- убрать мою комнату
home_assistant_get:
- тест
Executable
+15
View File
@@ -0,0 +1,15 @@
import ast

import environs


def _parse_phrases(raw: str) -> tuple[str, ...]:
    """
    Convert the textual value of a phrase-list variable into a tuple of phrases.

    The ``.env`` file stores ``VA_ALIAS`` and ``VA_TBR`` as Python tuple
    literals, e.g. ``("a", "b")``. Reading them with ``env.str`` yields one
    plain string, and iterating over that string walks single characters
    instead of whole phrases.

    :param raw: str - value exactly as read from the environment
    :return: tuple[str, ...] - the non-empty phrases contained in the value
    """
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        # Not a valid literal (for example a hand-edited value with a missing
        # quote): fall back to a comma-separated list and strip the brackets
        # and quotes that may still surround each item.
        parsed = [part.strip(" \t()[]'\"") for part in raw.split(",")]
    if not isinstance(parsed, (list, tuple, set)):
        # A single scalar such as "джарвис" becomes a one-element tuple.
        parsed = (parsed,)
    return tuple(str(item) for item in parsed if str(item).strip())


env = environs.Env()
env.read_env()

# Wake-word aliases and filler words that are removed from the recognised text.
VA_ALIAS = _parse_phrases(env.str("VA_ALIAS"))
VA_TBR = _parse_phrases(env.str("VA_TBR"))

VOSK_MODEL_NAME = env.str("VOSK_MODEL_NAME")
MICROPHONE_INDEX = env.int("MICROPHONE_INDEX")
PICOVOICE_TOKEN = env.str("PICOVOICE_TOKEN")

# home assistant
HOME_ASSISTANT_URL = env.str("HOME_ASSISTANT_URL")
HOME_ASSISTANT_TOKEN = env.str("HOME_ASSISTANT_TOKEN")
+4
View File
@@ -0,0 +1,4 @@
пылесос:
- entity_id:vacuum.roborock_vacuum_m1s
- state:находится в
- attributes.battery_level:а его уровень зарядки
+1 -1
View File
@@ -1,4 +1,4 @@
from Jarvis import Jarvis from modules.Jarvis import Jarvis
def main(): def main():
+97
View File
@@ -0,0 +1,97 @@
import requests
import yaml
from fuzzywuzzy import process
from requests import Response
from data import config
class HomeAssistant:
    """
    Client for the Home Assistant REST API.

    Loads the table of spoken device names from
    ``data/home_assistant_entities.yaml`` and talks to the server configured
    through ``config.HOME_ASSISTANT_URL`` and ``config.HOME_ASSISTANT_TOKEN``.
    """

    # Seconds to wait for the API. requests applies no timeout by default, so
    # an unreachable server would otherwise block the caller indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # The endpoint comes from the configuration rather than a fixed
        # address; a trailing slash is dropped so URL joins stay clean.
        self.url = config.HOME_ASSISTANT_URL.rstrip("/")
        self.token = config.HOME_ASSISTANT_TOKEN
        with open('data/home_assistant_entities.yaml', encoding='utf8') as file:
            # An empty YAML document loads as None; normalise it to a mapping.
            self.HA_CMD_LIST = yaml.safe_load(file) or {}

    def get_info(self, state: str) -> dict | Response:
        """
        Fetch the state object of one entity.

        :param state: str - entity_id of the Home Assistant object to look up
        :return: dict - the matching entry of the ``/states`` listing;
            Response - the raw server response when the request failed or no
            entry has that entity_id
        """
        response = requests.get(
            url=f"{self.url}/states",
            headers={
                "Authorization": "Bearer " + self.token
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        # Only a successful reply carries the JSON list of states.
        if response.ok:
            for entity in response.json():
                if entity["entity_id"] == state:
                    return entity
        return response

    def send_process(self, command: str = "выключи телевизор") -> bool:
        """
        Send a text command to the conversation service of the API.

        :param command: str - the command as plain text
        :return: bool - True when the server answered with HTTP 200
        """
        response = requests.post(
            url=f"{self.url}/services/conversation/process",
            json={"text": command},
            headers={
                "Authorization": "Bearer " + self.token,
                "content-type": "application/json"
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        return response.status_code == 200

    def voice_to_name(self, voice: str) -> str | None:
        """
        Fuzzy-match the recognised phrase against the known device names.

        Every word of the phrase is compared with the keys of the entity
        table and the key with the highest score wins.

        :param voice: str - recognised phrase, not yet checked against the list
        :return: str - the best matching device name, or None when the phrase
            or the entity table is empty or nothing scored above zero
        """
        best_match = None
        if not self.HA_CMD_LIST:
            # Nothing to compare with.
            return best_match
        highest_score = 0
        for word in voice.lower().split():
            result, score = process.extractOne(word, self.HA_CMD_LIST.keys())
            if score > highest_score:
                highest_score = score
                best_match = result
        return best_match

    def validate_info(self, name: str) -> str:
        """
        Build a ready-to-speak sentence describing an entity.

        Each configured item has the form ``attribute.path:label``; the value
        found under that path in the entity state is appended to the answer
        as ``" <label> <value>"``.

        :param name: str - device name used as a key of the entity table
        :return: str - the name followed by every attribute that could be
            resolved; just the name when the entity is unknown or unavailable
        """
        answer = name
        entity_config = self.HA_CMD_LIST.get(name)
        if not entity_config:
            return answer

        # Split each item on the first colon only, so a label may itself
        # contain a colon and an item without one yields an empty label.
        entity_details = {}
        for item in entity_config:
            key, _, label = item.partition(':')
            entity_details[key] = label

        entity_id = entity_details.pop("entity_id", "robot")
        if not entity_id:
            return answer

        entity_state = self.get_info(entity_id)
        if not isinstance(entity_state, dict):
            # get_info hands back the raw response when the entity is missing
            # or the request failed; there is nothing to describe then.
            return answer

        for attribute_path, label in entity_details.items():
            value = entity_state
            try:
                for attribute in attribute_path.split("."):
                    value = value[attribute]
            except (KeyError, TypeError):
                # Path absent, or it descends into a non-mapping value.
                continue
            answer += f" {label} {value}"
        return answer
+35 -8
View File
@@ -3,27 +3,34 @@ import os
import struct import struct
import time import time
import noisereduce as nr
import pvporcupine import pvporcupine
import vosk import vosk
import yaml import yaml
from fuzzywuzzy import fuzz from fuzzywuzzy import fuzz
from pvrecorder import PvRecorder from pvrecorder import PvRecorder
import config from data import config
from utils import execute_cmd, play from modules import HomeAssistant
from utils import download_models, execute_cmd, play
class Jarvis: class Jarvis:
"""
Это основной модуль голосового ассистента
"""
def __init__(self): def __init__(self):
download_models.install_vosk_model()
self.recorder = None self.recorder = None
self.CDIR = os.getcwd() self.CDIR = os.getcwd()
self.VA_CMD_LIST = yaml.safe_load(open('commands.yaml', encoding='utf8')) self.VA_CMD_LIST = yaml.safe_load(open('data/commands.yaml', encoding='utf8'))
self.home_assistant = HomeAssistant.HomeAssistant()
self.porcupine = pvporcupine.create( self.porcupine = pvporcupine.create(
access_key=config.PICOVOICE_TOKEN, access_key=config.PICOVOICE_TOKEN,
keywords=['jarvis'], keywords=['jarvis'],
sensitivities=[1] sensitivities=[1]
) )
self.kaldi_rec = vosk.KaldiRecognizer(vosk.Model("model_small"), 32000) self.kaldi_rec = vosk.KaldiRecognizer(vosk.Model("data/model_small"), 16000)
def main(self): def main(self):
self.recorder = PvRecorder( self.recorder = PvRecorder(
@@ -37,7 +44,12 @@ class Jarvis:
while True: while True:
try: try:
pcm = self.recorder.read() pcm = self.recorder.read()
if self.porcupine.process(pcm) >= 0: reduced_audio = nr.reduce_noise(
y=pcm,
sr=16000,
prop_decrease=0.6
)
if self.porcupine.process(reduced_audio) >= 0:
self.recorder.stop() self.recorder.stop()
self.play("greet", True) self.play("greet", True)
self.recorder.start() self.recorder.start()
@@ -53,7 +65,13 @@ class Jarvis:
print(f"Unexpected {err=}, {type(err)=}") print(f"Unexpected {err=}, {type(err)=}")
raise raise
def va_respond(self, voice: str): def va_respond(self, voice: str) -> bool:
"""
Функция предсказывает команду
:param voice: str - распознанная строка
:return: bool - распознана или нет команда
"""
print(f"Распознано: {voice}") print(f"Распознано: {voice}")
for x in config.VA_ALIAS + config.VA_TBR: for x in config.VA_ALIAS + config.VA_TBR:
voice = voice.replace(x, "").strip() voice = voice.replace(x, "").strip()
@@ -64,6 +82,7 @@ class Jarvis:
if vrt > rc['percent']: if vrt > rc['percent']:
rc['cmd'] = c rc['cmd'] = c
rc['percent'] = vrt rc['percent'] = vrt
rc['recognized_phrase'] = x
if len(rc['cmd'].strip()) <= 0: if len(rc['cmd'].strip()) <= 0:
return False return False
elif rc['percent'] < 70 or rc['cmd'] not in self.VA_CMD_LIST.keys(): elif rc['percent'] < 70 or rc['cmd'] not in self.VA_CMD_LIST.keys():
@@ -71,8 +90,16 @@ class Jarvis:
time.sleep(1) time.sleep(1)
return False return False
else: else:
execute_cmd.execute_cmd(self, rc['cmd']) execute_cmd.execute_cmd(self, rc['cmd'], rc['recognized_phrase'], voice)
return True return True
def play(self, phrase, wait_done=True): def play(self, phrase: str, wait_done: bool = True):
"""
Функция для запуска голосовой команды
:param self: modules.Jarvis - объект основного модуля
:param phrase: str - фраза для запуска голосовой команды
:param wait_done: bool - нужно-ли ждать окончания фразы
:return:
"""
play.play(self, phrase, wait_done) play.play(self, phrase, wait_done)
Generated
+3593 -11
View File
File diff suppressed because it is too large Load Diff
+5 -1
View File
@@ -6,7 +6,7 @@ authors = ["dmitrium12 <belicdima8@gmail.com>"]
readme = "README.md" readme = "README.md"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.11" python = ">=3.11,<3.12"
vosk = "^0.3.45" vosk = "^0.3.45"
pvporcupine = "^3.0.1" pvporcupine = "^3.0.1"
pvrecorder = "^1.2.1" pvrecorder = "^1.2.1"
@@ -26,6 +26,10 @@ torch = "^2.1.1+cpu"
torchaudio = "^2.1.1+cpu" torchaudio = "^2.1.1+cpu"
ollama = "^0.1.6" ollama = "^0.1.6"
ruff = "^0.4.2" ruff = "^0.4.2"
noisereduce = "^3.0.2"
environs = "^11.0.0"
webrtcvad = "^2.0.10"
tts = "^0.22.0"
[[tool.poetry.source]] [[tool.poetry.source]]
+48 -60
View File
@@ -1,67 +1,55 @@
import asyncio
import re import re
import ollama import requests
import yaml from bs4 import BeautifulSoup
def load_commands(file_path): def filter_string(input_string: str) -> str:
with open(file_path) as file: allowed_chars = []
data = yaml.safe_load(file) for j in "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя1234567890 !,.?-":
answers = {} allowed_chars.append(j)
command_mapping = {} input_string = re.sub(r'^\d+.\s+', '', input_string)
idx = 1 return ''.join([char for char in input_string if char in allowed_chars])
for category, items in data.items():
answers[idx] = items
command_mapping[idx] = category
idx += 1
return answers, command_mapping
async def chat(answers: dict[int, str], request: str = 'музычку пожалуйста') -> str: repetition = 0
answers_str = ''.join([f'{key}. {", ".join(value)}\n' for key, value in answers.items()]) response = {}
messages = [ soup = BeautifulSoup(
{ requests.get('https://theportalwiki.com/wiki/GLaDOS_voice_lines/ru').text,
'role': 'system', features='html.parser'
'content': 'Пожалуйста, просмотрите список доступных команд и '
'выберите подходящую команду, указав ее номер. '
'Вы можете выбрать одну команду или несколько команд одновременно. '
'В ответе укажите только номер или номера команд, '
'например: "1" или "1, 3, 5". Вот список доступных команд:\n' + answers_str
},
{
'role': 'user',
'content': request
}
]
async_client = ollama.AsyncClient()
response = await async_client.chat(
model='llama3:8b',
messages=messages,
options={
'temperature': 0.5,
'mirostat_tau': 100.0,
'repeat_last_n': 2,
'num_predict': 20
}
) )
return response.get("message").get("content") for li in soup.find_all('li'):
try:
i = li.find('i').text
async def main(): url = li.find('span', class_=['audio-player']).find('a')['href']
answers, command_mapping = load_commands('commands.yaml') if i not in response.keys():
number = None response[i] = url
while not number: else:
response_content = await chat(answers, "останови музыку и скажи погоду") repetition += 1
number = [ except AttributeError:
int(i) pass
for i in re.findall(r'\d+', response_content) try:
if int(i) in answers.keys() i = li.find('a').text
] url = li.find('a')['href']
command_names = [command_mapping[n] for n in number] if i not in response.keys():
print(f"Выбранный номер: {command_names}") response[i] = url
else:
repetition += 1
if __name__ == '__main__': except AttributeError:
ollama.pull("llama3:8b") pass
asyncio.run(main()) print(f'Количество найденный элементов: {len(response)}')
print(f'Количество повторении: {repetition}')
with open('MyTTSDataset/transcript.txt', 'w') as f:
for index, (key, value) in enumerate(response.items()):
try:
response = requests.get(value)
if response.status_code == 200:
key = filter_string(key)
if key and len(key.replace(" ", "")) > 3:
with open(f"MyTTSDataset/wavs/wav{index}.wav", 'wb') as file:
file.write(response.content)
f.write(f'wav{index}|{key}\n')
except requests.exceptions.MissingSchema:
pass
except requests.exceptions.InvalidSchema:
pass
+37
View File
@@ -0,0 +1,37 @@
import os
import sys
from data import config
def install_vosk_model() -> None:
    """
    Download and unpack the Vosk model named in the configuration.

    Nothing happens when ``data/model_small/README`` already exists.
    Otherwise the archive ``<config.VOSK_MODEL_NAME>.zip`` is fetched from
    alphacephei.com and unpacked, and the folder named after the model is
    moved to ``data/model_small``. Only the standard library is used, so one
    code path serves every platform instead of per-OS shell commands.

    :return: None
    :raises urllib.error.URLError: the archive could not be downloaded
    :raises zipfile.BadZipFile: the downloaded file is not a valid archive
    :raises FileNotFoundError: the archive lacks the folder named after the model
    """
    import shutil
    import tempfile
    import urllib.request
    import zipfile

    target_dir = os.path.join('data', 'model_small')
    if os.path.isfile(os.path.join(target_dir, 'README')):
        # The model is already installed.
        return

    model_name = config.VOSK_MODEL_NAME
    archive_url = f"https://alphacephei.com/vosk/models/{model_name}.zip"

    os.makedirs('data', exist_ok=True)
    # Work inside data/ so the final move stays on one filesystem; the
    # temporary folder and the archive are removed automatically.
    with tempfile.TemporaryDirectory(dir='data') as work_dir:
        archive_path = os.path.join(work_dir, f"{model_name}.zip")
        urllib.request.urlretrieve(archive_url, archive_path)
        with zipfile.ZipFile(archive_path) as archive:
            archive.extractall(work_dir)

        extracted_dir = os.path.join(work_dir, model_name)
        if not os.path.isdir(extracted_dir):
            raise FileNotFoundError(
                f"Archive {archive_url} does not contain a '{model_name}' folder"
            )
        if os.path.isdir(target_dir):
            # A folder without README is an incomplete install; replace it
            # rather than nesting the new model inside it.
            shutil.rmtree(target_dir)
        shutil.move(extracted_dir, target_dir)
+16 -1
View File
@@ -1,4 +1,13 @@
def execute_cmd(self, cmd: str): def execute_cmd(self, cmd: str, recognized_phrase: str, voice: str) -> None:
"""
Функция выполняет полученные команды
:param self: modules.Jarvis - объект основного модуля
:param cmd: str - команда которую функция должна выполнить
:param recognized_phrase: str - распознанная фраза из списка фраз
:param voice: str - распознанная фраза без проверки по списку
:return:
"""
if cmd == 'thanks': if cmd == 'thanks':
self.play("thanks") self.play("thanks")
elif cmd == 'stupid': elif cmd == 'stupid':
@@ -7,3 +16,9 @@ def execute_cmd(self, cmd: str):
self.play("off", True) self.play("off", True)
self.porcupine.delete() self.porcupine.delete()
exit(0) exit(0)
elif cmd == 'home_assistant_execute':
self.home_assistant.send_process(recognized_phrase)
elif cmd == 'home_assistant_get':
entity_name = self.home_assistant.voice_to_name(voice)
entity_info = self.home_assistant.validate_info(entity_name)
print(entity_info)
+10 -2
View File
@@ -3,7 +3,15 @@ import random
import simpleaudio as sa import simpleaudio as sa
def play(self, phrase, wait_done=True): def play(self, phrase: str, wait_done: bool = True) -> None:
"""
Функция для запуска голосовой команды
:param self: modules.Jarvis - объект основного модуля
:param phrase: str - фраза для запуска голосовой команды
:param wait_done: bool - нужно-ли ждать окончания фразы
:return:
"""
filename = None filename = None
file_array = ["not_found", "thanks", "run", "stupid", "ready", "off"] file_array = ["not_found", "thanks", "run", "stupid", "ready", "off"]
if phrase == "greet": if phrase == "greet":
@@ -15,7 +23,7 @@ def play(self, phrase, wait_done=True):
if wait_done: if wait_done:
self.recorder.stop() self.recorder.stop()
if filename: if filename:
wave_obj = sa.WaveObject.from_wave_file(f"{self.CDIR}/sound/{filename}") wave_obj = sa.WaveObject.from_wave_file(f"{self.CDIR}/data/sound/{filename}")
play_obj = wave_obj.play() play_obj = wave_obj.play()
if wait_done: if wait_done:
play_obj.wait_done() play_obj.wait_done()
Executable → Regular
+52 -26
View File
@@ -1,31 +1,57 @@
import time import os
import sounddevice as sd
import torch import torch
import torchaudio
language = 'ru'
model_id = 'ru_v3'
sample_rate = 48000 # 48000
speaker = 'aidar' # aidar, baya, kseniya, xenia, random
put_accent = True
put_yo = True
device = torch.device('cpu') # cpu или gpu
text = "Хауди Хо, друзья!!!"
model, _ = torch.hub.load(repo_or_dir='snakers4/silero-models',
model='silero_tts',
language=language,
speaker=model_id)
model.to(device)
def va_speak(what: str): def load_data(audio_folder):
audio = model.apply_tts(text=what + "..", audios = []
speaker=speaker, texts = []
sample_rate=sample_rate, for audio_file in os.listdir(audio_folder):
put_accent=put_accent, if audio_file.endswith('.wav'):
put_yo=put_yo) audio_path = os.path.join(audio_folder, audio_file)
waveform, sample_rate = torchaudio.load(audio_path)
text_path = audio_path.replace('.wav', '.txt')
with open(text_path) as f:
text = f.read().strip()
audios.append((waveform, sample_rate))
texts.append(text)
return audios, texts
sd.play(audio, sample_rate * 1.05)
time.sleep((len(audio) / sample_rate) + 0.5) def train(model, audios, texts, epochs=3, learning_rate=1e-4):
sd.stop() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.MSELoss() # Вам нужно будет настроить эту функцию под вашу задачу
model.train()
for epoch in range(epochs):
total_loss = 0
for waveform, text in zip(audios, texts):
optimizer.zero_grad()
# Предполагается, что модель принимает текст и возвращает аудио
predicted_waveform = model(text)
loss = criterion(predicted_waveform, waveform)
loss.backward()
optimizer.step()
total_loss += loss.item()
average_loss = total_loss / len(audios)
print(f'Epoch {epoch + 1}: Average Loss = {average_loss}')
def main() -> None:
    """
    Run the fine-tuning experiment from start to finish.

    Loads the serialized model from ``data/v4_ru.pt``, trains it with
    ``train`` on the data that ``load_data`` reads from ``wav_files``, saves
    the resulting state dict to ``fine_tuned_model.pth`` and finally writes a
    synthesized sample to ``output_audio.wav``.
    """
    model_path = 'data/v4_ru.pt'
    model = torch.load(model_path)
    model.eval()
    audio_folder = 'wav_files'
    audios, texts = load_data(audio_folder)
    train(model, audios, texts)
    torch.save(model.state_dict(), 'fine_tuned_model.pth')
    # Back to evaluation mode before producing the sample.
    model.eval()
    sample_text = "Пример текста для синтеза."
    # Gradients are not needed for the sample call.
    with torch.no_grad():
        generated_waveform = model(sample_text)
    # NOTE(review): 16000 is passed as the sample rate - confirm it matches the model output.
    torchaudio.save('output_audio.wav', generated_waveform, 16000)
# Run the fine-tuning experiment only when the file is executed as a script.
if __name__ == '__main__':
    main()
+67
View File
@@ -0,0 +1,67 @@
import asyncio
import re
import ollama
import yaml
def load_commands(file_path):
    """
    Load the command catalogue from a YAML file and number its categories.

    :param file_path: path to a YAML file that maps a category name to its items
    :return: tuple ``(answers, command_mapping)`` - ``answers`` maps a
        1-based number to the items of a category, ``command_mapping`` maps
        the same number to the category name; both are empty for an empty file
    """
    # Read explicitly as UTF-8: the phrases are Cyrillic and the platform
    # default encoding is not UTF-8 everywhere.
    with open(file_path, encoding='utf8') as file:
        # An empty document loads as None; treat it as "no commands".
        data = yaml.safe_load(file) or {}

    answers = {}
    command_mapping = {}
    for idx, (category, items) in enumerate(data.items(), start=1):
        answers[idx] = items
        command_mapping[idx] = category
    return answers, command_mapping
async def chat(answers: dict[int, str], request: str = 'музычку пожалуйста') -> str:
    """
    Ask the ``llama3:8b`` model which numbered command(s) fit a request.

    :param answers: numbered command list; every value is joined with ", "
        to form one line of the menu shown to the model
    :param request: the user phrase to classify
    :return: the text content of the model's reply
    """
    # One "<number>. <phrases>" line per command.
    menu_lines = []
    for key, value in answers.items():
        menu_lines.append(f'{key}. {", ".join(value)}\n')
    answers_str = ''.join(menu_lines)

    system_prompt = (
        'Пожалуйста, просмотрите список доступных команд и '
        'выберите подходящую команду, указав ее номер. '
        'Вы можете выбрать одну команду или несколько команд одновременно. '
        'В ответе укажите только номер или номера команд, '
        'например: "1" или "1, 3, 5". Вот список доступных команд:\n'
    ) + answers_str
    system_message = {'role': 'system', 'content': system_prompt}
    user_message = {'role': 'user', 'content': request}

    generation_options = {
        'temperature': 0.5,
        'mirostat_tau': 100.0,
        'repeat_last_n': 2,
        'num_predict': 20
    }

    client = ollama.AsyncClient()
    response = await client.chat(
        model='llama3:8b',
        messages=[system_message, user_message],
        options=generation_options
    )
    message = response.get("message")
    return message.get("content")
async def main():
    """
    Resolve a fixed sample request into command names.

    Loads the commands from ``commands.yaml`` and keeps asking the model
    until its reply contains at least one number that is a known command.

    :return: list of the command names selected by the model
    """
    answers, command_mapping = load_commands('commands.yaml')
    while True:
        response_content = await chat(answers, "останови музыку и скажи погоду")
        # Keep only the numbers that refer to an existing command.
        number = []
        for token in re.findall(r'\d+', response_content):
            candidate = int(token)
            if candidate in answers:
                number.append(candidate)
        if number:
            break
    return [command_mapping[n] for n in number]
# Script entry point: pull the llama3:8b model through ollama, run main() to
# completion with asyncio.run and print the command names it returned.
if __name__ == '__main__':
    ollama.pull("llama3:8b")
    command_names = asyncio.run(main())
    print(f"Выбранный номер: {command_names}")