8 Commits

13 changed files with 1515 additions and 2754 deletions
+4 -1
View File
@@ -1,5 +1,4 @@
VA_ALIAS='("джарвис",)' VA_ALIAS='("джарвис",)'
VA_TBR='("скажи", "покажи", "ответь", "произнеси", "расскажи", "сколько", "слушай")'
VOSK_MODEL_NAME='vosk-model-small-ru-0.22' # vosk-model-ru-0.42 VOSK_MODEL_NAME='vosk-model-small-ru-0.22' # vosk-model-ru-0.42
MICROPHONE_INDEX=-1 MICROPHONE_INDEX=-1
PICOVOICE_TOKEN='token' PICOVOICE_TOKEN='token'
@@ -8,3 +7,7 @@ PICOVOICE_TOKEN='token'
# home assistant # home assistant
HOME_ASSISTANT_URL='http://localhost:8123/api' HOME_ASSISTANT_URL='http://localhost:8123/api'
HOME_ASSISTANT_TOKEN='' HOME_ASSISTANT_TOKEN=''
# weather
WEATHER_DEFAULT_CITY='krasnoyarsk'
WEATHER_URL='https://yandex.ru/pogoda'
-3
View File
@@ -8,9 +8,6 @@ __pycache__/
# Custom # Custom
data/model_small/ data/model_small/
data/model_large/ data/model_large/
data/v4_ru.pt
MyTTSDataset/
vocal.wav
# C extensions # C extensions
*.so *.so
+2 -1
View File
@@ -46,6 +46,7 @@ music_on:
- хочу послушать музыку - хочу послушать музыку
- запусти плейлист - запусти плейлист
music_off: music_off:
- пауза
- выключи музыку - выключи музыку
- остановить музыку - остановить музыку
- пауза музыки - пауза музыки
@@ -72,10 +73,10 @@ weather:
- возможен дождь сегодня? - возможен дождь сегодня?
- прогноз погоды на сегодня - прогноз погоды на сегодня
- погода - погода
- скажи погоду
home_assistant_execute: home_assistant_execute:
- включи телевизор - включи телевизор
- выключи телевизор - выключи телевизор
- начни уборку - начни уборку
- убрать мою комнату
home_assistant_get: home_assistant_get:
- тест - тест
+6 -3
View File
@@ -1,11 +1,10 @@
import environs import environs
import ast
env = environs.Env() env = environs.Env()
env.read_env() env.read_env()
VA_ALIAS = ast.literal_eval(env.str("VA_ALIAS"))
VA_ALIAS = env.str("VA_ALIAS")
VA_TBR = env.str("VA_TBR")
VOSK_MODEL_NAME = env.str("VOSK_MODEL_NAME") VOSK_MODEL_NAME = env.str("VOSK_MODEL_NAME")
MICROPHONE_INDEX = env.int("MICROPHONE_INDEX") MICROPHONE_INDEX = env.int("MICROPHONE_INDEX")
PICOVOICE_TOKEN = env.str("PICOVOICE_TOKEN") PICOVOICE_TOKEN = env.str("PICOVOICE_TOKEN")
@@ -13,3 +12,7 @@ PICOVOICE_TOKEN = env.str("PICOVOICE_TOKEN")
# home assistant # home assistant
HOME_ASSISTANT_URL = env.str("HOME_ASSISTANT_URL") HOME_ASSISTANT_URL = env.str("HOME_ASSISTANT_URL")
HOME_ASSISTANT_TOKEN = env.str("HOME_ASSISTANT_TOKEN") HOME_ASSISTANT_TOKEN = env.str("HOME_ASSISTANT_TOKEN")
# weather
WEATHER_DEFAULT_CITY = env.str("WEATHER_DEFAULT_CITY")
WEATHER_URL = env.str("WEATHER_URL")
File diff suppressed because it is too large Load Diff
+4 -2
View File
@@ -11,7 +11,7 @@ from fuzzywuzzy import fuzz
from pvrecorder import PvRecorder from pvrecorder import PvRecorder
from data import config from data import config
from modules import HomeAssistant from modules import HomeAssistant, MediaPlayerController, Weather
from utils import download_models, execute_cmd, play from utils import download_models, execute_cmd, play
@@ -25,6 +25,8 @@ class Jarvis:
self.CDIR = os.getcwd() self.CDIR = os.getcwd()
self.VA_CMD_LIST = yaml.safe_load(open('data/commands.yaml', encoding='utf8')) self.VA_CMD_LIST = yaml.safe_load(open('data/commands.yaml', encoding='utf8'))
self.home_assistant = HomeAssistant.HomeAssistant() self.home_assistant = HomeAssistant.HomeAssistant()
self.media_player_controller = MediaPlayerController.MediaPlayerController()
self.weather = Weather.Weather()
self.porcupine = pvporcupine.create( self.porcupine = pvporcupine.create(
access_key=config.PICOVOICE_TOKEN, access_key=config.PICOVOICE_TOKEN,
keywords=['jarvis'], keywords=['jarvis'],
@@ -73,7 +75,7 @@ class Jarvis:
:return: bool - распознана или нет команда :return: bool - распознана или нет команда
""" """
print(f"Распознано: {voice}") print(f"Распознано: {voice}")
for x in config.VA_ALIAS + config.VA_TBR: for x in config.VA_ALIAS:
voice = voice.replace(x, "").strip() voice = voice.replace(x, "").strip()
rc = {'cmd': '', 'percent': 0} rc = {'cmd': '', 'percent': 0}
for c, v in self.VA_CMD_LIST.items(): for c, v in self.VA_CMD_LIST.items():
+90
View File
@@ -0,0 +1,90 @@
import platform
import subprocess
class MediaPlayerController:
    """
    Control media playback (play/pause, next track, previous track).

    On Windows the multimedia keys are simulated through the ``win32api`` /
    ``win32con`` modules; on Linux the ``playerctl`` command is executed.
    On any other platform the public methods do nothing.
    """

    def __init__(self):
        # Detected once; every public method dispatches on this value.
        self.os_type = platform.system()

    def play_pause(self) -> None:
        """
        Toggle between playing and paused.

        :return:
        """
        if self.os_type == 'Windows':
            self._windows_play_pause()
        elif self.os_type == 'Linux':
            self._linux_control("play-pause")

    def next_track(self) -> None:
        """
        Switch to the next track.

        :return:
        """
        if self.os_type == 'Windows':
            self._windows_control("next")
        elif self.os_type == 'Linux':
            self._linux_control("next")

    def previous_track(self) -> None:
        """
        Switch to the previous track.

        :return:
        """
        if self.os_type == 'Windows':
            self._windows_control("previous")
        elif self.os_type == 'Linux':
            self._linux_control("previous")

    def _windows_play_pause(self) -> None:
        """
        Toggle playback on Windows by pressing the play/pause media key.

        :return:
        """
        # Imported lazily: the win32 modules are only importable on Windows.
        import win32con

        self.key_press(win32con.VK_MEDIA_PLAY_PAUSE)

    def _windows_control(self, action: str) -> None:
        """
        Switch to the next or previous track on Windows.

        :param action: str - "next" or "previous"; any other value is ignored
        :return:
        """
        # Imported lazily: the win32 modules are only importable on Windows.
        import win32con

        if action == "next":
            self.key_press(win32con.VK_MEDIA_NEXT_TRACK)
        elif action == "previous":
            self.key_press(win32con.VK_MEDIA_PREV_TRACK)

    @staticmethod
    def key_press(key_code: int) -> None:
        """
        Simulate pressing and releasing a key.

        :param key_code: int - virtual-key code to press (this class passes
            the ``win32con.VK_MEDIA_*`` constants)
        :return:
        """
        import win32api
        import win32con

        # First event is the key-down, second one (KEYEVENTF_KEYUP) releases it.
        win32api.keybd_event(key_code, 0, 0, 0)
        win32api.keybd_event(key_code, 0, win32con.KEYEVENTF_KEYUP, 0)

    @staticmethod
    def _linux_control(command: str) -> None:
        """
        Run a ``playerctl`` sub-command on Linux.

        Failures are reported on stdout instead of being raised, so a missing
        player or a missing ``playerctl`` binary does not crash the assistant.

        :param command: str - playerctl sub-command to run
        :return:
        """
        try:
            subprocess.run(["playerctl", command], check=True)
        except OSError as e:
            # Raised when the executable cannot be started at all
            # (e.g. FileNotFoundError when playerctl is not installed).
            print(f"Failed to {command}: playerctl could not be started ({e})")
        except subprocess.CalledProcessError as e:
            print(f"Failed to {command}: {e}")
+50
View File
@@ -0,0 +1,50 @@
import json
from bs4 import BeautifulSoup
from curl_cffi import requests
from fuzzywuzzy import fuzz
from data.config import WEATHER_DEFAULT_CITY, WEATHER_URL
class Weather:
    """
    Weather lookup that scrapes the page at ``WEATHER_URL/<city>``.
    """

    def __init__(self):
        self.default_city = WEATHER_DEFAULT_CITY
        self.url = WEATHER_URL

    def get_info(self, city: str) -> str:
        """
        Return a short description of the current weather in a city.

        If the page for ``city`` cannot be parsed, the default city is tried
        once; if that fails as well, a fixed "unavailable" message is
        returned instead of recursing forever.

        :param city: str - city slug appended to the weather URL
        :return: str - phrase with the temperature and the condition
        """
        try:
            response = requests.get(f"{self.url}/{city}", impersonate="chrome110")
            soup = BeautifulSoup(response.text, "html.parser")
            # NOTE(review): a list passed to class_ presumably matches a tag
            # carrying any of the listed classes - confirm against bs4 docs.
            card = soup.find(
                "div",
                class_=["fact", "fact_prec_rain-low", "card", "card_size_big"]
            )
            info = card.find(
                "div",
                class_=["fact__temp-wrap"]
            )
            temp = info.find(
                "span",
                class_=["temp__value", "temp__value_with-unit"]
            ).text
            weather = info.find(
                "div",
                class_=["link__condition", "day-anchor i-bem"]
            ).text.lower()
            return f"За окном {temp}, {weather}"
        except AttributeError:
            # Presumably raised when one of the find() calls above returned
            # None, i.e. the expected markup is missing from the page.
            if city == self.default_city:
                # The fallback itself failed: stop here rather than recurse
                # until RecursionError.
                return "Не удалось получить данные о погоде"
            return self.get_info(self.default_city)

    def validate_city(self, voice: str) -> str:
        """
        Resolve the city named in a recognised phrase.

        The last space-separated word of ``voice`` is fuzzy-matched against
        the keys of ``data/weather_city.json``; the value of the best key is
        returned when its score is above 80, otherwise the default city.

        :param voice: str - recognised phrase
        :return: str - lower-cased city value from the JSON file, or the default city
        """
        validate_voice = voice.split(" ")[-1]
        rc = {'cmd': '', 'percent': 0}
        # Context manager closes the file; UTF-8 is explicit so the JSON is
        # read the same way regardless of the platform default encoding.
        with open("data/weather_city.json", encoding="utf-8") as f:
            data = json.load(f)
        for ru, en in data.items():
            vrt = fuzz.ratio(validate_voice.lower(), ru.lower())
            if vrt > rc['percent']:
                rc['cmd'] = en.lower()
                rc['percent'] = vrt
        if rc['percent'] > 80:
            return rc['cmd']
        return self.default_city
Generated
+141 -2628
View File
File diff suppressed because it is too large Load Diff
+5 -3
View File
@@ -6,7 +6,7 @@ authors = ["dmitrium12 <belicdima8@gmail.com>"]
readme = "README.md" readme = "README.md"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = ">=3.11,<3.12" python = "^3.11"
vosk = "^0.3.45" vosk = "^0.3.45"
pvporcupine = "^3.0.1" pvporcupine = "^3.0.1"
pvrecorder = "^1.2.1" pvrecorder = "^1.2.1"
@@ -28,8 +28,10 @@ ollama = "^0.1.6"
ruff = "^0.4.2" ruff = "^0.4.2"
noisereduce = "^3.0.2" noisereduce = "^3.0.2"
environs = "^11.0.0" environs = "^11.0.0"
webrtcvad = "^2.0.10" bs4 = "^0.0.2"
tts = "^0.22.0" fake-useragent = "^1.5.1"
curl-cffi = "^0.6.3"
pandas = "^2.2.2"
[[tool.poetry.source]] [[tool.poetry.source]]
-55
View File
@@ -1,55 +0,0 @@
import re
import requests
from bs4 import BeautifulSoup
def filter_string(input_string: str) -> str:
    """
    Strip a leading list number and drop every character outside the allowed set.

    Allowed characters are Russian letters, digits, space and ``!,.?-``.
    A leading marker such as ``"12. "`` or ``"12) "`` is removed first.

    :param input_string: str - raw phrase text
    :return: str - filtered phrase
    """
    # frozenset gives O(1) membership tests instead of scanning a list.
    allowed_chars = frozenset(
        "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя1234567890 !,.?-"
    )
    # The separator is matched literally; an unescaped "." would also consume
    # a digit and turn "15 минут" into "минут".
    input_string = re.sub(r'^\d+[.)]\s+', '', input_string)
    return ''.join(char for char in input_string if char in allowed_chars)
# Scrape the GLaDOS voice-line page, collect phrase -> audio URL pairs, then
# download every audio file and write a "wavN|phrase" transcript next to them.

repetition = 0
response = {}  # phrase text -> URL taken from the list item

soup = BeautifulSoup(
    requests.get('https://theportalwiki.com/wiki/GLaDOS_voice_lines/ru').text,
    features='html.parser'
)

for li in soup.find_all('li'):
    # Variant 1: phrase in <i>, link inside <span class="audio-player">.
    try:
        i = li.find('i').text
        url = li.find('span', class_=['audio-player']).find('a')['href']
        if i not in response:
            response[i] = url
        else:
            repetition += 1
    except (AttributeError, TypeError, KeyError):
        # Best-effort scrape: the item lacks the expected tags
        # (AttributeError / TypeError on None) or the link has no href
        # (KeyError) - skip this variant.
        pass

    # Variant 2: the phrase is the text of the first <a> in the item.
    try:
        i = li.find('a').text
        url = li.find('a')['href']
        if i not in response:
            response[i] = url
        else:
            repetition += 1
    except (AttributeError, TypeError, KeyError):
        # Same best-effort rule as above.
        pass

print(f'Количество найденный элементов: {len(response)}')
print(f'Количество повторении: {repetition}')

# Explicit UTF-8: the transcript contains Cyrillic text and must not depend
# on the platform default encoding.
with open('MyTTSDataset/transcript.txt', 'w', encoding='utf-8') as f:
    for index, (key, value) in enumerate(response.items()):
        try:
            # Separate name so the phrase dict is not shadowed while it is
            # being iterated.
            audio_response = requests.get(value)
            if audio_response.status_code == 200:
                key = filter_string(key)
                # Skip phrases that are empty or too short after filtering.
                if key and len(key.replace(" ", "")) > 3:
                    with open(f"MyTTSDataset/wavs/wav{index}.wav", 'wb') as file:
                        file.write(audio_response.content)
                    f.write(f'wav{index}|{key}\n')
        except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
            # The collected href is not a URL requests can fetch - skip it.
            pass
+21 -1
View File
@@ -1,4 +1,12 @@
def execute_cmd(self, cmd: str, recognized_phrase: str, voice: str) -> None: from typing import TYPE_CHECKING
if TYPE_CHECKING:
from modules.Jarvis import Jarvis
else:
Jarvis = None
def execute_cmd(self: Jarvis, cmd: str, recognized_phrase: str, voice: str) -> None:
""" """
Функция выполняет полученные команды Функция выполняет полученные команды
@@ -16,9 +24,21 @@ def execute_cmd(self, cmd: str, recognized_phrase: str, voice: str) -> None:
self.play("off", True) self.play("off", True)
self.porcupine.delete() self.porcupine.delete()
exit(0) exit(0)
elif cmd == 'music_on':
self.media_player_controller.play_pause()
elif cmd == 'music_off':
self.media_player_controller.play_pause()
elif cmd == 'music_next':
self.media_player_controller.next_track()
elif cmd == 'music_previous':
self.media_player_controller.previous_track()
elif cmd == 'home_assistant_execute': elif cmd == 'home_assistant_execute':
self.home_assistant.send_process(recognized_phrase) self.home_assistant.send_process(recognized_phrase)
elif cmd == 'home_assistant_get': elif cmd == 'home_assistant_get':
entity_name = self.home_assistant.voice_to_name(voice) entity_name = self.home_assistant.voice_to_name(voice)
entity_info = self.home_assistant.validate_info(entity_name) entity_info = self.home_assistant.validate_info(entity_name)
print(entity_info) print(entity_info)
elif cmd == 'weather':
city = self.weather.validate_city(voice)
city_info = self.weather.get_info(city)
print(city_info)
-57
View File
@@ -1,57 +0,0 @@
import os
import torch
import torchaudio
def load_data(audio_folder):
    """
    Load every ``.wav`` file in a folder together with its transcript.

    For each ``name.wav`` the transcript is read from ``name.txt`` in the
    same folder and stripped of surrounding whitespace.

    :param audio_folder: path of the folder to scan (not recursive)
    :return: tuple ``(audios, texts)`` - ``audios`` is a list of
        ``(waveform, sample_rate)`` pairs as returned by ``torchaudio.load``,
        ``texts`` is the list of transcripts in the same order
    """
    audios = []
    texts = []
    for audio_file in os.listdir(audio_folder):
        if not audio_file.endswith('.wav'):
            continue
        audio_path = os.path.join(audio_folder, audio_file)
        waveform, sample_rate = torchaudio.load(audio_path)
        # Swap only the final extension; str.replace would also rewrite a
        # ".wav" that appears in a directory name or earlier in the file name.
        text_path = os.path.splitext(audio_path)[0] + '.txt'
        with open(text_path) as f:
            text = f.read().strip()
        audios.append((waveform, sample_rate))
        texts.append(text)
    return audios, texts
def train(model, audios, texts, epochs=3, learning_rate=1e-4):
    """
    Fine-tune a model that maps text to a waveform.

    :param model: torch module called as ``model(text)``; assumed to return a
        waveform tensor comparable with the target - TODO confirm
    :param audios: target audio, either waveform tensors or
        ``(waveform, sample_rate)`` pairs as produced by ``load_data``
    :param texts: transcripts, paired with ``audios`` by position
    :param epochs: number of passes over the data
    :param learning_rate: learning rate for the Adam optimizer
    :return: None; the average loss of every epoch is printed
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = torch.nn.MSELoss()  # Placeholder loss - needs tuning for the actual task

    model.train()
    # Materialise the pairs once: gives the real sample count for averaging
    # and lets the data be iterated on every epoch.
    samples = list(zip(audios, texts))
    if not samples:
        # Nothing to train on; avoids dividing by zero below.
        print('No training samples provided; skipping training.')
        return

    for epoch in range(epochs):
        total_loss = 0
        for audio, text in samples:
            # load_data yields (waveform, sample_rate); only the waveform is
            # a valid loss target. Bare tensors are still accepted.
            waveform = audio[0] if isinstance(audio, (tuple, list)) else audio
            optimizer.zero_grad()
            # The model is assumed to take text and return audio.
            predicted_waveform = model(text)
            loss = criterion(predicted_waveform, waveform)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        average_loss = total_loss / len(samples)
        print(f'Epoch {epoch + 1}: Average Loss = {average_loss}')
def main():
    """
    Fine-tune the stored model on the local recordings and synthesise a sample.

    Loads the model from ``data/v4_ru.pt``, trains it on the data found in
    ``wav_files``, saves the resulting state dict to ``fine_tuned_model.pth``
    and writes a test phrase to ``output_audio.wav`` at 16000 Hz.
    """
    checkpoint_path = 'data/v4_ru.pt'
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    tts_model = torch.load(checkpoint_path)
    tts_model.eval()

    recordings_dir = 'wav_files'
    recordings, transcripts = load_data(recordings_dir)

    train(tts_model, recordings, transcripts)
    torch.save(tts_model.state_dict(), 'fine_tuned_model.pth')

    # Back to inference mode for the sample synthesis.
    tts_model.eval()
    phrase = "Пример текста для синтеза."
    with torch.no_grad():
        synthesized = tts_model(phrase)
    torchaudio.save('output_audio.wav', synthesized, 16000)
if __name__ == '__main__':
main()