3 Commits

Author SHA1 Message Date
Dmitrium12 727376f495 fix download dataset 2024-05-18 19:56:30 +07:00
Dmitrium12 9b7ca85831 add test download dataset 2024-05-18 11:10:43 +07:00
Dmitrium12 197dc50529 vrem ton working 2024-05-05 19:56:01 +07:00
9 changed files with 2803 additions and 118 deletions
+3
View File
@@ -8,6 +8,9 @@ __pycache__/
# Custom
data/model_small/
data/model_large/
data/v4_ru.pt
MyTTSDataset/
vocal.wav
# C extensions
*.so
+1
View File
@@ -76,5 +76,6 @@ home_assistant_execute:
- включи телевизор
- выключи телевизор
- начни уборку
- убрать мою комнату
home_assistant_get:
- тест
+2 -3
View File
@@ -11,7 +11,7 @@ from fuzzywuzzy import fuzz
from pvrecorder import PvRecorder
from data import config
from modules import HomeAssistant, MediaPlayerController
from modules import HomeAssistant
from utils import download_models, execute_cmd, play
@@ -25,7 +25,6 @@ class Jarvis:
self.CDIR = os.getcwd()
self.VA_CMD_LIST = yaml.safe_load(open('data/commands.yaml', encoding='utf8'))
self.home_assistant = HomeAssistant.HomeAssistant()
self.media_player_controller = MediaPlayerController.MediaPlayerController()
self.porcupine = pvporcupine.create(
access_key=config.PICOVOICE_TOKEN,
keywords=['jarvis'],
@@ -74,7 +73,7 @@ class Jarvis:
:return: bool - распознана или нет команда
"""
print(f"Распознано: {voice}")
for x in config.VA_ALIAS:
for x in config.VA_ALIAS + config.VA_TBR:
voice = voice.replace(x, "").strip()
rc = {'cmd': '', 'percent': 0}
for c, v in self.VA_CMD_LIST.items():
-90
View File
@@ -1,90 +0,0 @@
import platform
import subprocess
class MediaPlayerController:
"""
Модуль для манипуляции музыкой
"""
def __init__(self):
self.os_type = platform.system()
def play_pause(self) -> None:
"""
Запуск/остановка музыки
:return:
"""
if self.os_type == 'Windows':
self._windows_play_pause()
elif self.os_type == 'Linux':
self._linux_control("play-pause")
def next_track(self) -> None:
"""
Включает следующею композицию
:return:
"""
if self.os_type == 'Windows':
self._windows_control("next")
elif self.os_type == 'Linux':
self._linux_control("next")
def previous_track(self) -> None:
"""
Включает предыдущею композицию
:return:
"""
if self.os_type == 'Windows':
self._windows_control("previous")
elif self.os_type == 'Linux':
self._linux_control("previous")
def _windows_play_pause(self) -> None:
"""
Запуск/остановка музыки в windows
:return:
"""
import win32con
self.key_press(win32con.VK_MEDIA_PLAY_PAUSE)
def _windows_control(self, action: str) -> None:
"""
Включает предыдущею или следующею композицию в windows
:return:
"""
import win32con
if action == "next":
self.key_press(win32con.VK_MEDIA_NEXT_TRACK)
elif action == "previous":
self.key_press(win32con.VK_MEDIA_PREV_TRACK)
@staticmethod
def key_press(key_code: str) -> None:
"""
Симуляция нажатия и отпускания клавиши
:param key_code: str - какую кнопку нажать
:return:
"""
import win32api
import win32con
win32api.keybd_event(key_code, 0, 0, 0)
win32api.keybd_event(key_code, 0, win32con.KEYEVENTF_KEYUP, 0)
@staticmethod
def _linux_control(command: str) -> None:
"""
Запускает команду для linux систем
:param command: str - команда для запуска
:return:
"""
try:
subprocess.run(["playerctl", command], check=True)
except subprocess.CalledProcessError as e:
print(f"Failed to {command}: {e}")
Generated
+2682 -16
View File
File diff suppressed because it is too large Load Diff
+3 -1
View File
@@ -6,7 +6,7 @@ authors = ["dmitrium12 <belicdima8@gmail.com>"]
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.11"
python = ">=3.11,<3.12"
vosk = "^0.3.45"
pvporcupine = "^3.0.1"
pvrecorder = "^1.2.1"
@@ -28,6 +28,8 @@ ollama = "^0.1.6"
ruff = "^0.4.2"
noisereduce = "^3.0.2"
environs = "^11.0.0"
webrtcvad = "^2.0.10"
tts = "^0.22.0"
[[tool.poetry.source]]
+55
View File
@@ -0,0 +1,55 @@
import re
import requests
from bs4 import BeautifulSoup
def filter_string(input_string: str) -> str:
allowed_chars = []
for j in "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя1234567890 !,.?-":
allowed_chars.append(j)
input_string = re.sub(r'^\d+.\s+', '', input_string)
return ''.join([char for char in input_string if char in allowed_chars])
repetition = 0
response = {}
soup = BeautifulSoup(
requests.get('https://theportalwiki.com/wiki/GLaDOS_voice_lines/ru').text,
features='html.parser'
)
for li in soup.find_all('li'):
try:
i = li.find('i').text
url = li.find('span', class_=['audio-player']).find('a')['href']
if i not in response.keys():
response[i] = url
else:
repetition += 1
except AttributeError:
pass
try:
i = li.find('a').text
url = li.find('a')['href']
if i not in response.keys():
response[i] = url
else:
repetition += 1
except AttributeError:
pass
print(f'Количество найденный элементов: {len(response)}')
print(f'Количество повторении: {repetition}')
with open('MyTTSDataset/transcript.txt', 'w') as f:
for index, (key, value) in enumerate(response.items()):
try:
response = requests.get(value)
if response.status_code == 200:
key = filter_string(key)
if key and len(key.replace(" ", "")) > 3:
with open(f"MyTTSDataset/wavs/wav{index}.wav", 'wb') as file:
file.write(response.content)
f.write(f'wav{index}|{key}\n')
except requests.exceptions.MissingSchema:
pass
except requests.exceptions.InvalidSchema:
pass
-8
View File
@@ -16,14 +16,6 @@ def execute_cmd(self, cmd: str, recognized_phrase: str, voice: str) -> None:
self.play("off", True)
self.porcupine.delete()
exit(0)
elif cmd == 'music_on':
self.media_player_controller.play_pause()
elif cmd == 'music_off':
self.media_player_controller.play_pause()
elif cmd == 'music_next':
self.media_player_controller.next_track()
elif cmd == 'music_previous':
self.media_player_controller.previous_track()
elif cmd == 'home_assistant_execute':
self.home_assistant.send_process(recognized_phrase)
elif cmd == 'home_assistant_get':
+57
View File
@@ -0,0 +1,57 @@
import os
import torch
import torchaudio
def load_data(audio_folder):
audios = []
texts = []
for audio_file in os.listdir(audio_folder):
if audio_file.endswith('.wav'):
audio_path = os.path.join(audio_folder, audio_file)
waveform, sample_rate = torchaudio.load(audio_path)
text_path = audio_path.replace('.wav', '.txt')
with open(text_path) as f:
text = f.read().strip()
audios.append((waveform, sample_rate))
texts.append(text)
return audios, texts
def train(model, audios, texts, epochs=3, learning_rate=1e-4):
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.MSELoss() # Вам нужно будет настроить эту функцию под вашу задачу
model.train()
for epoch in range(epochs):
total_loss = 0
for waveform, text in zip(audios, texts):
optimizer.zero_grad()
# Предполагается, что модель принимает текст и возвращает аудио
predicted_waveform = model(text)
loss = criterion(predicted_waveform, waveform)
loss.backward()
optimizer.step()
total_loss += loss.item()
average_loss = total_loss / len(audios)
print(f'Epoch {epoch + 1}: Average Loss = {average_loss}')
def main():
model_path = 'data/v4_ru.pt'
model = torch.load(model_path)
model.eval()
audio_folder = 'wav_files'
audios, texts = load_data(audio_folder)
train(model, audios, texts)
torch.save(model.state_dict(), 'fine_tuned_model.pth')
model.eval()
sample_text = "Пример текста для синтеза."
with torch.no_grad():
generated_waveform = model(sample_text)
torchaudio.save('output_audio.wav', generated_waveform, 16000)
if __name__ == '__main__':
main()