4 Commits

Author SHA1 Message Date
Dmitrium12 ee5acf4316 Merge pull request 'add_music_functions' (#6) from add_music_functions into master
Reviewed-on: #6
2024-05-09 16:36:26 +07:00
Dmitrium12 36391ba187 add comment 2024-05-09 16:33:00 +07:00
Dmitrium12 aa1e424e8b windows fix 2024-05-09 16:23:37 +07:00
Dmitrium12 789f4f38d5 add MediaPlayerController 2024-05-09 14:06:49 +07:00
9 changed files with 118 additions and 2803 deletions
-3
View File
@@ -8,9 +8,6 @@ __pycache__/
# Custom # Custom
data/model_small/ data/model_small/
data/model_large/ data/model_large/
data/v4_ru.pt
MyTTSDataset/
vocal.wav
# C extensions # C extensions
*.so *.so
-1
View File
@@ -76,6 +76,5 @@ home_assistant_execute:
- включи телевизор - включи телевизор
- выключи телевизор - выключи телевизор
- начни уборку - начни уборку
- убрать мою комнату
home_assistant_get: home_assistant_get:
- тест - тест
+3 -2
View File
@@ -11,7 +11,7 @@ from fuzzywuzzy import fuzz
from pvrecorder import PvRecorder from pvrecorder import PvRecorder
from data import config from data import config
from modules import HomeAssistant from modules import HomeAssistant, MediaPlayerController
from utils import download_models, execute_cmd, play from utils import download_models, execute_cmd, play
@@ -25,6 +25,7 @@ class Jarvis:
self.CDIR = os.getcwd() self.CDIR = os.getcwd()
self.VA_CMD_LIST = yaml.safe_load(open('data/commands.yaml', encoding='utf8')) self.VA_CMD_LIST = yaml.safe_load(open('data/commands.yaml', encoding='utf8'))
self.home_assistant = HomeAssistant.HomeAssistant() self.home_assistant = HomeAssistant.HomeAssistant()
self.media_player_controller = MediaPlayerController.MediaPlayerController()
self.porcupine = pvporcupine.create( self.porcupine = pvporcupine.create(
access_key=config.PICOVOICE_TOKEN, access_key=config.PICOVOICE_TOKEN,
keywords=['jarvis'], keywords=['jarvis'],
@@ -73,7 +74,7 @@ class Jarvis:
:return: bool - распознана или нет команда :return: bool - распознана или нет команда
""" """
print(f"Распознано: {voice}") print(f"Распознано: {voice}")
for x in config.VA_ALIAS + config.VA_TBR: for x in config.VA_ALIAS:
voice = voice.replace(x, "").strip() voice = voice.replace(x, "").strip()
rc = {'cmd': '', 'percent': 0} rc = {'cmd': '', 'percent': 0}
for c, v in self.VA_CMD_LIST.items(): for c, v in self.VA_CMD_LIST.items():
+90
View File
@@ -0,0 +1,90 @@
import platform
import subprocess
class MediaPlayerController:
    """
    Control media playback (play/pause, next track, previous track).

    On Windows the multimedia virtual keys are simulated through the
    ``win32api`` / ``win32con`` modules; on Linux the ``playerctl`` command
    is executed. On any other platform the public methods do nothing.
    """

    def __init__(self):
        # Detected once; every public method dispatches on this value.
        self.os_type = platform.system()

    def play_pause(self) -> None:
        """
        Toggle playback (start / pause the music).

        :return:
        """
        if self.os_type == 'Windows':
            self._windows_play_pause()
        elif self.os_type == 'Linux':
            self._linux_control("play-pause")

    def next_track(self) -> None:
        """
        Switch to the next track.

        :return:
        """
        if self.os_type == 'Windows':
            self._windows_control("next")
        elif self.os_type == 'Linux':
            self._linux_control("next")

    def previous_track(self) -> None:
        """
        Switch to the previous track.

        :return:
        """
        if self.os_type == 'Windows':
            self._windows_control("previous")
        elif self.os_type == 'Linux':
            self._linux_control("previous")

    def _windows_play_pause(self) -> None:
        """
        Toggle playback on Windows by pressing the play/pause media key.

        :return:
        """
        # Imported lazily so the module can still be imported on systems
        # where the win32 modules are not installed.
        import win32con

        self.key_press(win32con.VK_MEDIA_PLAY_PAUSE)

    def _windows_control(self, action: str) -> None:
        """
        Switch to the next or previous track on Windows.

        :param action: str - "next" or "previous"; any other value is ignored
        :return:
        """
        import win32con

        if action == "next":
            self.key_press(win32con.VK_MEDIA_NEXT_TRACK)
        elif action == "previous":
            self.key_press(win32con.VK_MEDIA_PREV_TRACK)

    @staticmethod
    def key_press(key_code: int) -> None:
        """
        Simulate pressing and releasing a key.

        :param key_code: int - virtual-key code of the key to press
            (the callers in this class pass ``win32con.VK_*`` constants)
        :return:
        """
        import win32api
        import win32con

        # Key down, immediately followed by key up.
        win32api.keybd_event(key_code, 0, 0, 0)
        win32api.keybd_event(key_code, 0, win32con.KEYEVENTF_KEYUP, 0)

    @staticmethod
    def _linux_control(command: str) -> None:
        """
        Run a ``playerctl`` command on Linux.

        Failures are reported with ``print`` instead of being raised, so a
        failing media command does not propagate to the caller.

        :param command: str - playerctl sub-command to run
        :return:
        """
        try:
            subprocess.run(["playerctl", command], check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # CalledProcessError: playerctl exited with a non-zero status.
            # OSError (e.g. FileNotFoundError): playerctl could not be started.
            print(f"Failed to {command}: {e}")
Generated
+16 -2682
View File
File diff suppressed because it is too large Load Diff
+1 -3
View File
@@ -6,7 +6,7 @@ authors = ["dmitrium12 <belicdima8@gmail.com>"]
readme = "README.md" readme = "README.md"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = ">=3.11,<3.12" python = "^3.11"
vosk = "^0.3.45" vosk = "^0.3.45"
pvporcupine = "^3.0.1" pvporcupine = "^3.0.1"
pvrecorder = "^1.2.1" pvrecorder = "^1.2.1"
@@ -28,8 +28,6 @@ ollama = "^0.1.6"
ruff = "^0.4.2" ruff = "^0.4.2"
noisereduce = "^3.0.2" noisereduce = "^3.0.2"
environs = "^11.0.0" environs = "^11.0.0"
webrtcvad = "^2.0.10"
tts = "^0.22.0"
[[tool.poetry.source]] [[tool.poetry.source]]
-55
View File
@@ -1,55 +0,0 @@
import re
import requests
from bs4 import BeautifulSoup
# Characters that filter_string keeps: Russian letters, digits, space and
# basic punctuation. Built once; a frozenset gives O(1) membership tests.
_ALLOWED_CHARS = frozenset(
    "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя1234567890 !,.?-"
)


def filter_string(input_string: str) -> str:
    """
    Strip a leading list number and drop every character that is not allowed.

    A leading enumeration marker such as ``"12. "`` or ``"12) "`` is removed
    first; then only Russian letters, digits, spaces and ``! , . ? -`` are
    kept.

    :param input_string: str - raw text
    :return: str - cleaned text (may be empty)
    """
    # The separator is matched literally ([.)]): an unescaped "." would match
    # any character and eat real leading numbers, e.g. "10 лет" -> "лет".
    input_string = re.sub(r'^\d+[.)]\s+', '', input_string)
    return ''.join(char for char in input_string if char in _ALLOWED_CHARS)
# Number of phrases seen more than once while scanning the page.
repetition = 0
# Maps phrase text -> URL taken from the matching link.
response = {}
soup = BeautifulSoup(
    requests.get('https://theportalwiki.com/wiki/GLaDOS_voice_lines/ru').text,
    features='html.parser'
)

# Errors raised for list items that lack the expected markup:
#   AttributeError - calling .text / .find on a None result,
#   TypeError      - subscripting a None result (None['href']),
#   KeyError       - an <a> tag without an href attribute.
_MARKUP_ERRORS = (AttributeError, TypeError, KeyError)

for li in soup.find_all('li'):
    # Variant 1: phrase in <i>, link inside <span class="audio-player">.
    try:
        i = li.find('i').text
        url = li.find('span', class_=['audio-player']).find('a')['href']
        if i not in response:
            response[i] = url
        else:
            repetition += 1
    except _MARKUP_ERRORS:
        pass
    # Variant 2: the first <a> of the item supplies both text and link.
    try:
        i = li.find('a').text
        url = li.find('a')['href']
        if i not in response:
            response[i] = url
        else:
            repetition += 1
    except _MARKUP_ERRORS:
        pass

print(f'Количество найденный элементов: {len(response)}')
print(f'Количество повторении: {repetition}')

# The transcript holds Cyrillic text, so the encoding is fixed explicitly
# instead of relying on the platform default.
with open('MyTTSDataset/transcript.txt', 'w', encoding='utf-8') as f:
    for index, (key, value) in enumerate(response.items()):
        try:
            # Separate name: the dict being iterated is also called `response`.
            audio = requests.get(value)
        except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
            # Not a downloadable absolute URL - skip this entry.
            continue
        if audio.status_code != 200:
            continue
        key = filter_string(key)
        # Keep only phrases with more than 3 non-space characters.
        if key and len(key.replace(" ", "")) > 3:
            with open(f"MyTTSDataset/wavs/wav{index}.wav", 'wb') as file:
                file.write(audio.content)
            f.write(f'wav{index}|{key}\n')
+8
View File
@@ -16,6 +16,14 @@ def execute_cmd(self, cmd: str, recognized_phrase: str, voice: str) -> None:
self.play("off", True) self.play("off", True)
self.porcupine.delete() self.porcupine.delete()
exit(0) exit(0)
elif cmd == 'music_on':
self.media_player_controller.play_pause()
elif cmd == 'music_off':
self.media_player_controller.play_pause()
elif cmd == 'music_next':
self.media_player_controller.next_track()
elif cmd == 'music_previous':
self.media_player_controller.previous_track()
elif cmd == 'home_assistant_execute': elif cmd == 'home_assistant_execute':
self.home_assistant.send_process(recognized_phrase) self.home_assistant.send_process(recognized_phrase)
elif cmd == 'home_assistant_get': elif cmd == 'home_assistant_get':
-57
View File
@@ -1,57 +0,0 @@
import os
import torch
import torchaudio
def load_data(audio_folder):
    """
    Load every ``.wav`` file in a folder together with its transcript.

    For each ``name.wav`` the transcript is read from ``name.txt`` in the
    same folder.

    :param audio_folder: path of the folder to scan (not recursive)
    :return: tuple ``(audios, texts)`` - ``audios`` is a list of
        ``(waveform, sample_rate)`` pairs as returned by ``torchaudio.load``,
        ``texts`` is the list of stripped transcripts in the same order
    """
    audios = []
    texts = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # resulting dataset order reproducible between runs.
    for audio_file in sorted(os.listdir(audio_folder)):
        if not audio_file.endswith('.wav'):
            continue
        audio_path = os.path.join(audio_folder, audio_file)
        # Swap only the extension; str.replace('.wav', '.txt') would also
        # rewrite any '.wav' that occurs earlier in the path.
        text_path = os.path.splitext(audio_path)[0] + '.txt'
        waveform, sample_rate = torchaudio.load(audio_path)
        # Explicit encoding: do not depend on the platform default.
        with open(text_path, encoding='utf-8') as f:
            text = f.read().strip()
        audios.append((waveform, sample_rate))
        texts.append(text)
    return audios, texts
def train(model, audios, texts, epochs=3, learning_rate=1e-4):
    """
    Fine-tune ``model`` on (audio, text) pairs with Adam and an MSE loss.

    The model is called with the text and its output is compared against the
    target waveform. The average loss is printed after every epoch.

    :param model: torch module called as ``model(text)``
    :param audios: list of target waveforms; each item may be a bare tensor
        or a ``(waveform, sample_rate)`` tuple as produced by ``load_data``
    :param texts: list of transcripts, paired with ``audios`` by position
    :param epochs: number of passes over the data
    :param learning_rate: Adam learning rate
    :return: None
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = torch.nn.MSELoss()  # placeholder loss; adjust it to the actual task
    model.train()

    pairs = list(zip(audios, texts))
    if not pairs:
        # Nothing to train on; also avoids dividing by zero below.
        return

    for epoch in range(epochs):
        total_loss = 0
        for audio, text in pairs:
            # load_data yields (waveform, sample_rate); the loss needs only
            # the waveform tensor.
            waveform = audio[0] if isinstance(audio, tuple) else audio
            optimizer.zero_grad()
            # The model is assumed to take text and return audio.
            predicted_waveform = model(text)
            loss = criterion(predicted_waveform, waveform)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Average over the pairs actually processed (zip stops at the
        # shorter of the two lists).
        average_loss = total_loss / len(pairs)
        print(f'Epoch {epoch + 1}: Average Loss = {average_loss}')
def main():
    """
    Fine-tune the stored model on the local wav files and synthesize a sample.

    Steps: load the model from ``data/v4_ru.pt``, train it on the data found
    in ``wav_files``, save the resulting state dict to
    ``fine_tuned_model.pth``, then generate one sample phrase and write it
    to ``output_audio.wav``.
    """
    # NOTE(review): torch.load unpickles the file - only use trusted checkpoints.
    model = torch.load('data/v4_ru.pt')
    model.eval()

    audios, texts = load_data('wav_files')
    train(model, audios, texts)
    torch.save(model.state_dict(), 'fine_tuned_model.pth')

    # Back to inference mode for the demo synthesis; no gradients needed.
    model.eval()
    with torch.no_grad():
        generated_waveform = model("Пример текста для синтеза.")
    torchaudio.save('output_audio.wav', generated_waveform, 16000)


if __name__ == '__main__':
    main()