Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 727376f495 | |||
| 9b7ca85831 | |||
| 197dc50529 |
@@ -8,6 +8,9 @@ __pycache__/
|
||||
# Custom
|
||||
data/model_small/
|
||||
data/model_large/
|
||||
data/v4_ru.pt
|
||||
MyTTSDataset/
|
||||
vocal.wav
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
@@ -76,5 +76,6 @@ home_assistant_execute:
|
||||
- включи телевизор
|
||||
- выключи телевизор
|
||||
- начни уборку
|
||||
- убрать мою комнату
|
||||
home_assistant_get:
|
||||
- тест
|
||||
Generated
+2682
-16
File diff suppressed because it is too large
Load Diff
+3
-1
@@ -6,7 +6,7 @@ authors = ["dmitrium12 <belicdima8@gmail.com>"]
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.11"
|
||||
python = ">=3.11,<3.12"
|
||||
vosk = "^0.3.45"
|
||||
pvporcupine = "^3.0.1"
|
||||
pvrecorder = "^1.2.1"
|
||||
@@ -28,6 +28,8 @@ ollama = "^0.1.6"
|
||||
ruff = "^0.4.2"
|
||||
noisereduce = "^3.0.2"
|
||||
environs = "^11.0.0"
|
||||
webrtcvad = "^2.0.10"
|
||||
tts = "^0.22.0"
|
||||
|
||||
|
||||
[[tool.poetry.source]]
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
import re
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def filter_string(input_string: str) -> str:
    """Strip a leading list number and drop every disallowed character.

    A prefix of the form ``"<digits>. "`` at the very start of the string is
    removed first. After that only Russian letters, ASCII digits, the space
    and the punctuation ``! , . ? -`` are kept; everything else is dropped.

    Args:
        input_string: Raw transcript text.

    Returns:
        The cleaned text, possibly empty.
    """
    # A frozenset gives O(1) membership tests instead of scanning a list.
    allowed_chars = frozenset(
        "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя1234567890 !,.?-"
    )
    # The dot is escaped on purpose: an unescaped "." matches any character,
    # which would also eat the start of texts such as "12 яблок".
    input_string = re.sub(r'^\d+\.\s+', '', input_string)
    return ''.join(char for char in input_string if char in allowed_chars)
|
||||
|
||||
|
||||
# Scrape the Russian GLaDOS voice-line page, collect "text -> audio URL"
# pairs, then download every audio file and write a "name|text" transcript.
import os  # imported here: only this script section needs it (output folders)

repetition = 0
response = {}  # transcript text -> URL of the matching audio file
page = requests.get(
    'https://theportalwiki.com/wiki/GLaDOS_voice_lines/ru',
    timeout=30,  # never hang forever on a stalled connection
)
soup = BeautifulSoup(page.text, features='html.parser')
for li in soup.find_all('li'):
    # Candidate 1: text inside <i>, URL from the link in the audio-player span.
    try:
        phrase = li.find('i').text
        url = li.find('span', class_=['audio-player']).find('a')['href']
    except (AttributeError, KeyError, TypeError):
        # A tag is missing (find() returned None) or the link has no href.
        pass
    else:
        if phrase not in response:
            response[phrase] = url
        else:
            repetition += 1
    # Candidate 2: the first <a> of the item supplies both text and URL.
    try:
        link = li.find('a')
        phrase = link.text
        url = link['href']
    except (AttributeError, KeyError, TypeError):
        pass
    else:
        if phrase not in response:
            response[phrase] = url
        else:
            repetition += 1
print(f'Количество найденный элементов: {len(response)}')
print(f'Количество повторении: {repetition}')
# The dataset folder is not guaranteed to exist yet, so create it up front.
os.makedirs('MyTTSDataset/wavs', exist_ok=True)
# Explicit UTF-8: the transcript is Cyrillic and must not depend on the locale.
with open('MyTTSDataset/transcript.txt', 'w', encoding='utf-8') as f:
    for index, (key, value) in enumerate(response.items()):
        try:
            audio = requests.get(value, timeout=30)
        except (requests.exceptions.MissingSchema,
                requests.exceptions.InvalidSchema):
            # Relative or non-HTTP hrefs are not downloadable; skip them.
            continue
        if audio.status_code != 200:
            continue
        key = filter_string(key)
        # Skip texts that are empty or too short after filtering.
        if not key or len(key.replace(" ", "")) <= 3:
            continue
        with open(f"MyTTSDataset/wavs/wav{index}.wav", 'wb') as file:
            file.write(audio.content)
        f.write(f'wav{index}|{key}\n')
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
import os
|
||||
|
||||
import torch
|
||||
import torchaudio
|
||||
|
||||
|
||||
def load_data(audio_folder):
    """Load every ``.wav`` file in a folder together with its transcript.

    For each ``name.wav`` the transcript is read from ``name.txt`` in the same
    folder and stripped of surrounding whitespace.

    Args:
        audio_folder: Path of the folder to scan (not recursive).

    Returns:
        A tuple ``(audios, texts)``. ``audios`` is a list of
        ``(waveform, sample_rate)`` pairs as returned by ``torchaudio.load``;
        ``texts`` is the list of transcripts in the same order.

    Raises:
        OSError: If the folder or a transcript file cannot be read.
    """
    audios = []
    texts = []
    # os.listdir order is arbitrary; sorting makes the result reproducible.
    for audio_file in sorted(os.listdir(audio_folder)):
        if not audio_file.endswith('.wav'):
            continue
        audio_path = os.path.join(audio_folder, audio_file)
        waveform, sample_rate = torchaudio.load(audio_path)
        # Swap only the trailing extension; str.replace would also rewrite a
        # ".wav" that happens to appear in a folder name or mid-filename.
        text_path = audio_path[:-len('.wav')] + '.txt'
        with open(text_path, encoding='utf-8') as f:
            text = f.read().strip()
        audios.append((waveform, sample_rate))
        texts.append(text)
    return audios, texts
|
||||
|
||||
|
||||
def train(model, audios, texts, epochs=3, learning_rate=1e-4):
    """Fine-tune ``model`` on (text, waveform) pairs with Adam and MSE loss.

    Args:
        model: A ``torch.nn.Module`` that is called with a text and whose
            output is compared against the target waveform.
        audios: Target audio, one entry per text. Each entry is either a
            waveform tensor or a ``(waveform, sample_rate)`` pair.
        texts: Input texts, aligned with ``audios``.
        epochs: Number of passes over the data.
        learning_rate: Learning rate passed to Adam.

    Returns:
        None. The model is updated in place and the average loss of every
        epoch is printed.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = torch.nn.MSELoss()  # You will need to adapt this loss to your task

    model.train()
    # Accept both bare waveforms and (waveform, sample_rate) pairs: the loss
    # needs the tensor only, and passing the whole pair to MSELoss would fail.
    samples = []
    for audio, text in zip(audios, texts):
        waveform = audio[0] if isinstance(audio, (tuple, list)) else audio
        samples.append((waveform, text))
    if not samples:
        # Nothing to train on; also avoids dividing by zero below.
        return
    for epoch in range(epochs):
        total_loss = 0
        for waveform, text in samples:
            optimizer.zero_grad()
            # Assumes the model takes text and returns audio
            predicted_waveform = model(text)
            loss = criterion(predicted_waveform, waveform)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Divide by the number of steps actually taken (zip may truncate).
        average_loss = total_loss / len(samples)
        print(f'Epoch {epoch + 1}: Average Loss = {average_loss}')
|
||||
|
||||
|
||||
def main():
    """Load the checkpoint, fine-tune it, save the weights and a sample.

    Reads the model from ``data/v4_ru.pt`` and the training data from the
    ``wav_files`` folder, writes the state dict to ``fine_tuned_model.pth``
    and a synthesized sample to ``output_audio.wav``.
    """
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # that come from a trusted source.
    model = torch.load('data/v4_ru.pt')
    model.eval()

    audios, texts = load_data('wav_files')
    train(model, audios, texts)
    torch.save(model.state_dict(), 'fine_tuned_model.pth')

    # Back to inference mode for the sample synthesis.
    model.eval()
    with torch.no_grad():
        generated_waveform = model("Пример текста для синтеза.")
    torchaudio.save('output_audio.wav', generated_waveform, 16000)


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user