Compare commits
3 Commits
master
...
add_tts_utils
| Author | SHA1 | Date | |
|---|---|---|---|
| 727376f495 | |||
| 9b7ca85831 | |||
| 197dc50529 |
@@ -8,6 +8,9 @@ __pycache__/
|
|||||||
# Custom
|
# Custom
|
||||||
data/model_small/
|
data/model_small/
|
||||||
data/model_large/
|
data/model_large/
|
||||||
|
data/v4_ru.pt
|
||||||
|
MyTTSDataset/
|
||||||
|
vocal.wav
|
||||||
|
|
||||||
# C extensions
|
# C extensions
|
||||||
*.so
|
*.so
|
||||||
|
|||||||
@@ -76,5 +76,6 @@ home_assistant_execute:
|
|||||||
- включи телевизор
|
- включи телевизор
|
||||||
- выключи телевизор
|
- выключи телевизор
|
||||||
- начни уборку
|
- начни уборку
|
||||||
|
- убрать мою комнату
|
||||||
home_assistant_get:
|
home_assistant_get:
|
||||||
- тест
|
- тест
|
||||||
Generated
+2682
-16
File diff suppressed because it is too large
Load Diff
+3
-1
@@ -6,7 +6,7 @@ authors = ["dmitrium12 <belicdima8@gmail.com>"]
|
|||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.11"
|
python = ">=3.11,<3.12"
|
||||||
vosk = "^0.3.45"
|
vosk = "^0.3.45"
|
||||||
pvporcupine = "^3.0.1"
|
pvporcupine = "^3.0.1"
|
||||||
pvrecorder = "^1.2.1"
|
pvrecorder = "^1.2.1"
|
||||||
@@ -28,6 +28,8 @@ ollama = "^0.1.6"
|
|||||||
ruff = "^0.4.2"
|
ruff = "^0.4.2"
|
||||||
noisereduce = "^3.0.2"
|
noisereduce = "^3.0.2"
|
||||||
environs = "^11.0.0"
|
environs = "^11.0.0"
|
||||||
|
webrtcvad = "^2.0.10"
|
||||||
|
tts = "^0.22.0"
|
||||||
|
|
||||||
|
|
||||||
[[tool.poetry.source]]
|
[[tool.poetry.source]]
|
||||||
|
|||||||
@@ -0,0 +1,55 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
def filter_string(input_string: str) -> str:
    """Return *input_string* reduced to characters usable in a transcript.

    A leading list number of the form ``"12. "`` (digits, a literal dot,
    whitespace) is removed first. After that, every character that is not a
    Russian letter, an ASCII digit, a space, or one of ``! , . ? -`` is
    dropped.

    Args:
        input_string: Raw text taken from the scraped page.

    Returns:
        The filtered text; may be empty.
    """
    # A frozenset gives O(1) membership tests instead of scanning a list.
    allowed_chars = frozenset(
        "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя1234567890 !,.?-"
    )
    # The dot is escaped on purpose: an unescaped "." matches any character,
    # which strips genuine leading numbers such as "100 " from the phrase.
    input_string = re.sub(r'^\d+\.\s+', '', input_string)
    return ''.join(char for char in input_string if char in allowed_chars)
|
||||||
|
|
||||||
|
|
||||||
|
# Scrape the Russian GLaDOS voice-line page, collect text -> link pairs from
# every <li>, then download each link and write a "wavN|text" transcript.
REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the run

repetition = 0  # how many times an already-seen text was encountered again
response = {}  # text -> link; the first link found for a text wins

soup = BeautifulSoup(
    requests.get(
        'https://theportalwiki.com/wiki/GLaDOS_voice_lines/ru',
        timeout=REQUEST_TIMEOUT,
    ).text,
    features='html.parser',
)
for li in soup.find_all('li'):
    italic = li.find('i')
    player = li.find('span', class_=['audio-player'])
    player_link = player.find('a') if player is not None else None
    first_link = li.find('a')

    # Each <li> can contribute up to two pairs: the italic text paired with
    # the audio-player link, and the first <a>'s own text paired with its href.
    candidates = []
    if italic is not None and player_link is not None:
        candidates.append((italic.text, player_link.get('href')))
    if first_link is not None:
        candidates.append((first_link.text, first_link.get('href')))

    for text, url in candidates:
        if url is None:
            # An <a> without href has nothing to download.
            continue
        if text not in response:
            response[text] = url
        else:
            repetition += 1

print(f'Количество найденный элементов: {len(response)}')
print(f'Количество повторении: {repetition}')

# Explicit UTF-8: the transcript holds Cyrillic text and must not depend on
# the platform's default encoding.
with open('MyTTSDataset/transcript.txt', 'w', encoding='utf-8') as f:
    for index, (key, value) in enumerate(response.items()):
        try:
            # Named "reply" so the dict being iterated is not rebound.
            reply = requests.get(value, timeout=REQUEST_TIMEOUT)
        except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
            # Relative or non-HTTP links are expected on the page; skip them.
            continue
        except requests.exceptions.RequestException as error:
            # One failed download should not abort the whole scrape.
            print(f'Skipping {value}: {error}')
            continue

        if reply.status_code != 200:
            continue
        key = filter_string(key)
        # Keep only phrases with more than three non-space characters.
        if len(key.replace(" ", "")) <= 3:
            continue
        with open(f"MyTTSDataset/wavs/wav{index}.wav", 'wb') as file:
            file.write(reply.content)
        f.write(f'wav{index}|{key}\n')
|
||||||
|
|||||||
@@ -0,0 +1,57 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torchaudio
|
||||||
|
|
||||||
|
|
||||||
|
def load_data(audio_folder):
    """Load every ``.wav`` file in *audio_folder* together with its transcript.

    For each ``name.wav`` the transcript is read from ``name.txt`` in the same
    folder and stripped of surrounding whitespace.

    Args:
        audio_folder: Directory containing the ``.wav`` / ``.txt`` pairs.

    Returns:
        A tuple ``(audios, texts)`` of equal-length lists. ``audios`` holds
        ``(waveform, sample_rate)`` tuples as returned by ``torchaudio.load``;
        ``texts`` holds the matching transcript strings.

    Raises:
        OSError: If the folder or a transcript file cannot be read.
    """
    audios = []
    texts = []
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for audio_file in sorted(os.listdir(audio_folder)):
        if not audio_file.endswith('.wav'):
            continue
        audio_path = os.path.join(audio_folder, audio_file)
        waveform, sample_rate = torchaudio.load(audio_path)
        # Swap only the final extension; str.replace would also rewrite a
        # ".wav" that appears elsewhere in the path (e.g. a directory name).
        text_path = os.path.splitext(audio_path)[0] + '.txt'
        # NOTE(review): assumes transcripts are UTF-8 encoded; confirm.
        with open(text_path, encoding='utf-8') as f:
            text = f.read().strip()
        audios.append((waveform, sample_rate))
        texts.append(text)
    return audios, texts
|
||||||
|
|
||||||
|
|
||||||
|
def train(model, audios, texts, epochs=3, learning_rate=1e-4):
    """Fine-tune *model* on paired audio and text with Adam and an MSE loss.

    Args:
        model: A ``torch.nn.Module`` that is called with one text item and
            returns a waveform tensor.
        audios: Target audio, one entry per text. Each entry is either a
            waveform tensor or a ``(waveform, sample_rate)`` tuple as produced
            by ``load_data``; only the waveform is used.
        texts: Inputs passed to the model, aligned with *audios*.
        epochs: Number of passes over the data.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        None. The model is updated in place and the average loss is printed
        after every epoch.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = torch.nn.MSELoss()  # You will need to adapt this loss to your task

    # Materialize the pairs once so the per-epoch average is divided by the
    # number of samples actually processed (zip stops at the shorter list).
    pairs = list(zip(audios, texts))
    if not pairs:
        print('No training samples; nothing to do.')
        return

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for audio, text in pairs:
            # load_data yields (waveform, sample_rate); the loss needs the tensor.
            waveform = audio[0] if isinstance(audio, tuple) else audio
            optimizer.zero_grad()
            # Assumes the model takes text and returns audio
            predicted_waveform = model(text)
            loss = criterion(predicted_waveform, waveform)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        average_loss = total_loss / len(pairs)
        print(f'Epoch {epoch + 1}: Average Loss = {average_loss}')
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Load the checkpoint, fine-tune it, save the weights and synthesize a sample.

    Reads the model from ``data/v4_ru.pt`` and the training pairs from
    ``wav_files``, writes the state dict to ``fine_tuned_model.pth`` and a
    sample waveform to ``output_audio.wav``.
    """
    # NOTE(review): torch.load unpickles the file; only use a trusted checkpoint.
    model = torch.load('data/v4_ru.pt')
    model.eval()

    audios, texts = load_data('wav_files')
    train(model, audios, texts)
    torch.save(model.state_dict(), 'fine_tuned_model.pth')

    # Back to inference mode before generating the sample.
    model.eval()
    with torch.no_grad():
        generated_waveform = model("Пример текста для синтеза.")
    torchaudio.save('output_audio.wav', generated_waveform, 16000)


if __name__ == '__main__':
    main()
|
||||||
Reference in New Issue
Block a user