fix download dataset

add test download dataset
vrem ton working
2024-05-18 19:56:30 +07:00 · 2024-05-18 11:10:43 +07:00 · 2024-05-05 19:56:01 +07:00
6 changed files with 2801 additions and 17 deletions
@@ -8,6 +8,9 @@ __pycache__/
 # Custom
 data/model_small/
 data/model_large/
 data/v4_ru.pt
 MyTTSDataset/
 vocal.wav
 # C extensions
 *.so
@@ -76,5 +76,6 @@ home_assistant_execute:
  - включи телевизор
  - выключи телевизор
  - начни уборку
  - убрать мою комнату
 home_assistant_get:
  - тест
@@ -6,7 +6,7 @@ authors = ["dmitrium12 <belicdima8@gmail.com>"]
 readme = "README.md"
 [tool.poetry.dependencies]
-python = "^3.11"
+python = ">=3.11,<3.12"
 vosk = "^0.3.45"
 pvporcupine = "^3.0.1"
 pvrecorder = "^1.2.1"
@@ -28,6 +28,8 @@ ollama = "^0.1.6"
 ruff = "^0.4.2"
 noisereduce = "^3.0.2"
 environs = "^11.0.0"
 webrtcvad = "^2.0.10"
 tts = "^0.22.0"
 [[tool.poetry.source]]
@@ -0,0 +1,55 @@
 import os
 import re

 import requests
 from bs4 import BeautifulSoup
 # Characters kept in a transcript: Russian letters, digits, space and basic
 # punctuation. Built once so membership tests are O(1).
 _ALLOWED_CHARS = frozenset(
    "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя1234567890 !,.?-"
 )
 # Leading list numbering such as "12. ". The dot is escaped on purpose: an
 # unescaped dot matches any character and would also strip the leading
 # number from text like "10 лет назад".
 _LIST_NUMBER_RE = re.compile(r'^\d+\.\s+')


 def filter_string(input_string: str) -> str:
    """Return *input_string* reduced to the characters allowed in a transcript.

    A leading list number (digits, a literal dot, then whitespace) is removed
    first; afterwards every character that is not a Russian letter, a digit,
    a space or one of ``! , . ? -`` is dropped.

    Args:
        input_string: Raw text scraped from the page.

    Returns:
        The filtered text; may be empty when nothing allowed remains.
    """
    without_number = _LIST_NUMBER_RE.sub('', input_string)
    return ''.join(char for char in without_number if char in _ALLOWED_CHARS)
 # Scrape the Russian GLaDOS voice-line page, collect text -> URL pairs and
 # download every entry that yields a usable transcript.
 repetition = 0  # number of texts seen more than once on the page
 response = {}  # maps scraped text -> URL taken from the same <li>
 soup = BeautifulSoup(
    # A timeout keeps the script from hanging forever on a stalled connection.
    requests.get('https://theportalwiki.com/wiki/GLaDOS_voice_lines/ru', timeout=30).text,
    features='html.parser'
 )
 for li in soup.find_all('li'):
    candidates = []

    # Candidate 1: italic text paired with the link inside the audio-player span.
    italic = li.find('i')
    player = li.find('span', class_=['audio-player'])
    player_link = player.find('a') if player is not None else None
    if italic is not None and player_link is not None:
        candidates.append((italic.text, player_link.get('href')))

    # Candidate 2: the first anchor of the list item with its own text.
    anchor = li.find('a')
    if anchor is not None:
        candidates.append((anchor.text, anchor.get('href')))

    for text, url in candidates:
        # Anchors without an href carry nothing to download; skipping them
        # avoids the KeyError/TypeError a direct ['href'] lookup would raise.
        if url is None:
            continue
        if text in response:
            repetition += 1
        else:
            response[text] = url

 print(f'Количество найденный элементов: {len(response)}')
 print(f'Количество повторении: {repetition}')

 # The dataset directory is git-ignored, so it may not exist on a fresh checkout.
 os.makedirs('MyTTSDataset/wavs', exist_ok=True)
 # Explicit UTF-8: the transcript is Cyrillic and the platform default
 # encoding is not guaranteed to be able to represent it.
 with open('MyTTSDataset/transcript.txt', 'w', encoding='utf-8') as f:
    for index, (key, value) in enumerate(response.items()):
        # Filter first so no request is made for entries that would be
        # discarded anyway (more than 3 non-space characters are required).
        transcript = filter_string(key)
        if len(transcript.replace(" ", "")) <= 3:
            continue
        try:
            audio_response = requests.get(value, timeout=30)
        except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
            # Relative or non-HTTP links are expected on the page; skip quietly.
            continue
        except requests.exceptions.RequestException as exc:
            # A single failed download must not abort the whole run.
            print(f'Не удалось скачать {value}: {exc}')
            continue
        if audio_response.status_code != 200:
            continue
        # The index comes from the full mapping, so file numbers may have gaps.
        with open(f"MyTTSDataset/wavs/wav{index}.wav", 'wb') as file:
            file.write(audio_response.content)
        f.write(f'wav{index}|{transcript}\n')
@@ -0,0 +1,57 @@
 import os
 import torch
 import torchaudio
 def load_data(audio_folder):
    """Load every ``.wav`` file in *audio_folder* together with its transcript.

    For each ``name.wav`` the transcript is read from ``name.txt`` in the same
    folder and stripped of surrounding whitespace.

    Args:
        audio_folder: Directory containing the ``.wav``/``.txt`` pairs.

    Returns:
        A tuple ``(audios, texts)``: ``audios`` is a list of
        ``(waveform, sample_rate)`` pairs as returned by ``torchaudio.load``,
        and ``texts`` is the list of transcripts in the same order.

    Raises:
        FileNotFoundError: If the folder or a matching ``.txt`` file is missing.
    """
    audios = []
    texts = []
    # Sorted so the sample order is reproducible; os.listdir order is arbitrary.
    for audio_file in sorted(os.listdir(audio_folder)):
        if not audio_file.endswith('.wav'):
            continue
        audio_path = os.path.join(audio_folder, audio_file)
        waveform, sample_rate = torchaudio.load(audio_path)
        # splitext swaps only the final extension; str.replace would also
        # rewrite a '.wav' that appears in the folder name or mid-filename.
        text_path = os.path.splitext(audio_path)[0] + '.txt'
        # Explicit UTF-8 instead of the platform default encoding.
        with open(text_path, encoding='utf-8') as f:
            text = f.read().strip()
        audios.append((waveform, sample_rate))
        texts.append(text)
    return audios, texts
 def train(model, audios, texts, epochs=3, learning_rate=1e-4):
    """Fine-tune *model* so that ``model(text)`` approximates the target audio.

    Args:
        model: A ``torch.nn.Module`` called as ``model(text)``.
        audios: Target audio per sample, either a bare waveform tensor or a
            ``(waveform, sample_rate)`` pair as produced by ``load_data``.
        texts: Model inputs, paired positionally with ``audios``.
        epochs: Number of passes over the samples.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        None. The model is updated in place and the average loss of every
        epoch is printed.
    """
    samples = []
    for audio, text in zip(audios, texts):
        # load_data yields (waveform, sample_rate); the loss needs the tensor only.
        target = audio[0] if isinstance(audio, (tuple, list)) else audio
        samples.append((target, text))
    if not samples:
        # Nothing to train on; also avoids dividing by zero below.
        print('No training samples provided; skipping training.')
        return

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = torch.nn.MSELoss()  # You will need to adapt this loss function to your task
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for waveform, text in samples:
            optimizer.zero_grad()
            # Assumes the model accepts text and returns audio
            predicted_waveform = model(text)
            loss = criterion(predicted_waveform, waveform)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Average over the pairs actually processed (zip may have truncated).
        average_loss = total_loss / len(samples)
        print(f'Epoch {epoch + 1}: Average Loss = {average_loss}')
 def main():
    """Fine-tune the stored model on the local samples and synthesize a demo clip.

    Loads the model from ``data/v4_ru.pt``, trains it on the pairs found in
    ``wav_files``, writes the resulting state dict to ``fine_tuned_model.pth``
    and saves one generated sample to ``output_audio.wav`` at 16000 Hz.
    """
    checkpoint_path = 'data/v4_ru.pt'
    # NOTE(review): torch.load relies on pickle, so only load checkpoints
    # that come from a trusted source.
    tts_model = torch.load(checkpoint_path)
    tts_model.eval()

    training_audios, training_texts = load_data('wav_files')
    train(tts_model, training_audios, training_texts)
    torch.save(tts_model.state_dict(), 'fine_tuned_model.pth')

    # Back to inference mode before generating the demo clip.
    tts_model.eval()
    with torch.no_grad():
        synthesized = tts_model("Пример текста для синтеза.")
        torchaudio.save('output_audio.wav', synthesized, 16000)


 if __name__ == '__main__':
    main()
Author	SHA1	Message	Date
Dmitrium12	727376f495	fix download dataset	2024-05-18 19:56:30 +07:00
Dmitrium12	9b7ca85831	add test download dataset	2024-05-18 11:10:43 +07:00
Dmitrium12	197dc50529	vrem ton working	2024-05-05 19:56:01 +07:00