{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "18662496-bb36-45a6-99ab-b2f0b91eb534",
   "metadata": {},
   "source": [
    "## Suno Demo\n",
    "\n",
    "Text-to-speech demo of the `suno/bark` model, run through Hugging Face `transformers`.\n",
    "\n",
    "Copy-pasted from: https://colab.research.google.com/drive/1dWWkZzvu7L9Bunq9zvD-W02RFUXoW-Pd?usp=sharing#scrollTo=68QtoUqPWdLk\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "73cf9a1c-c6d4-492f-9d3d-8b19466e6014",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-time setup; uncomment to install.\n",
    "# optimum is used by to_bettertransformer(); flash-attn is only needed for the\n",
    "# flash_attention_2 alternative shown in the model-loading cell.\n",
    "#!pip3 install optimum\n",
    "#!pip install -U flash-attn --no-build-isolation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d0f3a2e-7c41-4b8e-9f16-2a6c8e1b4d37",
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports live here so that Restart Kernel -> Run All works from the top.\n",
    "import torch\n",
    "from IPython.display import Audio\n",
    "from scipy.io import wavfile\n",
    "from transformers import AutoProcessor, BarkModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f6c0c08d-b1b7-479c-ae10-bd126d925bcd",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The class `optimum.bettertransformers.transformation.BetterTransformer` is deprecated and will be removed in a future release.\n",
      "The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.\n"
     ]
    }
   ],
   "source": [
    "# Pick the device. \"cuda:3\" is the GPU this notebook was written against; fall back\n",
    "# to the first GPU, or to the CPU, when that index does not exist on this machine.\n",
    "gpu_count = torch.cuda.device_count()\n",
    "if gpu_count > 3:\n",
    "    device = \"cuda:3\"\n",
    "elif gpu_count > 0:\n",
    "    device = \"cuda:0\"\n",
    "else:\n",
    "    device = \"cpu\"\n",
    "\n",
    "# Half precision on GPU (as before); full precision on the CPU fallback.\n",
    "torch_dtype = torch.float16 if device.startswith(\"cuda\") else torch.float32\n",
    "\n",
    "processor = AutoProcessor.from_pretrained(\"suno/bark\")\n",
    "\n",
    "# Alternative attention backend (requires flash-attn):\n",
    "#model = BarkModel.from_pretrained(\"suno/bark\", torch_dtype=torch.float16, attn_implementation=\"flash_attention_2\").to(device)\n",
    "model = BarkModel.from_pretrained(\"suno/bark\", torch_dtype=torch_dtype).to(device).to_bettertransformer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f92997d-17d8-41c5-867d-fd5f7d94853f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
     ]
    }
   ],
   "source": [
    "# prepare the inputs\n",
    "text_prompt = \"Let's try generating speech, with Bark, a text-to-speech model\"\n",
    "inputs = processor(text_prompt)\n",
    "\n",
    "# generate speech\n",
    "speech_output = model.generate(**inputs.to(device))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76834cbb-66fb-443e-ad7b-54fd4fb87007",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Playback rate is read from the model's generation config and reused by later cells.\n",
    "sampling_rate = model.generation_config.sample_rate\n",
    "Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94c95582-49cf-419d-89bf-51e2e4e04379",
   "metadata": {},
   "outputs": [],
   "source": [
    "voice_preset = \"v2/en_speaker_6\"\n",
    "\n",
    "# prepare the inputs\n",
    "text_prompt = \"[laughter] Let's try generating speech, with Bark, a text-to-speech model\"\n",
    "inputs = processor(text_prompt, voice_preset=voice_preset)\n",
    "\n",
    "# generate speech\n",
    "speech_output = model.generate(**inputs.to(device))\n",
    "\n",
    "# let's hear it\n",
    "Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "330d02ed-abfe-4fcc-9858-c12f9add3ce5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lower-temperature sampling with the same inputs as the previous cell.\n",
    "# Beam search (num_beams = 4) is deliberately not used here: it was observed to\n",
    "# always get stuck in a loop (cause not investigated).\n",
    "speech_output = model.generate(**inputs.to(device), temperature = 0.5, semantic_temperature = 0.5)\n",
    "\n",
    "Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e2cd65ba-69ec-4ee3-be6e-c88b9f7c3f9e",
   "metadata": {},
   "source": [
    "## To save"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8839e82-79eb-4f83-8524-44ad279b593f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The model is loaded in half precision on GPU, so the generated audio can be float16.\n",
    "# Cast to float32 first: 16-bit float is not among the WAV sample formats that\n",
    "# scipy.io.wavfile documents as supported. The cast is a no-op if already float32.\n",
    "waveform = speech_output[0].to(torch.float32).cpu().numpy()\n",
    "\n",
    "wavfile.write(\"bark_out.wav\", rate=sampling_rate, data=waveform)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}