{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "18662496-bb36-45a6-99ab-b2f0b91eb534",
   "metadata": {},
   "source": [
    "## Suno Demo\n",
    "\n",
    "Copied from: https://colab.research.google.com/drive/1dWWkZzvu7L9Bunq9zvD-W02RFUXoW-Pd?usp=sharing#scrollTo=68QtoUqPWdLk\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f6c0c08d-b1b7-479c-ae10-bd126d925bcd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6bcc5df3f0fc46e69824f2f194c3b805",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json: 0%| | 0.00/8.81k [00:00\n",
       " \n",
       " Your browser does not support the audio element.\n",
       " \n",
       " "
      ],
      "text/plain": [
       ""
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from IPython.display import Audio\n",
    "\n",
    "# NOTE: assumes `model` and `speech_output` were created by earlier cells.\n",
    "# The playback rate is read from the model's generation config.\n",
    "sampling_rate = model.generation_config.sample_rate\n",
    "# First entry of `speech_output`, moved to CPU and converted to NumPy for the audio widget.\n",
    "Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "74af0e3c-a34c-4477-b917-5c62c0ddaa94",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import the submodule explicitly: a bare `import scipy` is not guaranteed to expose `scipy.io.wavfile`.\n",
    "from scipy.io import wavfile\n",
    "\n",
    "# Write the first entry of `speech_output` (moved to CPU, as a NumPy array) to a WAV file at `sampling_rate`.\n",
    "wavfile.write(\"bark_out.wav\", rate=sampling_rate, data=speech_output[0].cpu().numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "94c95582-49cf-419d-89bf-51e2e4e04379",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       " \n",
       " "
      ],
      "text/plain": [
       ""
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "voice_preset = \"v2/en_speaker_6\"\n",
    "\n",
    "# prepare the inputs and keep the result of `.to(device)` in `inputs`,\n",
    "# so later cells reuse the moved inputs instead of relying on `.to()` mutating in place\n",
    "text_prompt = \"Let's try generating speech, with Bark, a text-to-speech model\"\n",
    "inputs = processor(text_prompt, voice_preset=voice_preset).to(device)\n",
    "\n",
    "# generate speech\n",
    "speech_output = model.generate(**inputs)\n",
    "\n",
    "# let's hear it\n",
    "Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "330d02ed-abfe-4fcc-9858-c12f9add3ce5",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       " \n",
       " "
      ],
      "text/plain": [
       ""
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Generate again from the same `inputs`, this time passing explicit generation kwargs.\n",
    "# NOTE(review): `semantic_temperature` presumably overrides `temperature` for Bark's semantic stage only - confirm against the Bark docs.\n",
    "speech_output = model.generate(**inputs, num_beams=4, temperature=0.5, semantic_temperature=0.8)\n",
    "\n",
    "Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}