x-pyannote.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "name": "pyannote.ipynb",
      "gpuType": "A100",
      "machine_shape": "hm",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/StanAngeloff/91480fac18a74d8aff3e4cf566cfd0ff/pyannote.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import locale\n",
        "\n",
        "def getpreferredencoding(do_setlocale=True):\n",
        "    return \"UTF-8\"\n",
        "\n",
        "locale.getpreferredencoding = getpreferredencoding"
      ],
      "metadata": {
        "id": "rudtRKL9P7KK"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "lbtdzoCjO7DU"
      },
      "outputs": [],
      "source": [
        "!pip install \\\n",
        "  git+https://github.com/pyannote/pyannote-audio.git@7379f1c82be093078354449100e1a84cbdfbafdf \\\n",
        "  git+https://github.com/openai/whisper.git@248b6cb124225dd263bb9bd32d060b6517e067f8 \\\n",
        "  torch==2.0.0+cu118 torchvision==0.15.1+cu118 torchaudio==2.0.1 \\\n",
        "  --extra-index-url https://download.pytorch.org/whl/cu118"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import torch\n",
        "\n",
        "torch.cuda.is_available()"
      ],
      "metadata": {
        "id": "J_Ss89GhRlvq"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from google.colab import drive\n",
        "\n",
        "drive.mount('/content/gdrive')"
      ],
      "metadata": {
        "id": "10-Kt8ghQFK1"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from huggingface_hub import notebook_login\n",
        "\n",
        "notebook_login()"
      ],
      "metadata": {
        "id": "KOvvVyKSPJck"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import soundfile as sf\n",
        "\n",
        "sf.available_formats()"
      ],
      "metadata": {
        "id": "oNpve47cPJ9n"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "---"
      ],
      "metadata": {
        "id": "75IRpeQnxJYW"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import whisper\n",
        "import torch\n",
        "\n",
        "device = torch.device(\"cuda\")\n",
        "\n",
        "model = whisper.load_model(\"large\", device=device)"
      ],
      "metadata": {
        "id": "x0CLqnvDwg8P"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from pyannote.audio import Pipeline\n",
        "from pyannote.audio.pipelines.utils.hook import ProgressHook\n",
        "\n",
        "#device = torch.device(\"cuda\")\n",
        "\n",
        "speaker_diarization = Pipeline.from_pretrained(\n",
" \"pyannote/[email protected]\",\n", | |
" use_auth_token=True\n", | |
")\n", | |
"\n", | |
"speaker_diarization.to(device)" | |
], | |
"metadata": { | |
"id": "3rKnEyruPKwu" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"!nvidia-smi" | |
], | |
"metadata": { | |
"id": "yzH_lXFOv2ur" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"!cp \"/content/gdrive/MyDrive/Recordings/Day 1/Session 1.ogg\" /content/target.ogg" | |
], | |
"metadata": { | |
"id": "CHGdyOCFxCiQ" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"---" | |
], | |
"metadata": { | |
"id": "05L9PUV9xb8k" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"with ProgressHook() as hook:\n", | |
" who_speaks_when = speaker_diarization(\n", | |
" \"/content/target.ogg\",\n", | |
" num_speakers=2,\n", | |
" #min_speakers=5,\n", | |
" #max_speakers=9,\n", | |
" hook=hook\n", | |
" )" | |
], | |
"metadata": { | |
"id": "PEH4cv5VPMfC" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from pyannote.core import Segment\n", | |
"from pyannote.audio import Audio\n", | |
"\n", | |
"speakers = who_speaks_when.rename_labels({\n", | |
" #\"SPEAKER_00\": \"David\",\n", | |
" #\"SPEAKER_01\": \"Stan\",\n", | |
"})\n", | |
"\n", | |
"crop = Segment(0, 999999999)\n", | |
"#crop = Segment(5 * 60.0, 10 * 60.0)\n", | |
"audio = Audio(sample_rate=16000, mono=\"downmix\")\n", | |
"\n", | |
"def float_to_timestamp(float_time):\n", | |
" hours, remainder = divmod(float_time, 3600)\n", | |
" minutes, seconds = divmod(remainder, 60)\n", | |
" return \"{:02}:{:02}:{:04.1f}\".format(int(hours), int(minutes), seconds)\n", | |
"\n", | |
"for segment, _, speaker in speakers.crop(crop).itertracks(yield_label=True):\n", | |
" waveform, sample_rate = audio.crop(\"/content/target.ogg\", segment)\n", | |
" text = model.transcribe(waveform.squeeze().numpy(), language=\"en\", initial_prompt=\"\"\"\n", | |
"A recorded conversation between Company A consisting of Stan & David and Client consisting of … discussing a new project requirements and demonstrating … and current standard operating procedures in …, including Campaign Planning and Digital Marketing, Customer Service, IT Process, Finance.\n", | |
"\"\"\".strip())[\"text\"]\n", | |
" print(f\"{float_to_timestamp(segment.start)}-{float_to_timestamp(segment.end)} {speaker.strip()}: {text.strip()}\")" | |
], | |
"metadata": { | |
"id": "JbC9Qj0k5fuF" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Here's a tutorial on how to use GPT-4 to analyze the transcribed notes of lengthy meetings and generate detailed reports from them.
Begin by setting up GPT-4 with the following prompt, which instructs the AI to analyze the meeting notes meticulously:
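The exact wording is up to you; one illustrative possibility (assumed wording, not a canonical prompt — adapt it to your transcripts):

```
You are a meticulous minute-taker. Below is one chunk of a raw meeting
transcript with timestamps and speaker labels. Produce detailed meeting
minutes for this chunk: key discussion points, decisions made, action
items with owners, and open questions. Do not omit details.

<paste transcript chunk here>
```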
The output generated will be the detailed meeting minutes. For effective processing, break the input down and feed approximately 3,000 tokens at a time into GPT-4; this approach helps prevent the loss of essential information. A sketch of such a chunking loop follows below.
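A minimal sketch of that loop, assuming the `openai` Python package (v1+) with an `OPENAI_API_KEY` in the environment, and approximating the token budget by character count — both assumptions, not part of the original notebook:

```python
# Illustrative sketch only: chunk a long transcript and collect per-chunk
# minutes from GPT-4. Helper names like SYSTEM_PROMPT and minutes_for are
# hypothetical, introduced here for illustration.
from openai import OpenAI

client = OpenAI()

SYSTEM_PROMPT = "You are a meticulous minute-taker. ..."  # prompt from above

# ~3,000 tokens approximated as ~12,000 characters (roughly 4 chars/token);
# a real implementation would split on sentence or paragraph boundaries.
CHUNK_CHARS = 12_000

def chunk_text(text: str, size: int = CHUNK_CHARS) -> list[str]:
    """Split the transcript into roughly token-budget-sized pieces."""
    return [text[i:i + size] for i in range(0, len(text), size)]

def minutes_for(transcript: str) -> str:
    """Run each chunk through GPT-4 and concatenate the resulting minutes."""
    minutes = []
    for chunk in chunk_text(transcript):
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": chunk},
            ],
        )
        minutes.append(response.choices[0].message.content)
    return "\n\n".join(minutes)
```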
After you have the comprehensive minutes, compile them for further analysis. You can then instruct GPT-4 to summarize high-level topics, as shown in the following prompt:
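Again, the wording is yours to choose; as an illustrative sketch:

```
Below are the compiled detailed minutes of a long meeting. List the
high-level topics that were discussed, one per line, grouping closely
related items under a single topic. Keep each topic to a short phrase.

<paste compiled minutes here>
```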
GPT-4's output here should offer a solid, categorical list of topics. Use your judgement to refine it further: combine similar items and restructure the list for conciseness.
Once you have the high-level overview, proceed to create an in-depth report for each topic, working from the original, unchanged detailed meeting minutes. For this, employ a prompt structure like the following for each topic:
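One illustrative structure (assumed wording; the `<topic name>` placeholder is filled in per topic):

```
Using only the detailed meeting minutes below, write an in-depth report
on the topic "<topic name>". Cover the relevant discussion, decisions,
action items with owners, and open questions, and quote timestamps
where they are available.

<paste unchanged detailed minutes here>
```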
When working topic by topic, avoid asking GPT-4 to handle more than two items at once, as it may struggle to maintain focus. One topic at a time is typically the most manageable approach.
When you're done with the process, you'll end up with a report that turns your meeting's chaos into a neatly organized playbook of next steps. Basically, it's like having the ultimate meeting notes: they don't just remind you what was said, they tell you what to do about it.