Created
September 12, 2023 12:43
-
-
Save brunosan/e31a9ca284ee68d72ed0c185c6923bf2 to your computer and use it in GitHub Desktop.
Summarize a long transcript using the OpenAI API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import openai | |
def recursive_summarization(text, chunk_size=32000, summary_length=20000, final_length=2000):
    """Condense *text* with the OpenAI chat API, recursing until it is short enough.

    The text is cut into fixed-size character chunks, each chunk is summarized
    by a separate chat-completion request, and the chunk summaries are joined
    with single spaces. If the joined summary still has more than
    ``final_length`` words, the function calls itself on that summary.

    Args:
        text: The transcript (or an intermediate summary) to condense.
        chunk_size: Number of characters sent to the model per request.
        summary_length: Currently unused; accepted and forwarded to recursive
            calls only so existing callers keep working.
        final_length: Word budget (whitespace-separated words). Input at or
            below this budget is returned unchanged.

    Returns:
        The input itself when it already fits the budget, otherwise the joined
        chunk summaries. The result may exceed ``final_length`` words only if
        a summarization pass fails to shorten its input.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    print("Starting summarization...")

    # Measure length in words everywhere so this early exit and the recursion
    # test below agree on what "short enough" means.
    input_words = len(text.split())
    if input_words <= final_length:
        print("Text is short enough. No summarization needed.")
        return text

    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # Only fall back to the placeholder when no key has been configured, so a
    # key set by the caller (or by the library itself) is not overwritten.
    if not getattr(openai, "api_key", None):
        openai.api_key = "ADD YOURS"

    # NOTE(review): chunk_size is in characters, not tokens; the default may be
    # larger than the selected model's context window -- confirm.
    print(f"Splitting text into chunks of size {chunk_size}...")
    text_chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

    # The instruction is identical for every chunk, so build it once.
    prompt = "The following is a chunk of a long transcript of a meeting, or summary of it. Make a narrative technical and dense one pager note with key points, insights, plans, challenges and takeaway. Use your knowledge to weight relevance. Use jargon to make it as dense, detailed and short as possible, while capturing all neccesary details. Assume we are experts in all fields"

    summaries = []
    for i, chunk in enumerate(text_chunks):
        print(f"Summarizing chunk {i+1}/{len(text_chunks)}...")
        response = openai.ChatCompletion.create(
            model="gpt-4-0613",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": chunk},
            ],
        )
        chunk_summary = response.choices[0].message.content
        print(chunk_summary)
        summaries.append(chunk_summary)

    print("Concatenating chunk summaries...")
    concatenated_summary = " ".join(summaries)
    summary_words = len(concatenated_summary.split())

    if summary_words > final_length:
        if summary_words >= input_words:
            # Another pass is not converging; stop instead of recursing (and
            # paying for API calls) indefinitely.
            print("Summary did not get shorter. Stopping recursion.")
            return concatenated_summary
        print("Concatenated summary is too long. Recursing...")
        return recursive_summarization(concatenated_summary, chunk_size, summary_length, final_length)

    print("Final summary ready.")
    return concatenated_summary
def main(input_path="transcript.txt", output_path="summary.txt"):
    """Summarize a transcript file and write the result to another file.

    Args:
        input_path: Path of the text file to read the transcript from.
        output_path: Path of the text file the final summary is written to;
            it is opened in write mode, so existing content is replaced.
    """
    print("Reading transcript from file...")
    # Explicit UTF-8 so non-ASCII text does not depend on the platform's
    # default locale encoding.
    with open(input_path, "r", encoding="utf-8") as f:
        long_text = f.read()

    final_summary = recursive_summarization(long_text)

    print("Writing final summary to file...")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_summary)
    print("Done.")
# Run the summarizer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment