Skip to content

Instantly share code, notes, and snippets.

@itsanishjain
Last active November 27, 2024 13:22
Show Gist options
  • Save itsanishjain/b696b7c4ede0f38004d956d6e7283e28 to your computer and use it in GitHub Desktop.
Save itsanishjain/b696b7c4ede0f38004d956d6e7283e28 to your computer and use it in GitHub Desktop.
SIPs Ideas
- Idea Name: Cybersecurity for AI Companies
- Idea Title: High-Level Security for AI Weights
- Description: This idea focuses on providing cybersecurity solutions for AI companies to protect their valuable digital assets from theft or espionage. The proposed solution is to build high-level security for AI weights, similar to Chainalysis but for AI.
- Categories: Cybersecurity, Artificial Intelligence
- Industry: Technology, Security
- Tool Suggestions to build: AI-based cybersecurity tools
- MVP Plan: Develop a prototype for AI weight security, test it with a few AI companies, and iterate based on feedback.
- Idea Name: Moon-Focused Space Startups
- Idea Title: Moon Tourism, Mining, and Asset Placement
- Description: This idea explores the opportunities in moon-focused space startups, emphasizing the potential for moon tourism, mining, and asset placement, leveraging SpaceX's falling launch costs.
- Categories: Space Exploration, Tourism, Mining
- Industry: Aerospace
- Tool Suggestions to build: Spacecrafts, Mining equipment
- MVP Plan: Partner with SpaceX or similar companies to test the feasibility of moon tourism or mining.
- Idea Name: Kid-Friendly Smartphone Alternatives
- Idea Title: Limited-Feature Phone for Kids
- Description: This idea proposes a kid-friendly smartphone alternative that caters to parents' desire for device control and kids' need for connectivity, offering a limited-feature phone with basic communication and learning tools.
- Categories: Technology, Education
- Industry: Consumer Electronics
- Tool Suggestions to build: Limited-feature smartphone
- MVP Plan: Design and develop a prototype, test it with a small group of kids and parents, and iterate based on feedback.
- Idea Name: Humanoid Military Robots
...
- Categories: Product Design, Consumer Goods
- Industry: Manufacturing, Retail
- Tool Suggestions to build: Design software, 3D printers
- MVP Plan: Design a few products with fun and quirky elements, test them in the market, and iterate based on feedback.
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
from dotenv import load_dotenv
import os
from langchain_community.document_loaders import TextLoader
from langchain import OpenAI
from langchain import PromptTemplate
# Read settings (notably OPENAI_API_KEY) from a local .env file into the environment.
load_dotenv()
# NOTE: when OPENAI_API_KEY is unset this silently falls back to the literal
# placeholder 'YourAPIKey' instead of stopping here.
openai_api_key = os.getenv('OPENAI_API_KEY', 'YourAPIKey')

# Load the text file
loader = TextLoader("4 $1 Billion Startup Ideas.text")
pages = loader.load()

# Combine the pages into one string (joined in a single pass rather than with
# repeated `+=`), then replace the tabs with spaces.
text = "".join(page.page_content for page in pages)
text = text.replace('\t', ' ')

# This completion model is used here only to count tokens in the combined text.
llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
num_tokens = llm.get_num_tokens(text)
print(f"This book has {num_tokens} tokens in it")
# Loaders
from langchain.schema import Document
# Splitters
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Model
from langchain.chat_models import ChatOpenAI
# Embedding Support
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
# Summarizer we'll use for Map Reduce
from langchain.chains.summarize import load_summarize_chain
# Data Science
import numpy as np
from sklearn.cluster import KMeans
# Cut the combined text into large, overlapping chunks. The splitter is given
# paragraph breaks, line breaks and tabs as candidate separators.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", "\t"],
    chunk_size=10000,
    chunk_overlap=3000,
)
docs = text_splitter.create_documents([text])
num_documents = len(docs)
print(f"Now our book is split up into {num_documents} documents")

# Embed the content of every chunk; `vectors` lines up index-for-index with `docs`.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
chunk_texts = [chunk.page_content for chunk in docs]
vectors = embeddings.embed_documents(chunk_texts)
# `vectors` holds one embedding per chunk in `docs` (same order).
# Choose the number of clusters; this can be adjusted based on the text's content.
# One representative chunk is taken per cluster, so this is also the maximum
# number of passages that will be summarized later.
# KMeans cannot fit more clusters than there are samples, so cap the count at
# the number of chunks actually available.
num_vectors = len(vectors)
if num_vectors == 0:
    raise ValueError("No document chunks were embedded; nothing to cluster")
num_clusters = min(4, num_vectors)

# Perform K-means clustering (convert once so the distance math below works on
# an array instead of re-converting the list for every centroid).
vector_matrix = np.asarray(vectors)
kmeans = KMeans(n_clusters=num_clusters, random_state=42).fit(vector_matrix)

# For each centroid, find the position of the embedding closest to it
# (Euclidean distance, argmin picks the smallest).
closest_indices = [
    int(np.argmin(np.linalg.norm(vector_matrix - center, axis=1)))
    for center in kmeans.cluster_centers_
]

# Keep the chosen chunks in document order; drop repeats so a chunk that is
# nearest to two centroids is not processed twice.
selected_indices = sorted(set(closest_indices))
# Chat model used to write the per-chunk summaries.
llm3 = ChatOpenAI(
    model='gpt-3.5-turbo',
    temperature=0,
    max_tokens=1000,
    openai_api_key=openai_api_key,
)

# Prompt applied to one chunk at a time; the chunk is substituted for {text}.
map_prompt = """
You will be given a single passage of a book. This section will be enclosed in triple backticks (```)
Your goal is to give a summary of this section so that a reader will have a full understanding of what happened.
Your response should be at least three paragraphs and fully encompass what was said in the passage.
```{text}```
FULL SUMMARY:
"""
map_prompt_template = PromptTemplate(
    input_variables=["text"],
    template=map_prompt,
)

# Summarization chain of type "stuff" driven by the prompt above.
map_chain = load_summarize_chain(
    llm=llm3,
    chain_type="stuff",
    prompt=map_prompt_template,
)
# Pull out the chunks picked as cluster representatives.
selected_docs = [docs[index] for index in selected_indices]

# Make an empty list to hold your summaries
summary_list = []

# Loop through the selected docs, paired with their original chunk numbers
for i, (chunk_index, doc) in enumerate(zip(selected_indices, selected_docs)):
    # Go get a summary of the chunk
    chunk_summary = map_chain.run([doc])

    # Append that summary to your list
    summary_list.append(chunk_summary)

    print(f"Summary #{i} (chunk #{chunk_index}) - Preview: {chunk_summary[:250]} \n")

# Join the per-chunk summaries with newlines, then wrap the result in a
# Document so it can be handed to another chain.
summaries = "\n".join(summary_list)
summaries = Document(page_content=summaries)

print(f"Your total summary has {llm.get_num_tokens(summaries.page_content)} tokens")
# Chat model used for the final extraction step over the combined summaries.
llm4 = ChatOpenAI(
    model='gpt-4',
    temperature=0,
    max_tokens=3000,
    request_timeout=120,
    openai_api_key=openai_api_key,
)

# Prompt asking for one structured entry per business idea; the combined
# summaries are substituted for {text}.
extract_prompt = """
You will be given a transcription of a podcast where smart business professionals discuss innovative business ideas, strategies, and niche stories.
Your goal is to extract structured information that highlights the core business ideas and plans discussed. Ensure the output is concise and accurate, summarizing key actionable insights.
The transcription will be enclosed in triple backticks (```).
Use the following format for your response:
- Idea Name:
- Idea Title:
- Description:
- Categories:
- Industry:
- Tool Suggestions to build:
- MVP Plan:
```{text}```
STRUCTURED INFORMATION:
"""
extract_prompt_template = PromptTemplate(
    input_variables=["text"],
    template=extract_prompt,
)

# Summarization chain of type "stuff" driven by the extraction prompt.
# Pass verbose=True as well if you want to see the inner workings.
reduce_chain = load_summarize_chain(
    llm=llm4,
    chain_type="stuff",
    prompt=extract_prompt_template,
)

# Run the extraction over the single combined-summaries document and show it.
output = reduce_chain.run([summaries])
print(output)
### Source langchain-tutorials
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment