Last active
June 4, 2025 13:20
-
-
Save davidefiocco/de5beb4ecd60c91e66815b961bdc9709 to your computer and use it in GitHub Desktop.
Generate team AI digest using smolagents and Exa search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from smolagents import OpenAIServerModel | |
from smolagents import CodeAgent, VisitWebpageTool, FinalAnswerTool | |
from smolagents.tools import tool, Tool | |
from datetime import datetime | |
import os | |
# Credentials are read from the environment; either value is None when the
# corresponding variable is not set.
EXA_API_KEY = os.environ.get("EXA_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")

# Language model that drives the agent.
model = OpenAIServerModel(
    model_id="gpt-4.1",
)
class ExaSearchTool(Tool):
    """Agent tool that runs an Exa web search restricted to a publication-date window.

    The class attributes below (name, description, inputs, output_type) are the
    tool metadata consumed by smolagents; ``forward`` performs the actual search.
    """

    name = "exa_search"
    description = "Search the web for the given query using Exa with date filtering and text extraction capabilities."
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query to perform."
        },
        "start_published_date": {
            "type": "string",
            "description": "The start date of the search (YYYY-MM-DD format)."
        },
        "end_published_date": {
            "type": "string",
            "description": "The end date of the search (YYYY-MM-DD format)."
        },
        "extract_text": {
            "type": "boolean",
            "description": "Whether to extract text from the search results pages.",
            "nullable": True
        }
    }
    output_type = "string"

    def __init__(self, api_key: str = None, **kwargs):
        """Create the tool and its Exa client.

        Args:
            api_key (str): Exa API key. When omitted (None), the value of the
                ``EXA_API_KEY`` environment variable is used instead.
            **kwargs: Forwarded unchanged to the ``Tool`` base constructor.

        Raises:
            ValueError: If no API key is given and ``EXA_API_KEY`` is unset or empty.
            ImportError: If the ``exa_py`` package is not installed.
        """
        super().__init__(**kwargs)
        # The error message below promises an environment-variable fallback,
        # so actually honor it when no explicit key is passed.
        self.api_key = api_key if api_key is not None else os.getenv("EXA_API_KEY")
        if not self.api_key:
            raise ValueError("Missing API key. Make sure you have 'EXA_API_KEY' in your env variables or pass it as api_key parameter.")
        # Imported lazily so the module can be loaded without exa_py installed.
        try:
            from exa_py import Exa
        except ImportError as e:
            raise ImportError(
                "You must install `exa_py` to run this tool: for instance run `pip install exa_py`."
            ) from e
        self.exa = Exa(api_key=self.api_key)

    def forward(self, query: str, start_published_date: str, end_published_date: str, extract_text: bool = True) -> str:
        """Search the web using Exa API with date filtering and text extraction.

        Args:
            query (str): The search query.
            start_published_date (str): The start date of the search.
            end_published_date (str): The end date of the search.
            extract_text (bool): Whether to extract text from the search results pages.
                None is treated like the default (True).

        Returns:
            str: The search results rendered as text, or an error message
                starting with "Error performing Exa search:" if the call failed.
        """
        # The input is declared nullable, so an explicit None means "use the default".
        if extract_text is None:
            extract_text = True
        try:
            result = self.exa.search_and_contents(
                query,
                text=extract_text,
                type="auto",
                start_published_date=start_published_date,
                end_published_date=end_published_date,
            )
        except Exception as e:
            # Deliberate best-effort: report the failure as text so the agent
            # can read it and react instead of aborting the whole run.
            return f"Error performing Exa search: {str(e)}"
        # output_type is "string": hand back text rather than the raw response
        # object. NOTE(review): relies on the Exa response having a readable
        # str() rendering - confirm against the installed exa_py version.
        return str(result)
# Shared search tool instance, authenticated with the key read from the
# environment at import time.
exa_search_tool = ExaSearchTool(
    api_key=EXA_API_KEY,
)
@tool
def write_markdown_to_file_tool(file_name: str, content: str) -> str:
    """Write the given markdown content to an output file.

    An existing file with the same name is overwritten.

    Args:
        file_name (str): The name of the file to write to.
        content (str): The content to write to the file.

    Returns:
        str: A confirmation message naming the written file.
    """
    # Explicit UTF-8: the default encoding is platform-dependent, and digest
    # text with non-ASCII characters can fail to encode on non-UTF-8 locales.
    with open(file_name, "w", encoding="utf-8") as f:
        f.write(content)
    return f"File {file_name} has been written successfully."
# Agent wired with page fetching, markdown export, Exa search and the
# final-answer tool; capped at 25 steps.
agent = CodeAgent(
    model=model,
    tools=[
        VisitWebpageTool(),
        write_markdown_to_file_tool,
        exa_search_tool,
        FinalAnswerTool(),
    ],
    max_steps=25,
    verbosity_level=1,
)
# Research brief handed to the agent. The current local time is interpolated
# at the end so the agent can work out the "last 14 days" window and the
# date used in the output file name.
prompt = f"""Perform research looking for updates about AI developments in the last 14 days relevant for practitioners working in the realm of (scholarly/scientific) document classification, (scholarly) recommender systems, and applications of generative AI.
Relevant material for this research could be:
- ArXiv articles
- GitHub repositories
- YouTube videos/tutorials
- AI-related blog posts
- AI conferences (soon upcoming or in progress)
- AI courses
- Product launches from frontier AI labs (e.g. AllenAI)
- General news about AI developments (e.g. visiting https://news.smol.ai/)
Stick to a list of items and their descriptions, without adding extra text/summarization/takeaways.
Put them together in a markdown format and export it as a WeeklyML-update-<YYYY-MM-DD>.md file, and include links/sources whenever it is appropriate.
Time now is {datetime.now()}"""

# Run the agent on the brief; the agent's final answer is kept in `result`.
result = agent.run(prompt)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment