Skip to content

Instantly share code, notes, and snippets.

@vncsna
Last active November 19, 2024 21:51
Show Gist options
  • Save vncsna/c21a91e458c8953a9a1aefd8df5226ff to your computer and use it in GitHub Desktop.
Save vncsna/c21a91e458c8953a9a1aefd8df5226ff to your computer and use it in GitHub Desktop.
Save AWS logs to a DuckDB database
import subprocess
import IPython
import duckdb
def main(log_group):
    """Tail a log group with the AWS CLI and store every line in DuckDB.

    Runs ``aws logs tail <log_group> --follow --since 15d`` as a child
    process, splits each stdout line into three whitespace-separated
    fields and inserts them into the ``cloudwatch_logs`` table of the
    local ``cloudwatch_logs.db`` database. After each stored line the
    IPython output area is cleared and the line's timestamp is printed
    as a progress indicator.

    Lines that do not have three fields, or that DuckDB rejects, are
    skipped so that one bad line does not stop the stream. The child
    process and the database connection are released even when the loop
    is interrupted (for example with Ctrl-C).

    Args:
        log_group: Name of the log group handed to ``aws logs tail``.
    """

    def process_log_entry(timestamp, log_group, log_entry):
        """Insert one parsed log line into the ``cloudwatch_logs`` table."""
        conn.execute(
            "INSERT INTO cloudwatch_logs (timestamp, log_group, log_entry) VALUES (?, ?, ?)",
            [timestamp, log_group, log_entry],
        )

    conn = duckdb.connect('cloudwatch_logs.db')
    process = None
    try:
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS cloudwatch_logs (
                timestamp TIMESTAMP,
                log_group TEXT,
                log_entry TEXT
            )
            """
        )

        # NOTE(review): stderr is piped but only drained after stdout ends;
        # a very chatty stderr could fill the pipe and stall the child.
        process = subprocess.Popen(
            ['aws', 'logs', 'tail', log_group, '--follow', '--output', 'json', '--since', '15d'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )

        for line in process.stdout:
            fields = line.strip().split(maxsplit=2)
            if len(fields) != 3:
                # Blank or truncated line: nothing sensible to store.
                continue

            # A separate name is used so the ``log_group`` argument is not
            # overwritten by the loop.
            # NOTE(review): the second field is what gets stored in the
            # ``log_group`` column; it may really be the stream name --
            # confirm against the AWS CLI output format.
            timestamp, source, log_entry = fields
            try:
                process_log_entry(timestamp, source, log_entry)
            except duckdb.Error:
                # Best effort: e.g. a first field that is not a valid
                # timestamp. Skip the line, keep streaming.
                continue

            IPython.display.clear_output()
            print(timestamp)

        # stdout is exhausted only once the child has exited; surface
        # whatever it wrote to stderr.
        for line in process.stderr:
            print(line)
    finally:
        if process is not None:
            process.kill()
            process.wait()  # reap the child so it does not linger as a zombie
        conn.close()
if __name__ == "__main__":
    # Replace the placeholder with the name of the log group to tail.
    # The entry point defined in this file is ``main``.
    main("<log-group-name>")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment