Skip to content

Instantly share code, notes, and snippets.

@shpaker
Last active March 10, 2025 08:42
Show Gist options
  • Save shpaker/ad563fbaecaf2b07817013449a1a0994 to your computer and use it in GitHub Desktop.
Save shpaker/ad563fbaecaf2b07817013449a1a0994 to your computer and use it in GitHub Desktop.
This script retrieves and saves information about MongoDB databases, collections, indexes, and explicitly defined schemas
# This script retrieves and saves information about MongoDB databases,
# collections, indexes, and explicitly defined schemas (if available) to an output file.
#
# Author: Aleksandr Shpak
# GitHub: https://github.com/shpaker
# Telegram: https://t.me/g33ks (Russian-language tech channel by the author)
#
# Installation:
# Ensure you have Python 3 and install the required packages:
# pip3 install click pymongo -U
#
# Usage:
# Run the script with a MongoDB DSN (Data Source Name):
# python3 pymongoinfo.py mongodb://root:secret-pass@hostname:port
#
# By default the output is saved to 'output.json' in the execution directory;
# pass --output/-o to write to a different file.
import json
from pathlib import Path
import click
from pymongo import MongoClient
from pymongo.errors import OperationFailure
def _get_databases(client):
    """
    Lazily iterate over the databases reported by ``client.list_databases()``.

    Every yielded item is a ``(name, metadata)`` tuple: ``metadata`` is the
    entry exactly as returned by the client and ``name`` is its ``'name'``
    field.

    :param client: MongoClient instance
    """
    yield from (
        (db_entry['name'], db_entry)
        for db_entry in client.list_databases()
    )
def _get_collections(database):
    """
    Lazily iterate over the collection names of one database.

    The names come straight from ``database.list_collection_names()``, which
    is only called once iteration starts.

    :param database: A pymongo database instance
    """
    yield from database.list_collection_names()
def _get_indexes(collection):
    """
    Lazily iterate over the index documents of one collection.

    Items are passed through unchanged from ``collection.list_indexes()``.

    :param collection: A pymongo collection instance
    """
    index_cursor = collection.list_indexes()
    for index_doc in index_cursor:
        yield index_doc
def _get_explicit_schema(database, collection_name):
    """
    Yield the ``$jsonSchema`` validator(s) declared for a collection.

    Runs the ``listCollections`` command filtered by name and inspects the
    first batch of the reply. An entry contributes a value only when its
    ``options.validator.$jsonSchema`` is present and truthy. If the command
    raises ``OperationFailure`` the generator simply ends without yielding.

    :param database: A pymongo database instance
    :param collection_name: Name of the collection
    """
    try:
        reply = database.command(
            'listCollections',
            filter={"name": collection_name},
        )
        first_batch = reply.get("cursor", {}).get("firstBatch", [])
        for entry in first_batch:
            validator = entry.get("options", {}).get("validator", {})
            json_schema = validator.get("$jsonSchema")
            if not json_schema:
                # No explicit (or an empty) schema on this entry.
                continue
            yield json_schema
    except OperationFailure:
        # Best effort: treat a failed command as "no schema available".
        return
# CLI wiring: one positional MongoDB DSN plus an optional output path
# (--output/-o, default 'output.json', resolved to an absolute Path).
# NOTE: click renders the function docstring as the --help text, so the
# docstring wording below is user-visible and is kept as it was.
@click.command()
@click.argument(
    'mongodb_dsn',
    required=True,
)
@click.option(
    '--output', '-o',
    required=True,
    default='output.json',
    show_default=True,
    type=click.Path(
        resolve_path=True,
        dir_okay=False,
        path_type=Path,
    ),
)
def main(mongodb_dsn: str, output: Path):
    """
    Main function that connects to MongoDB, gathers metadata, and writes it to a JSON file.
    USAGE EXAMPLE:
    python3 pymongoinfo.py mongodb://root:secret-pass@hostname:port
    """
    # Parameters:
    #   mongodb_dsn -- connection string handed to MongoClient unchanged.
    #   output      -- file that receives the JSON report.
    # The report maps 'mongo_version' to the server version and every
    # database name to its list_databases() entry, extended with a
    # 'collection' dict of {collection name: {'indexes': [...], 'schema': ...}}.

    # Use the client as a context manager so its connections are released
    # even when metadata collection fails part-way through.
    with MongoClient(mongodb_dsn) as client:
        # Retrieve and display MongoDB server version
        server_info = client.server_info()
        mongo_version = server_info.get('version', 'Unknown')
        click.secho(f'MongoDB Server Version: {mongo_version}', fg='green', bold=True)
        click.secho('Reading database information...', fg='cyan')

        result_dict = {'mongo_version': mongo_version}
        for db, db_meta in _get_databases(client):
            click.secho(f'Found database: {db}', fg='yellow')
            result_dict[db] = db_meta
            result_dict[db]['collection'] = {}
            database = client[db]

            # Only a failure to enumerate the collections skips the database.
            try:
                collection_names = list(_get_collections(database))
            except OperationFailure:
                click.secho(f' Skipping database: {db} (Access Denied)', fg='red')
                continue

            for col in collection_names:
                click.secho(f' Found collection: {col}', fg='blue')
                # Handle index-listing failures per collection so that one
                # problematic namespace does not drop every remaining
                # collection of the database from the report.
                # NOTE(review): presumably this is what happens for views,
                # which the server may refuse to list indexes for -- confirm.
                try:
                    indexes = list(_get_indexes(database[col]))
                except OperationFailure as exc:
                    click.secho(f' Could not list indexes for {col}: {exc}', fg='red')
                    indexes = []
                schema = next(_get_explicit_schema(database, col), None)
                result_dict[db]['collection'][col] = {
                    'indexes': indexes,
                    'schema': schema or "No explicit schema",
                }
                if indexes:
                    click.secho(' Indexes found:', fg='magenta')
                    for index in indexes:
                        click.secho(f' {index}', fg='magenta')
                if schema:
                    click.secho(' Explicit Schema detected:', fg='cyan')
                    click.secho(json.dumps(schema, indent=2), fg='cyan')

    # default=str is a deliberate fallback: index options or validators may
    # carry BSON values (dates, ObjectIds, ...) that json cannot encode, and
    # a stringified value is preferable to losing the whole report at the
    # very last step. Values json already handles are unaffected.
    result_str = json.dumps(result_dict, indent=2, sort_keys=True, default=str)
    click.secho(f'Writing results to {output}', fg='green')
    with open(output, 'w', encoding='utf-8') as fh:
        fh.write(result_str)
# Script entry point: invoke the click command only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment