Last active
March 10, 2025 08:42
-
-
Save shpaker/ad563fbaecaf2b07817013449a1a0994 to your computer and use it in GitHub Desktop.
This script retrieves and saves information about MongoDB databases, collections, indexes, and explicitly defined schemas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script retrieves and saves information about MongoDB databases,
# collections, indexes, and explicitly defined schemas (if available) to an output file.
#
# Author: Aleksandr Shpak
# GitHub: https://github.com/shpaker
# Telegram: https://t.me/g33ks (Russian-language tech channel by the author)
#
# Installation:
# Ensure you have Python 3 and install the required packages:
# pip3 install click pymongo -U
#
# Usage:
# Run the script with a MongoDB DSN (Data Source Name):
# python3 pymongoinfo.py mongodb://root:secret-pass@hostname:port
#
# The output is saved to 'output.json' in the execution directory.
import json | |
from pathlib import Path | |
import click | |
from pymongo import MongoClient | |
from pymongo.errors import OperationFailure | |
def _get_databases(client): | |
""" | |
Yields database names and metadata available on the MongoDB server. | |
:param client: MongoClient instance | |
""" | |
for database in client.list_databases(): | |
yield database['name'], database | |
def _get_collections(database): | |
""" | |
Yields collection names within a given database. | |
:param database: A pymongo database instance | |
""" | |
for collection in database.list_collection_names(): | |
yield collection | |
def _get_indexes(collection): | |
""" | |
Yields index metadata associated with a collection. | |
:param collection: A pymongo collection instance | |
""" | |
for index in collection.list_indexes(): | |
yield index | |
def _get_explicit_schema(database, collection_name): | |
""" | |
Retrieves explicitly defined schema from MongoDB collection options. | |
:param database: A pymongo database instance | |
:param collection_name: Name of the collection | |
""" | |
try: | |
options = database.command('listCollections', filter={"name": collection_name}) | |
for col in options.get("cursor", {}).get("firstBatch", []): | |
schema = col.get("options", {}).get("validator", {}).get("$jsonSchema") | |
if schema: | |
yield schema | |
except OperationFailure: | |
return | |
@click.command()
@click.argument(
    'mongodb_dsn',
    required=True,
)
@click.option(
    '--output', '-o',
    required=True,
    default='output.json',
    show_default=True,
    type=click.Path(
        resolve_path=True,
        dir_okay=False,
        path_type=Path,
    ),
)
def main(mongodb_dsn: str, output: Path):
    """
    Main function that connects to MongoDB, gathers metadata, and writes it to a JSON file.
    USAGE EXAMPLE:
    python3 pymongoinfo.py mongodb://root:secret-pass@hostname:port
    """
    # NOTE: the docstring above doubles as the click --help text, so the
    # maintainer documentation lives in these comments instead.
    #
    # mongodb_dsn: connection string handed to MongoClient.
    # output: resolved path of the JSON report (defaults to 'output.json').
    #
    # Report layout: {'mongo_version': <str>, <db name>: <db metadata> plus a
    # 'collection' mapping of {<collection name>: {'indexes', 'schema'}}}.

    # Use the client as a context manager so its connections are closed even
    # when gathering metadata fails part-way through.
    with MongoClient(mongodb_dsn) as client:
        # Retrieve and display MongoDB server version
        server_info = client.server_info()
        mongo_version = server_info.get('version', 'Unknown')
        click.secho(f'MongoDB Server Version: {mongo_version}', fg='green', bold=True)
        click.secho('Reading database information...', fg='cyan')

        result_dict = {'mongo_version': mongo_version}
        for db, db_meta in _get_databases(client):
            click.secho(f'Found database: {db}', fg='yellow')
            collections = {}
            db_meta['collection'] = collections
            result_dict[db] = db_meta
            database = client[db]

            # Only a failure to enumerate the collections skips the whole database.
            try:
                collection_names = list(_get_collections(database))
            except OperationFailure:
                click.secho(f' Skipping database: {db} (Access Denied)', fg='red')
                continue

            for col in collection_names:
                click.secho(f' Found collection: {col}', fg='blue')
                # A single collection can refuse index listing (views, for
                # instance, are reported as collections but do not support
                # listIndexes). Handle that here so the remaining collections
                # of the database are still recorded.
                try:
                    indexes = list(_get_indexes(database[col]))
                except OperationFailure as exc:
                    click.secho(f' Cannot read indexes of {col}: {exc}', fg='red')
                    indexes = []
                schema = next(_get_explicit_schema(database, col), None)
                collections[col] = {
                    'indexes': indexes,
                    'schema': schema if schema else "No explicit schema"
                }
                if indexes:
                    click.secho(' Indexes found:', fg='magenta')
                    for index in indexes:
                        click.secho(f' {index}', fg='magenta')
                if schema:
                    click.secho(' Explicit Schema detected:', fg='cyan')
                    # default=str: same best-effort fallback as the final dump below
                    click.secho(json.dumps(schema, indent=2, default=str), fg='cyan')

    # default=str is a deliberate best-effort fallback: server metadata may
    # carry values the json module cannot encode (dates, ObjectIds, ...), and
    # a string form in the report beats losing the whole run to a TypeError.
    result_str = json.dumps(result_dict, indent=2, sort_keys=True, default=str)
    click.secho(f'Writing results to {output}', fg='green')
    # Explicit encoding so the report does not depend on the platform locale.
    with open(output, 'w', encoding='utf-8') as fh:
        fh.write(result_str)
# Run the click command only when this file is executed as a script
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment