Created
May 24, 2024 07:24
-
-
Save djinn/d05a88c4d6008cdb9c5291c53a2beeb3 to your computer and use it in GitHub Desktop.
Mongo Vs Couchbase
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import random | |
from pymongo import MongoClient | |
from couchbase.cluster import Cluster | |
from couchbase.management.buckets import BucketManager | |
from couchbase.options import ClusterOptions | |
from couchbase.auth import PasswordAuthenticator | |
import os | |
import string | |
from tqdm import tqdm | |
import statistics | |
# Utility function for generating random strings | |
def generate_random_string(length):
    """Return a random string of ``length`` ASCII letters and digits.

    One ``random.choice`` call is made per character, so the output is
    reproducible under ``random.seed``. A ``length`` of zero or less
    yields an empty string.
    """
    alphabet = string.ascii_letters + string.digits
    # map() calls random.choice once per list element, in order.
    return ''.join(map(random.choice, [alphabet] * length))
# Connection parameters are read from the environment; a missing variable
# raises KeyError here, before any connection is attempted.
username = os.environ['CB_USER']
password = os.environ['CB_PASSWORD']
mongo_uri = os.environ['MDB_URI']
couchbase_uri = os.environ['CB_URI']

# Initialize MongoDB and start from empty collections. The handles are
# created once, after the drop.
mongo_client = MongoClient(mongo_uri)
mongo_db = mongo_client['benchmark']
mongo_db.drop_collection('parents')
mongo_db.drop_collection('children')
mongo_parents = mongo_db['parents']
mongo_children = mongo_db['children']

# Initialize Couchbase and obtain the bucket manager.
cluster = Cluster(couchbase_uri, ClusterOptions(PasswordAuthenticator(username, password)))
bucket_manager = cluster.buckets()

# Empty the Couchbase bucket. This is best-effort: a failure is reported
# and the script carries on.
bucket_name = 'benchmark'
try:
    # NOTE(review): the Couchbase SDK appears to submit a query only when
    # its result is consumed; execute() forces the DELETE to actually run.
    # Confirm against the installed SDK version.
    cluster.query(f"DELETE FROM `{bucket_name}`").execute()
    print("Successfully deleted all documents in Couchbase bucket")
except Exception as e:
    print(f"Failed to delete documents from Couchbase bucket: {e}")

# NOTE: the bucket is not created here; a previously disabled call to
# bucket_manager.create_bucket(...) has been dropped, so the bucket is
# expected to exist already.
cb_bucket = cluster.bucket(bucket_name)
cb_collection = cb_bucket.default_collection()

# Parameters for data generation
num_parents = 40  # Total parents
children_per_parent = 1000  # Children per parent
random_string_length = 9900  # Adjust this to make document ~10KB
from tqdm import tqdm | |
def generate_data():
    """Write the same parent and child documents to MongoDB and Couchbase.

    For each of ``num_parents`` parents, one parent document is written,
    followed by ``children_per_parent`` child documents that reference it
    through ``parent_id``. Each document carries its own random payload of
    ``random_string_length`` characters. MongoDB receives ``insert_one``
    calls and Couchbase receives ``upsert`` calls keyed by the document's
    ``_id``. A progress bar advances once per document.
    """
    # One write per parent plus one per child.
    expected_writes = num_parents * (1 + children_per_parent)
    with tqdm(total=expected_writes, desc="Generating data") as progress:
        for parent_no in range(num_parents):
            parent_key = f'parent_{parent_no}'
            parent_doc = {
                '_id': parent_key,
                'data': 'Some parent data',
                'random_data': generate_random_string(random_string_length),
            }
            mongo_parents.insert_one(parent_doc)
            cb_collection.upsert(parent_key, parent_doc)
            progress.update(1)

            # Child ids are globally unique: parent p owns the contiguous
            # range [p * children_per_parent, (p + 1) * children_per_parent).
            first_child = parent_no * children_per_parent
            for child_no in range(first_child, first_child + children_per_parent):
                child_key = f'child_{child_no}'
                child_doc = {
                    '_id': child_key,
                    'parent_id': parent_key,
                    'random_data': generate_random_string(random_string_length),
                }
                mongo_children.insert_one(child_doc)
                cb_collection.upsert(child_key, child_doc)
                progress.update(1)
def benchmark_read_speed(db_type, num_samples=1000000):
    """Time random child-then-parent lookups against one database.

    Each sample picks a random child id, fetches that child document by key
    and then fetches the parent named in its ``parent_id`` field. The two
    reads are timed together as one operation. The total elapsed time, the
    median and the standard deviation of the per-operation times are
    printed; nothing is returned.

    Args:
        db_type: ``'mongo'`` or ``'couchbase'``.
        num_samples: Number of child/parent lookups to perform (at least 1).

    Raises:
        ValueError: If ``db_type`` is not recognised or ``num_samples`` is
            less than 1. Previously both cases only failed after the run,
            with a ``StatisticsError`` from the median of an empty list.
    """
    # Validate before touching any database or building the sample list.
    if db_type == 'mongo':
        progress_label = "Benchmarking MongoDB reads"

        def read_pair(child_key):
            child_doc = mongo_children.find_one({'_id': child_key})
            mongo_parents.find_one({'_id': child_doc['parent_id']})
    elif db_type == 'couchbase':
        progress_label = "Benchmarking Couchbase reads"

        def read_pair(child_key):
            child_doc = cb_collection.get(child_key).content_as[dict]
            cb_collection.get(child_doc['parent_id']).content_as[dict]
    else:
        raise ValueError(
            f"Unknown db_type {db_type!r}; expected 'mongo' or 'couchbase'"
        )
    if num_samples < 1:
        raise ValueError("num_samples must be at least 1")

    # Pick the sample ids up front so that random-number generation is not
    # counted as read time.
    last_child_index = num_parents * children_per_parent - 1
    child_indices = [random.randint(0, last_child_index) for _ in range(num_samples)]

    read_times = []  # Seconds taken by each child+parent lookup
    # perf_counter is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()
    for child_index in tqdm(child_indices, desc=progress_label):
        child_key = f'child_{child_index}'  # built outside the timed region
        start_op_time = time.perf_counter()
        read_pair(child_key)
        read_times.append(time.perf_counter() - start_op_time)
    total_time = time.perf_counter() - start_time

    median_read_time = statistics.median(read_times)
    try:
        stdev_read_time = statistics.stdev(read_times)
    except statistics.StatisticsError:
        # stdev needs at least two samples; report 0 for a single sample.
        stdev_read_time = 0

    print(f"Total time for {num_samples} reads in {db_type}: {total_time} seconds")
    print(f"Median read time: {median_read_time:.6f} seconds")
    print(f"Standard deviation of read times: {stdev_read_time:.6f} seconds")
# Create the MongoDB indexes before any data is loaded.
mongo_children.create_index("parent_id")
# NOTE(review): MongoDB normally maintains an index on _id by itself, so
# this call is presumably redundant — confirm before removing.
mongo_parents.create_index("_id")

# Load both stores with the same documents, then run the read benchmark
# against each backend in turn (MongoDB first, Couchbase second).
generate_data()
for backend in ('mongo', 'couchbase'):
    benchmark_read_speed(backend)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment