cobanov · January 28, 2025 18:50
diff --git a/hw_comparison.py b/hw_comparison.py
 # Import necessary libraries
 from sklearn.datasets import fetch_openml
 from sklearn.model_selection import train_test_split
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.metrics import accuracy_score
 import time

 # Benchmark script: times how long a Random Forest takes to fit on MNIST and
 # then reports its accuracy on a held-out split.

 # Load a larger dataset (MNIST dataset with 70,000 samples)
 mnist = fetch_openml("mnist_784", version=1)
 X, y = mnist.data, mnist.target  # Features and labels

 # Split the dataset into training and testing sets (80% train, 20% test).
 # The fixed random_state keeps the split identical from run to run.
 X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.2, random_state=42
 )

 # Initialize a Random Forest Classifier. The tree count and seed are pinned
 # explicitly so that repeated runs do the same work and timings are comparable.
 clf = RandomForestClassifier(n_estimators=100, random_state=42)

 # Measure the start time. perf_counter() is a monotonic, high-resolution clock
 # intended for interval timing; unlike time.time() it cannot be skewed by
 # wall-clock adjustments (NTP sync, manual changes) during a long fit.
 start_time = time.perf_counter()

 # Train the model on the training data (this is the only step being timed)
 clf.fit(X_train, y_train)

 # Measure the end time and calculate training duration in seconds
 end_time = time.perf_counter()
 training_time = end_time - start_time
 print(f"Training time: {training_time:.2f} seconds")

 # Make predictions on the test data
 y_pred = clf.predict(X_test)

 # Evaluate the model's accuracy (fraction of correct predictions, shown as %)
 accuracy = accuracy_score(y_test, y_pred)
 print(f"Accuracy of the Random Forest Classifier: {accuracy * 100:.2f}%")
	# Import necessary libraries
	from sklearn.datasets import fetch_openml
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.metrics import accuracy_score
	import time

	# Benchmark script: times how long a Random Forest takes to fit on MNIST and
	# then reports its accuracy on a held-out split.

	# Load a larger dataset (MNIST dataset with 70,000 samples)
	mnist = fetch_openml("mnist_784", version=1)
	X, y = mnist.data, mnist.target  # Features and labels

	# Split the dataset into training and testing sets (80% train, 20% test).
	# The fixed random_state keeps the split identical from run to run.
	X_train, X_test, y_train, y_test = train_test_split(
	    X, y, test_size=0.2, random_state=42
	)

	# Initialize a Random Forest Classifier. The tree count and seed are pinned
	# explicitly so that repeated runs do the same work and timings are comparable.
	clf = RandomForestClassifier(n_estimators=100, random_state=42)

	# Measure the start time. perf_counter() is a monotonic, high-resolution clock
	# intended for interval timing; unlike time.time() it cannot be skewed by
	# wall-clock adjustments (NTP sync, manual changes) during a long fit.
	start_time = time.perf_counter()

	# Train the model on the training data (this is the only step being timed)
	clf.fit(X_train, y_train)

	# Measure the end time and calculate training duration in seconds
	end_time = time.perf_counter()
	training_time = end_time - start_time
	print(f"Training time: {training_time:.2f} seconds")

	# Make predictions on the test data
	y_pred = clf.predict(X_test)

	# Evaluate the model's accuracy (fraction of correct predictions, shown as %)
	accuracy = accuracy_score(y_test, y_pred)
	print(f"Accuracy of the Random Forest Classifier: {accuracy * 100:.2f}%")