Created
November 15, 2024 05:37
-
-
Save jamesy0ung/011a6691710edb47a821cb659ff51f7a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <string> | |
#include <iostream> | |
#include <thread> | |
#include <random> | |
#include <atomic> | |
#include <vector> | |
#include <iomanip> | |
#include <chrono> | |
#include <algorithm> | |
#include <array> | |
class PiCalculator { | |
private: | |
const long long iterations; | |
const unsigned int nthreads; | |
std::atomic<long long> successCount{ 0 }; | |
std::atomic<long long> totalCount{ 0 }; | |
std::atomic<bool> shouldStop{ false }; | |
std::chrono::steady_clock::time_point startTime; | |
// Cache line padding to prevent false sharing | |
struct alignas(64) PaddedCount { | |
long long count; | |
char padding[64 - sizeof(long long)]; | |
}; | |
std::vector<PaddedCount> threadLocalSuccess; | |
std::vector<PaddedCount> threadLocalTotal; | |
// Process points in batches for better efficiency | |
int processBatch(std::mt19937& gen, std::uniform_real_distribution<>& dis, int batchSize) { | |
int successCount = 0; | |
for (int i = 0; i < batchSize; ++i) { | |
double x = dis(gen); | |
double y = dis(gen); | |
if (x * x + y * y <= 1.0) { | |
successCount++; | |
} | |
} | |
return successCount; | |
} | |
void task(int threadId, long long threadIterations) { | |
// Thread-local RNG for better performance | |
std::random_device rd; | |
std::mt19937 gen(rd() ^ static_cast<unsigned int>(threadId)); // XOR with threadId for better distribution | |
std::uniform_real_distribution<> dis(0.0, 1.0); | |
const int BATCH_SIZE = 1024; // Process points in batches | |
const long long ATOMIC_BATCH = 1000000LL; // Reduce atomic operations | |
long long localSuccess = 0; | |
long long localTotal = 0; | |
long long processedIterations = 0; | |
while (processedIterations < threadIterations && !shouldStop) { | |
// Calculate how many iterations to process in this round | |
long long remainingIterations = threadIterations - processedIterations; | |
long long iterationsThisRound = (remainingIterations < ATOMIC_BATCH) ? | |
remainingIterations : ATOMIC_BATCH; | |
// Process batches | |
long long batchesInThisRound = (iterationsThisRound + BATCH_SIZE - 1) / BATCH_SIZE; | |
for (long long b = 0; b < batchesInThisRound; ++b) { | |
// Calculate the size of this batch | |
long long remainingInRound = iterationsThisRound - (b * BATCH_SIZE); | |
int currentBatchSize = static_cast<int>( | |
(remainingInRound < BATCH_SIZE) ? remainingInRound : BATCH_SIZE | |
); | |
if (currentBatchSize <= 0) break; | |
int successInBatch = processBatch(gen, dis, currentBatchSize); | |
localSuccess += successInBatch; | |
localTotal += currentBatchSize; | |
} | |
// Update thread-local counters | |
threadLocalSuccess[threadId].count += localSuccess; | |
threadLocalTotal[threadId].count += localTotal; | |
// Update global counters | |
successCount.fetch_add(localSuccess, std::memory_order_relaxed); | |
totalCount.fetch_add(localTotal, std::memory_order_relaxed); | |
processedIterations += localTotal; | |
localSuccess = 0; | |
localTotal = 0; | |
} | |
} | |
public: | |
PiCalculator(long long n) : | |
iterations(n), | |
nthreads(std::thread::hardware_concurrency()), | |
threadLocalSuccess(nthreads), | |
threadLocalTotal(nthreads) { | |
} | |
double calculate() { | |
startTime = std::chrono::steady_clock::now(); | |
std::vector<std::thread> threads; | |
threads.reserve(nthreads); | |
// Calculate iterations per thread | |
long long iterationsPerThread = iterations / nthreads; | |
long long remainder = iterations % nthreads; | |
// Launch threads | |
for (unsigned int i = 0; i < nthreads; i++) { | |
long long threadIterations = iterationsPerThread + (i < remainder ? 1 : 0); | |
threads.emplace_back(&PiCalculator::task, this, i, threadIterations); | |
} | |
// Progress monitoring thread | |
std::thread progressThread([this]() { | |
while (totalCount < iterations && !shouldStop) { | |
auto now = std::chrono::steady_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - startTime).count(); | |
if (duration > 0) { | |
double progress = static_cast<double>(totalCount) / iterations * 100.0; | |
double pointsPerSecond = static_cast<double>(totalCount) / duration; | |
std::cout << "\rProgress: " << std::fixed << std::setprecision(2) | |
<< progress << "% (" | |
<< std::setprecision(2) << pointsPerSecond / 1000000.0 << "M points/sec)" | |
<< std::flush; | |
} | |
std::this_thread::sleep_for(std::chrono::milliseconds(100)); | |
} | |
}); | |
// Join threads | |
for (auto& thread : threads) { | |
thread.join(); | |
} | |
shouldStop = true; | |
progressThread.join(); | |
// Calculate final result | |
auto endTime = std::chrono::steady_clock::now(); | |
auto duration = std::chrono::duration_cast<std::chrono::seconds>(endTime - startTime).count(); | |
double pi = 4.0 * static_cast<double>(successCount) / static_cast<double>(totalCount); | |
// Print results | |
std::cout << "\nCalculation completed in " << duration << " seconds" << std::endl; | |
std::cout << "Points processed: " << totalCount << std::endl; | |
std::cout << "Points inside circle: " << successCount << std::endl; | |
std::cout << "Approximated value of Pi: " << std::setprecision(10) << pi << std::endl; | |
std::cout << "Points per second: " << std::fixed << std::setprecision(2) | |
<< static_cast<double>(totalCount) / duration / 1000000.0 << "M" << std::endl; | |
return pi; | |
} | |
}; | |
int main() { | |
const long long iterations = 100000000000LL; | |
try { | |
PiCalculator calculator(iterations); | |
calculator.calculate(); | |
} | |
catch (const std::exception& e) { | |
std::cerr << "Error: " << e.what() << std::endl; | |
return 1; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment