Created
July 5, 2025 21:34
-
-
Save PaulKinlan/a091b7c52f3a7cc43d081cc79f945c63 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Zipf exponent. 1.2 seems to map well to the top 100 sites; whether it also
// holds for the rest of the web is an open assumption.
const alpha = 1.2;

// Number of ranked sites: 360 million is the number of sites on the web as of 2023.
const size = 360_000_000;

/*
  83 billion is the number of navigations for the most popular site (the one
  passed to valueAtRank as rank 1) for one month in 2025. The size of the web
  above is a 2023 figure, not a 2025 one, but it is treated as a good estimate
  based on current trends.
*/
const max = 83_000_000_000;
/**
 * Zipfian value for a given rank: the top value divided by rank^alpha.
 *
 * Ranks are 1-based: rank 1 returns `valueAt0` unchanged. Passing rank 0
 * divides by zero (Infinity for a positive `valueAt0`), so callers must start
 * counting at 1.
 *
 * @param {number} valueAt0 - Value of the top-ranked item.
 * @param {number} rank - 1-based rank to evaluate.
 * @param {number} alpha - Zipf exponent; larger values decay faster.
 * @returns {number} Estimated value at `rank` (not rounded).
 */
const valueAtRank = (valueAt0, rank, alpha) => {
  return valueAt0 / Math.pow(rank, alpha);
};
/*
  Calculate the total number of navigations following a Zipfian distribution.

  Every rank from 1 to `size` contributes its Zipfian value, floored to a whole
  number of navigations. NOTE(review): rankAtPercentile and percentageAtRank
  accumulate the un-floored values, so their cumulative totals can differ
  slightly from this sum.
*/
let sum = 0;
for (let i = 0; i < size; i++) {
  // `i` is 0-based but valueAtRank expects a 1-based rank, so the first
  // iteration (rank 1) yields `max` itself.
  // Alpha is the Zipfian constant, 1.2 is a good value looking at SimilarWeb.
  sum += Math.floor(valueAtRank(max, i + 1, alpha));

  // Above MAX_SAFE_INTEGER integer addition is no longer exact, so abort
  // instead of reporting a silently wrong total.
  if (sum > Number.MAX_SAFE_INTEGER) {
    throw new Error("Sum exceeded MAX_SAFE_INTEGER");
  }
}
console.log(`Sum of generated values: ${sum}`);
/**
 * Calculate the rank at a given percentile.
 *
 * Walks ranks from 1 upwards, accumulating each rank's Zipfian value (using
 * the file-level `max` and `alpha`) until the running total reaches
 * `percentile` percent of `sum`. The returned rank is inclusive: it is the
 * first rank whose cumulative total meets or exceeds the target.
 *
 * Per-rank values are not floored here.
 *
 * @param {number} percentile - Target share of `sum`, in percent (0-100).
 * @param {number} size - Highest rank to consider.
 * @param {number} sum - Total that the percentile is taken of.
 * @returns {number} First rank reaching the target, or `size` if the target
 *   is never reached within `size` ranks.
 */
const rankAtPercentile = (percentile, size, sum) => {
  const target = sum * (percentile / 100);
  let cumulativeSum = 0;
  for (let rank = 1; rank <= size; rank++) {
    cumulativeSum += valueAtRank(max, rank, alpha);
    if (cumulativeSum >= target) {
      return rank;
    }
  }
  // Target not reached within `size` ranks: report the last rank.
  return size;
};
/**
 * Cumulative share of `sum` covered by ranks 1 through `rank`, inclusive.
 *
 * The bound is inclusive so that the result agrees with rankAtPercentile,
 * which returns the first rank whose cumulative total (including that rank)
 * reaches the target. An exclusive bound would report 0% for rank 1.
 *
 * Per-rank values come from valueAtRank with the file-level `max` and `alpha`
 * and are not floored.
 *
 * @param {number} rank - 1-based rank up to which values are accumulated.
 * @param {number} sum - Total that the percentage is taken of.
 * @returns {number} Percentage of `sum` accounted for by the top `rank` ranks.
 */
const percentageAtRank = (rank, sum) => {
  let cumulativeSum = 0;
  for (let i = 1; i <= rank; i++) {
    cumulativeSum += valueAtRank(max, i, alpha);
  }
  return (cumulativeSum / sum) * 100;
};
// Report the rank at which the cumulative navigations reach each percentile.
for (const percentile of [50, 75, 90, 95]) {
  console.log(
    `${percentile}th percentile ${rankAtPercentile(percentile, size, sum)}`
  );
}

// Report the cumulative percentage of navigations at selected ranks.
for (const rank of [1000, 10000, 100000, 1000000]) {
  console.log(`Rank ${rank} is at ${percentageAtRank(rank, sum)}%`);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment