-
-
Save cernceph/59158b9c53f925cc778b44585ffc897c to your computer and use it in GitHub Desktop.
Simulation of the CRUSH Multi Pick Anomaly
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import random | |
from math import sqrt, log, pow | |
DEBUG = True | |
NPGS = 100000 # num PGs | |
R = 3 # num replicas | |
if NPGS > 3: | |
DEBUG = False | |
print 'Num PGs:', NPGS | |
print 'Num Replicas:', R | |
# OSDs: (id, weight) | |
# TODO: everything below is hard coded for exactly 4 OSDs | |
osds = [(0, 3.0), (1, 3.0), (2, 3.0), (3, 1.0)] | |
weights = {} # OSD weights.. we might tweak these round by round | |
orig_weights = {} # OSD weights. Read only. | |
total_weight = 0 | |
for id, w in osds: | |
weights[id] = float(w) | |
orig_weights[id] = float(w) | |
total_weight += float(w) | |
print 'OSDs (id: weight):', weights | |
# compute the expected # PGs for our 4 OSDs | |
expected = { | |
0: NPGS * R * weights[0]/total_weight, | |
1: NPGS * R * weights[1]/total_weight, | |
2: NPGS * R * weights[2]/total_weight, | |
3: NPGS * R * weights[3]/total_weight, | |
} | |
print '\nExpected: ', expected | |
for r in xrange(R): | |
print ' Replica %d: %s' % (r, [float(v/R) for k,v in expected.iteritems()]) | |
# initialize PG and PG-by-replicanum counters for each OSD | |
pgs = { | |
0: 0, | |
1: 0, | |
2: 0, | |
3: 0, | |
} | |
pgs_by_r = [dict(pgs) for r in xrange(R)] | |
def weighted_choice(choices): | |
total = sum(w for c, w in choices) | |
r = random.uniform(0, total) | |
upto = 0 | |
for c, w in choices: | |
if upto + w >= r: | |
return c | |
upto += w | |
assert False, "Shouldn't get here" | |
print "\nSimulating with existing CRUSH\n" | |
# simple CRUSH | |
# loop over all PGs, select one with weighted choice, remove that option for the next choice. | |
for i in xrange(NPGS): | |
choices = list(osds) | |
for r in xrange(R): | |
pick = weighted_choice(choices) | |
pgs[pick] += 1 | |
pgs_by_r[r][pick] += 1 | |
choices.remove((pick, weights[pick]),) | |
print 'Observed: ', pgs | |
for r in xrange(R): | |
print ' Replica %d: %s' % (r, pgs_by_r[r]) | |
print "\nNow trying your new algorithm\n" | |
# initialize PG and PG-by-replicanum counters for each OSD | |
pgs = { | |
0: 0, | |
1: 0, | |
2: 0, | |
3: 0, | |
} | |
pgs_by_r = [dict(pgs) for r in xrange(R)] | |
# modified CRUSH | |
# loop over each PG. | |
# After each replica selection, adjust the remaining choices' weights with your magic value. | |
sum_all_estimates = 0.0 | |
sum_all_totals = 0.0 | |
for pg in xrange(NPGS): | |
if DEBUG: print "\nPlacing PG", pg | |
choices = list(osds) | |
prob_of_getting_to_next_round = { | |
0: 1.0, | |
1: 1.0, | |
2: 1.0, | |
3: 1.0, | |
} | |
placement = [] # record where this pg is placed for debugging | |
# loop over R replicas, choosing a distinct OSD each time | |
for r in xrange(R): | |
if DEBUG: print "Placing replica %d.%d" % (pg,r) | |
if DEBUG: print ' OSD choices',choices | |
total_weight_start_of_round = float(sum(w for c, w in choices)) # keep track of the original sum of all weights | |
pick = weighted_choice(choices) # choose a OSD for this round | |
pgs[pick] += 1 # inc the chosen OSD's PG counter | |
pgs_by_r[r][pick] += 1 # inc the chosen OSD's PG-by-R counter | |
placement.append(pick) | |
if DEBUG: print ' Total weight =', total_weight_start_of_round | |
if DEBUG: print ' Picked OSD =', pick | |
new_choices = [i for i in choices if i[0] != pick] # make a list of new choices, dropping the selected one | |
choices = new_choices | |
total_weight_next_round = float(sum(w for c, w in choices)) # calculate the sum of remaining OSD weights | |
# reweight if we have more than one choice left | |
if len(choices) > 1: | |
if DEBUG: print "Tweaking for replica %d.%d" % (pg, r+1) | |
new_choices = [] | |
if DEBUG: print ' estimated_new_total = %s - (%s / %s)' % (total_weight_start_of_round, total_weight_start_of_round, float(len(choices)+1)) | |
#estimated_new_total = total_weight_start_of_round - (total_weight_start_of_round/(float(len(choices)+1))) | |
estimated_new_total = total_weight_next_round | |
if DEBUG: print ' estimated_new_total =', estimated_new_total | |
sum_all_estimates += estimated_new_total | |
sum_all_totals += total_weight_next_round | |
for id, w in choices: | |
if DEBUG: print ' tweaking weight of OSD', id, 'with current weight', w | |
# put your bright idea for adjusting the OSD weights here: | |
# new_weight = f(w, orig_weights[id], r, R, total_weight, total_weight_start_of_round, total_weight_next_round) | |
# | |
# thoughts: | |
# | |
# it seems clear that the weight of the small OSDs should become relatively smaller than the large OSDs for each round. | |
# also f cannot be a constant factor on w -- this doesn't change the distribution at all. | |
# | |
# ideas: | |
# new_weight = w # this is the current implementation | |
# new_weight = orig_weights[id] # this is also the current implementation | |
# | |
# new_weight = w * total_weight_start_of_round / total_weight_next_round # useless because same factor applied to all weights | |
# | |
# new_weight = w / (total_weight_start_of_round - w) # Sage's idea. kinda works for 3,3,3,1 example, not really for 1,3,3,1 | |
# | |
# new_weight = w*w # this goes in the right direction but is too aggressive | |
# new_weight = w*sqrt(w) # getting closer, but still too agressive | |
# new_weight = pow(w, sqrt(total_weight_start_of_round / total_weight_next_round)) # utter craziness, almost works | |
# | |
# None of above are continue working if we set OSD weights to 1,3,3,1 -- this is a hard problem. | |
# Loop invariant: | |
# P_thisround * W_current / T_current = W_original / T_original | |
# | |
# new_weight = W_current = ( W_original * T_current ) / ( T_original * P_thisround) | |
# Round 1: | |
# P_thisround = 1.0 | |
# T_cur = total_weight | |
# W_original = w | |
# T_original = total_weight | |
# | |
# 1.0 * W_current / total_weight = w / total_weight | |
# W_current = w | |
# | |
# Round 2: | |
# P_thisround = (1-W_round1/T_round1) | |
# T_current = Unknown. E(T_current) = T_round1 - (T_round1/N_round1) | |
# | |
# Round 3: | |
# P_thisround = (1-W_round1/T_round1)*(1-W_round2/T_round2) | |
# E(T_current) = T_round2 - (T_round2/N_round2) | |
# | |
if DEBUG: print " prob_of_getting_to_next_round[%s] = %s * (1-%s/%s)" % (id, prob_of_getting_to_next_round[id], w, total_weight_start_of_round) | |
prob_of_getting_to_next_round[id] *= (1-w/total_weight_start_of_round) | |
if DEBUG: print ' prob_of_getting_to_next_round[%d] = %f' % (id, prob_of_getting_to_next_round[id]) | |
new_weight = orig_weights[id] * estimated_new_total / (total_weight * prob_of_getting_to_next_round[id]) | |
if DEBUG: print ' new_weight = %s * %s / (%s * %s)' % (orig_weights[id], estimated_new_total, total_weight, prob_of_getting_to_next_round[id]) | |
if DEBUG: print ' new_weight = ', new_weight | |
assert new_weight > 0.0, 'Negative new_weight %s (w=%s, total_weight=%s, total_weight_start_of_round=%s, total_weight_next_round=%s,prob_of_getting_to_next_round=%s)' % (new_weight, w, total_weight, total_weight_start_of_round, total_weight_next_round, prob_of_getting_to_next_round) | |
new_choices.append((id, new_weight),) | |
weights[id] = new_weight | |
# renormalize | |
actual_new_total = float(sum(w for c, w in new_choices)) | |
factor = total_weight / actual_new_total | |
choices = [(id, w * factor) for id,w in new_choices] | |
if DEBUG: print " Actual new total %s / Estimated new total %s = %s" % (actual_new_total, estimated_new_total, actual_new_total/estimated_new_total) | |
if DEBUG: print 'PG %d placed: %s' % (pg, placement) | |
#print 'sum of all estimated totals', sum_all_estimates | |
#print 'sum of all actual totals', sum_all_totals | |
#print sum_all_estimates/sum_all_totals | |
print 'Observed: ', pgs | |
for r in xrange(R): | |
print ' Replica %d: %s' % (r, pgs_by_r[r]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment