@cernceph
Forked from anonymous/mpa.py
Last active February 7, 2017 09:12
Simulation of the CRUSH Multi Pick Anomaly
#!/usr/bin/env python
import random
from math import sqrt, log, pow
DEBUG = True
NPGS = 100000 # num PGs
R = 3 # num replicas
if NPGS > 3:
    DEBUG = False
print 'Num PGs:', NPGS
print 'Num Replicas:', R
# OSDs: (id, weight)
# TODO: everything below is hard coded for exactly 4 OSDs
osds = [(0, 3.0), (1, 3.0), (2, 3.0), (3, 1.0)]
weights = {} # OSD weights.. we might tweak these round by round
orig_weights = {} # OSD weights. Read only.
total_weight = 0
for id, w in osds:
    weights[id] = float(w)
    orig_weights[id] = float(w)
    total_weight += float(w)
print 'OSDs (id: weight):', weights
# compute the expected # PGs for our 4 OSDs
expected = {
    0: NPGS * R * weights[0] / total_weight,
    1: NPGS * R * weights[1] / total_weight,
    2: NPGS * R * weights[2] / total_weight,
    3: NPGS * R * weights[3] / total_weight,
}
print '\nExpected: ', expected
for r in xrange(R):
    print ' Replica %d: %s' % (r, [float(v/R) for k, v in expected.iteritems()])
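# A quick worked example with the hard-coded 3,3,3,1 weights above (illustration only):
# total_weight = 10, so expected = {0: 90000, 1: 90000, 2: 90000, 3: 30000},
# i.e. each of the R = 3 replica slots should hold 30000 / 30000 / 30000 / 10000 PGs.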
# initialize PG and PG-by-replicanum counters for each OSD
pgs = {
    0: 0,
    1: 0,
    2: 0,
    3: 0,
}
pgs_by_r = [dict(pgs) for r in xrange(R)]
def weighted_choice(choices):
    total = sum(w for c, w in choices)
    r = random.uniform(0, total)
    upto = 0
    for c, w in choices:
        if upto + w >= r:
            return c
        upto += w
    assert False, "Shouldn't get here"
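# For illustration: with the weights above, weighted_choice([(0, 3.0), (3, 1.0)])
# returns OSD 0 about 75% of the time and OSD 3 about 25% of the time, since each
# candidate wins with probability proportional to its weight.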
print "\nSimulating with existing CRUSH\n"
# simple CRUSH
# loop over all PGs; for each replica, select an OSD with weighted choice and remove it from the candidates for the next pick.
for i in xrange(NPGS):
    choices = list(osds)
    for r in xrange(R):
        pick = weighted_choice(choices)
        pgs[pick] += 1
        pgs_by_r[r][pick] += 1
        choices.remove((pick, weights[pick]))
print 'Observed: ', pgs
for r in xrange(R):
    print ' Replica %d: %s' % (r, pgs_by_r[r])
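# A back-of-the-envelope check of the anomaly for the 3,3,3,1 weights (illustration
# only): OSD 3 should get 1/10 of every replica slot, but with plain pick-and-remove
# its chance of being the *second* pick is
#
#   P(osd3 at r=1) = sum over first picks j != 3 of (w_j/10) * (w_3 / (10 - w_j))
#                  = 3 * (3/10) * (1/7) = 9/70 ~= 0.1286
#
# i.e. roughly 12857 PGs at replica 1 instead of the expected 10000, with the big
# OSDs correspondingly short -- this is the multi-pick anomaly the reweighting
# below tries to remove.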
print "\nNow trying your new algorithm\n"
# initialize PG and PG-by-replicanum counters for each OSD
pgs = {
    0: 0,
    1: 0,
    2: 0,
    3: 0,
}
pgs_by_r = [dict(pgs) for r in xrange(R)]
# modified CRUSH
# loop over each PG.
# After each replica selection, adjust the remaining choices' weights with your magic value.
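# The idea, in short (made precise in the loop-invariant notes inside the loop below):
# after every pick, each surviving OSD's weight is reset to roughly
#
#   new_weight ~= orig_weight * E[remaining total] / (total_weight * P(OSD survived to this round))
#
# so that, conditioned on still being a candidate, an OSD is picked with whatever
# probability keeps its overall share of every replica slot at orig_weight / total_weight.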
sum_all_estimates = 0.0
sum_all_totals = 0.0
for pg in xrange(NPGS):
    if DEBUG: print "\nPlacing PG", pg
    choices = list(osds)
    prob_of_getting_to_next_round = {
        0: 1.0,
        1: 1.0,
        2: 1.0,
        3: 1.0,
    }
    placement = []  # record where this pg is placed for debugging
    # loop over R replicas, choosing a distinct OSD each time
    for r in xrange(R):
        if DEBUG: print "Placing replica %d.%d" % (pg, r)
        if DEBUG: print ' OSD choices', choices
        total_weight_start_of_round = float(sum(w for c, w in choices))  # keep track of the original sum of all weights
        pick = weighted_choice(choices)  # choose an OSD for this round
        pgs[pick] += 1  # inc the chosen OSD's PG counter
        pgs_by_r[r][pick] += 1  # inc the chosen OSD's PG-by-R counter
        placement.append(pick)
        if DEBUG: print ' Total weight =', total_weight_start_of_round
        if DEBUG: print ' Picked OSD =', pick
        new_choices = [i for i in choices if i[0] != pick]  # make a list of new choices, dropping the selected one
        choices = new_choices
        total_weight_next_round = float(sum(w for c, w in choices))  # calculate the sum of remaining OSD weights
        # reweight if we have more than one choice left
        if len(choices) > 1:
            if DEBUG: print "Tweaking for replica %d.%d" % (pg, r+1)
            new_choices = []
            if DEBUG: print ' estimated_new_total = %s - (%s / %s)' % (total_weight_start_of_round, total_weight_start_of_round, float(len(choices)+1))
            #estimated_new_total = total_weight_start_of_round - (total_weight_start_of_round/(float(len(choices)+1)))
            estimated_new_total = total_weight_next_round
            if DEBUG: print ' estimated_new_total =', estimated_new_total
            sum_all_estimates += estimated_new_total
            sum_all_totals += total_weight_next_round
            for id, w in choices:
                if DEBUG: print ' tweaking weight of OSD', id, 'with current weight', w
                # put your bright idea for adjusting the OSD weights here:
                #   new_weight = f(w, orig_weights[id], r, R, total_weight, total_weight_start_of_round, total_weight_next_round)
                #
                # thoughts:
                #
                # it seems clear that the weight of the small OSDs should become relatively smaller than that of the large OSDs each round.
                # also, f cannot be a constant factor on w -- that doesn't change the distribution at all.
                #
                # ideas:
                #   new_weight = w                  # this is the current implementation
                #   new_weight = orig_weights[id]   # this is also the current implementation
                #
                #   new_weight = w * total_weight_start_of_round / total_weight_next_round  # useless because the same factor is applied to all weights
                #
                #   new_weight = w / (total_weight_start_of_round - w)  # Sage's idea. kinda works for the 3,3,3,1 example, not really for 1,3,3,1
                #
                #   new_weight = w*w        # this goes in the right direction but is too aggressive
                #   new_weight = w*sqrt(w)  # getting closer, but still too aggressive
                #   new_weight = pow(w, sqrt(total_weight_start_of_round / total_weight_next_round))  # utter craziness, almost works
                #
                # None of the above keeps working if we set the OSD weights to 1,3,3,1 -- this is a hard problem.
                # Loop invariant:
                #   P_thisround * W_current / T_current = W_original / T_original
                #
                #   new_weight = W_current = (W_original * T_current) / (T_original * P_thisround)
                #
                # Round 1:
                #   P_thisround = 1.0
                #   T_current = total_weight
                #   W_original = w
                #   T_original = total_weight
                #
                #   1.0 * W_current / total_weight = w / total_weight
                #   W_current = w
                #
                # Round 2:
                #   P_thisround = (1 - W_round1/T_round1)
                #   T_current = unknown.  E(T_current) = T_round1 - (T_round1/N_round1)
                #
                # Round 3:
                #   P_thisround = (1 - W_round1/T_round1) * (1 - W_round2/T_round2)
                #   E(T_current) = T_round2 - (T_round2/N_round2)
                #
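                # Worked example for round 2 with the 3,3,3,1 weights (illustration only),
                # assuming OSD 0 was picked in round 1, so the survivors are 1:3, 2:3, 3:1
                # and estimated_new_total = total_weight_next_round = 7:
                #
                #   OSD 1: P_survive = 1 - 3/10 = 0.7   new_weight = 3 * 7 / (10 * 0.7) = 3.000
                #   OSD 2: same as OSD 1                new_weight = 3.000
                #   OSD 3: P_survive = 1 - 1/10 = 0.9   new_weight = 1 * 7 / (10 * 0.9) ~= 0.778
                #
                # i.e. the small OSD's share of the remaining weight drops from 1/7 ~= 0.143
                # to 0.778/6.778 ~= 0.115, countering its inflated conditional chance of
                # being picked in the later rounds.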
                if DEBUG: print " prob_of_getting_to_next_round[%s] = %s * (1-%s/%s)" % (id, prob_of_getting_to_next_round[id], w, total_weight_start_of_round)
                prob_of_getting_to_next_round[id] *= (1 - w/total_weight_start_of_round)
                if DEBUG: print ' prob_of_getting_to_next_round[%d] = %f' % (id, prob_of_getting_to_next_round[id])
                new_weight = orig_weights[id] * estimated_new_total / (total_weight * prob_of_getting_to_next_round[id])
                if DEBUG: print ' new_weight = %s * %s / (%s * %s)' % (orig_weights[id], estimated_new_total, total_weight, prob_of_getting_to_next_round[id])
                if DEBUG: print ' new_weight = ', new_weight
                assert new_weight > 0.0, 'Negative new_weight %s (w=%s, total_weight=%s, total_weight_start_of_round=%s, total_weight_next_round=%s, prob_of_getting_to_next_round=%s)' % (new_weight, w, total_weight, total_weight_start_of_round, total_weight_next_round, prob_of_getting_to_next_round)
                new_choices.append((id, new_weight))
                weights[id] = new_weight
            # renormalize
            actual_new_total = float(sum(w for c, w in new_choices))
            factor = total_weight / actual_new_total
            choices = [(id, w * factor) for id, w in new_choices]
            if DEBUG: print " Actual new total %s / Estimated new total %s = %s" % (actual_new_total, estimated_new_total, actual_new_total/estimated_new_total)
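            # Note: the renormalization above multiplies every remaining weight by the same
            # factor, so it does not change the relative pick probabilities for the next
            # round; it only scales the working total back up to total_weight.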
    if DEBUG: print 'PG %d placed: %s' % (pg, placement)
#print 'sum of all estimated totals', sum_all_estimates
#print 'sum of all actual totals', sum_all_totals
#print sum_all_estimates/sum_all_totals
print 'Observed: ', pgs
for r in xrange(R):
    print ' Replica %d: %s' % (r, pgs_by_r[r])