cernceph · February 7, 2017 09:12
diff --git a/mpa.py b/mpa.py
 #!/usr/bin/env python

 import random
 from math import sqrt, log, pow

 DEBUG = True
 NPGS = 100000 # num PGs
 R = 3  # num replicas

 if NPGS > 3:
  DEBUG = False

 print 'Num PGs:', NPGS
 print 'Num Replicas:', R

 # OSDs: (id, weight)
 # TODO: everything below is hard coded for exactly 4 OSDs
 osds = [(0, 3.0), (1, 3.0), (2, 3.0), (3, 1.0)]

 weights = {}  # OSD weights.. we might tweak these round by round
 orig_weights = {}  # OSD weights. Read only.
 total_weight = 0
 for id, w in osds:
  weights[id] = float(w)
  orig_weights[id] = float(w)
  total_weight += float(w)

 print 'OSDs (id: weight):', weights

 # compute the expected # PGs for our 4 OSDs
 expected = {
  0: NPGS * R * weights[0]/total_weight,
  1: NPGS * R * weights[1]/total_weight,
  2: NPGS * R * weights[2]/total_weight,
  3: NPGS * R * weights[3]/total_weight,
 }

 print '\nExpected:  ', expected
 for r in xrange(R):
  print '  Replica %d: %s' % (r, [float(v/R) for k,v in expected.iteritems()])

 # initialize PG and PG-by-replicanum counters for each OSD
 pgs = {
  0: 0,
  1: 0,
  2: 0,
  3: 0,
 }
 pgs_by_r = [dict(pgs) for r in xrange(R)]

 def weighted_choice(choices):
   total = sum(w for c, w in choices)
   r = random.uniform(0, total)
   upto = 0
   for c, w in choices:
      if upto + w >= r:
         return c
      upto += w
   assert False, "Shouldn't get here"

 print "\nSimulating with existing CRUSH\n"

 # simple CRUSH
 # loop over all PGs, select one with weighted choice, remove that option for the next choice.
 for i in xrange(NPGS):
  choices = list(osds)
  for r in xrange(R):
    pick = weighted_choice(choices)
    pgs[pick] += 1
    pgs_by_r[r][pick] += 1
    choices.remove((pick, weights[pick]),)

 print 'Observed:   ', pgs
 for r in xrange(R):
  print '  Replica %d: %s' % (r, pgs_by_r[r])


 print "\nNow trying your new algorithm\n"

 # initialize PG and PG-by-replicanum counters for each OSD
 pgs = {
  0: 0,
  1: 0,
  2: 0,
  3: 0,
 }
 pgs_by_r = [dict(pgs) for r in xrange(R)]

 # modified CRUSH
 # loop over each PG.
 # After each replica selection, adjust the remaining choices' weights with your magic value.
 sum_all_estimates = 0.0
 sum_all_totals = 0.0
 for pg in xrange(NPGS):
  if DEBUG: print "\nPlacing PG", pg
  choices = list(osds)
  prob_of_getting_to_next_round = {
    0: 1.0,
    1: 1.0,
    2: 1.0,
    3: 1.0,
  }
  placement = [] # record where this pg is placed for debugging

  # loop over R replicas, choosing a distinct OSD each time
  for r in xrange(R):
    if DEBUG: print "Placing replica %d.%d" % (pg,r)
    if DEBUG: print '  OSD choices',choices
    total_weight_start_of_round = float(sum(w for c, w in choices)) # keep track of the original sum of all weights
    pick = weighted_choice(choices) # choose a OSD for this round
    pgs[pick] += 1 # inc the chosen OSD's PG counter
    pgs_by_r[r][pick] += 1 # inc the chosen OSD's PG-by-R counter
    placement.append(pick)
    if DEBUG: print '  Total weight =', total_weight_start_of_round
    if DEBUG: print '  Picked OSD =', pick

    new_choices = [i for i in choices if i[0] != pick] # make a list of new choices, dropping the selected one
    choices = new_choices

    total_weight_next_round = float(sum(w for c, w in choices)) # calculate the sum of remaining OSD weights

    # reweight if we have more than one choice left
    if len(choices) > 1:
      if DEBUG: print "Tweaking for replica %d.%d" % (pg, r+1)
      new_choices = []
      if DEBUG: print '  estimated_new_total = %s - (%s / %s)' % (total_weight_start_of_round, total_weight_start_of_round, float(len(choices)+1))
      #estimated_new_total = total_weight_start_of_round - (total_weight_start_of_round/(float(len(choices)+1)))
      estimated_new_total = total_weight_next_round
      if DEBUG: print '  estimated_new_total =', estimated_new_total
      sum_all_estimates += estimated_new_total
      sum_all_totals += total_weight_next_round
      for id, w in choices:
        if DEBUG: print '  tweaking weight of OSD', id, 'with current weight', w
        # put your bright idea for adjusting the OSD weights here:
        #   new_weight = f(w, orig_weights[id], r, R, total_weight, total_weight_start_of_round, total_weight_next_round)
        #
        # thoughts:
        #   
        # it seems clear that the weight of the small OSDs should become relatively smaller than the large OSDs for each round.
        # also f cannot be a constant factor on w -- this doesn't change the distribution at all.
        #
        # ideas:
        #   new_weight = w  # this is the current implementation
        #   new_weight = orig_weights[id]  # this is also the current implementation
        #
        #   new_weight = w * total_weight_start_of_round / total_weight_next_round   # useless because same factor applied to all weights
        #
        #   new_weight = w / (total_weight_start_of_round - w)  # Sage's idea. kinda works for 3,3,3,1 example, not really for 1,3,3,1
        #
        #   new_weight = w*w # this goes in the right direction but is too aggressive
        #   new_weight = w*sqrt(w) # getting closer, but still too agressive
        #   new_weight = pow(w, sqrt(total_weight_start_of_round / total_weight_next_round))   # utter craziness, almost works
        #
        # None of above are continue working if we set OSD weights to 1,3,3,1 -- this is a hard problem.

        # Loop invariant:
        # P_thisround * W_current / T_current = W_original / T_original
        #
        # new_weight = W_current = ( W_original * T_current ) / ( T_original * P_thisround)
        # Round 1:
        #   P_thisround = 1.0
        #   T_cur = total_weight
        #   W_original = w
        #   T_original = total_weight
        #
        #   1.0 * W_current / total_weight = w / total_weight
        #   W_current = w
        # 
        # Round 2:
        #   P_thisround = (1-W_round1/T_round1)
        #   T_current = Unknown. E(T_current) = T_round1 - (T_round1/N_round1)
        #
        # Round 3:
        #   P_thisround = (1-W_round1/T_round1)*(1-W_round2/T_round2)
        #   E(T_current) = T_round2 - (T_round2/N_round2)
        #    
        if DEBUG: print "    prob_of_getting_to_next_round[%s] = %s * (1-%s/%s)" % (id, prob_of_getting_to_next_round[id], w, total_weight_start_of_round)
        prob_of_getting_to_next_round[id] *= (1-w/total_weight_start_of_round)
        if DEBUG: print '    prob_of_getting_to_next_round[%d] = %f' %  (id, prob_of_getting_to_next_round[id])
        new_weight = orig_weights[id] * estimated_new_total / (total_weight * prob_of_getting_to_next_round[id])
        if DEBUG: print '    new_weight = %s * %s / (%s * %s)' % (orig_weights[id], estimated_new_total, total_weight, prob_of_getting_to_next_round[id])
        if DEBUG: print '    new_weight = ', new_weight

        assert new_weight > 0.0, 'Negative new_weight %s (w=%s, total_weight=%s, total_weight_start_of_round=%s, total_weight_next_round=%s,prob_of_getting_to_next_round=%s)' % (new_weight, w, total_weight, total_weight_start_of_round, total_weight_next_round, prob_of_getting_to_next_round)
        new_choices.append((id, new_weight),)
        weights[id] = new_weight

      # renormalize
      actual_new_total = float(sum(w for c, w in new_choices))
      factor = total_weight / actual_new_total
      choices = [(id, w * factor) for id,w in new_choices]

      if DEBUG: print "    Actual new total %s / Estimated new total %s = %s" % (actual_new_total, estimated_new_total, actual_new_total/estimated_new_total)

  if DEBUG: print 'PG %d placed: %s' % (pg, placement)

 #print 'sum of all estimated totals', sum_all_estimates
 #print 'sum of all actual totals', sum_all_totals
 #print sum_all_estimates/sum_all_totals

 print 'Observed:   ', pgs
 for r in xrange(R):
  print '  Replica %d: %s' % (r, pgs_by_r[r])
	#!/usr/bin/env python

	import random
	from math import sqrt, log, pow

	DEBUG = True
	NPGS = 100000 # num PGs
	R = 3 # num replicas

	if NPGS > 3:
	DEBUG = False

	print 'Num PGs:', NPGS
	print 'Num Replicas:', R

	# OSDs: (id, weight)
	# TODO: everything below is hard coded for exactly 4 OSDs
	osds = [(0, 3.0), (1, 3.0), (2, 3.0), (3, 1.0)]

	weights = {} # OSD weights.. we might tweak these round by round
	orig_weights = {} # OSD weights. Read only.
	total_weight = 0
	for id, w in osds:
	weights[id] = float(w)
	orig_weights[id] = float(w)
	total_weight += float(w)

	print 'OSDs (id: weight):', weights

	# compute the expected # PGs for our 4 OSDs
	expected = {
	0: NPGS * R * weights[0]/total_weight,
	1: NPGS * R * weights[1]/total_weight,
	2: NPGS * R * weights[2]/total_weight,
	3: NPGS * R * weights[3]/total_weight,
	}

	print '\nExpected: ', expected
	for r in xrange(R):
	print ' Replica %d: %s' % (r, [float(v/R) for k,v in expected.iteritems()])

	# initialize PG and PG-by-replicanum counters for each OSD
	pgs = {
	0: 0,
	1: 0,
	2: 0,
	3: 0,
	}
	pgs_by_r = [dict(pgs) for r in xrange(R)]

	def weighted_choice(choices):
	total = sum(w for c, w in choices)
	r = random.uniform(0, total)
	upto = 0
	for c, w in choices:
	if upto + w >= r:
	return c
	upto += w
	assert False, "Shouldn't get here"

	print "\nSimulating with existing CRUSH\n"

	# simple CRUSH
	# loop over all PGs, select one with weighted choice, remove that option for the next choice.
	for i in xrange(NPGS):
	choices = list(osds)
	for r in xrange(R):
	pick = weighted_choice(choices)
	pgs[pick] += 1
	pgs_by_r[r][pick] += 1
	choices.remove((pick, weights[pick]),)

	print 'Observed: ', pgs
	for r in xrange(R):
	print ' Replica %d: %s' % (r, pgs_by_r[r])


	print "\nNow trying your new algorithm\n"

	# initialize PG and PG-by-replicanum counters for each OSD
	pgs = {
	0: 0,
	1: 0,
	2: 0,
	3: 0,
	}
	pgs_by_r = [dict(pgs) for r in xrange(R)]

	# modified CRUSH
	# loop over each PG.
	# After each replica selection, adjust the remaining choices' weights with your magic value.
	sum_all_estimates = 0.0
	sum_all_totals = 0.0
	for pg in xrange(NPGS):
	if DEBUG: print "\nPlacing PG", pg
	choices = list(osds)
	prob_of_getting_to_next_round = {
	0: 1.0,
	1: 1.0,
	2: 1.0,
	3: 1.0,
	}
	placement = [] # record where this pg is placed for debugging

	# loop over R replicas, choosing a distinct OSD each time
	for r in xrange(R):
	if DEBUG: print "Placing replica %d.%d" % (pg,r)
	if DEBUG: print ' OSD choices',choices
	total_weight_start_of_round = float(sum(w for c, w in choices)) # keep track of the original sum of all weights
	pick = weighted_choice(choices) # choose a OSD for this round
	pgs[pick] += 1 # inc the chosen OSD's PG counter
	pgs_by_r[r][pick] += 1 # inc the chosen OSD's PG-by-R counter
	placement.append(pick)
	if DEBUG: print ' Total weight =', total_weight_start_of_round
	if DEBUG: print ' Picked OSD =', pick

	new_choices = [i for i in choices if i[0] != pick] # make a list of new choices, dropping the selected one
	choices = new_choices

	total_weight_next_round = float(sum(w for c, w in choices)) # calculate the sum of remaining OSD weights

	# reweight if we have more than one choice left
	if len(choices) > 1:
	if DEBUG: print "Tweaking for replica %d.%d" % (pg, r+1)
	new_choices = []
	if DEBUG: print ' estimated_new_total = %s - (%s / %s)' % (total_weight_start_of_round, total_weight_start_of_round, float(len(choices)+1))
	#estimated_new_total = total_weight_start_of_round - (total_weight_start_of_round/(float(len(choices)+1)))
	estimated_new_total = total_weight_next_round
	if DEBUG: print ' estimated_new_total =', estimated_new_total
	sum_all_estimates += estimated_new_total
	sum_all_totals += total_weight_next_round
	for id, w in choices:
	if DEBUG: print ' tweaking weight of OSD', id, 'with current weight', w
	# put your bright idea for adjusting the OSD weights here:
	# new_weight = f(w, orig_weights[id], r, R, total_weight, total_weight_start_of_round, total_weight_next_round)
	#
	# thoughts:
	#
	# it seems clear that the weight of the small OSDs should become relatively smaller than the large OSDs for each round.
	# also f cannot be a constant factor on w -- this doesn't change the distribution at all.
	#
	# ideas:
	# new_weight = w # this is the current implementation
	# new_weight = orig_weights[id] # this is also the current implementation
	#
	# new_weight = w * total_weight_start_of_round / total_weight_next_round # useless because same factor applied to all weights
	#
	# new_weight = w / (total_weight_start_of_round - w) # Sage's idea. kinda works for 3,3,3,1 example, not really for 1,3,3,1
	#
	# new_weight = w*w # this goes in the right direction but is too aggressive
	# new_weight = w*sqrt(w) # getting closer, but still too agressive
	# new_weight = pow(w, sqrt(total_weight_start_of_round / total_weight_next_round)) # utter craziness, almost works
	#
	# None of above are continue working if we set OSD weights to 1,3,3,1 -- this is a hard problem.

	# Loop invariant:
	# P_thisround * W_current / T_current = W_original / T_original
	#
	# new_weight = W_current = ( W_original * T_current ) / ( T_original * P_thisround)
	# Round 1:
	# P_thisround = 1.0
	# T_cur = total_weight
	# W_original = w
	# T_original = total_weight
	#
	# 1.0 * W_current / total_weight = w / total_weight
	# W_current = w
	#
	# Round 2:
	# P_thisround = (1-W_round1/T_round1)
	# T_current = Unknown. E(T_current) = T_round1 - (T_round1/N_round1)
	#
	# Round 3:
	# P_thisround = (1-W_round1/T_round1)*(1-W_round2/T_round2)
	# E(T_current) = T_round2 - (T_round2/N_round2)
	#
	if DEBUG: print " prob_of_getting_to_next_round[%s] = %s * (1-%s/%s)" % (id, prob_of_getting_to_next_round[id], w, total_weight_start_of_round)
	prob_of_getting_to_next_round[id] *= (1-w/total_weight_start_of_round)
	if DEBUG: print ' prob_of_getting_to_next_round[%d] = %f' % (id, prob_of_getting_to_next_round[id])
	new_weight = orig_weights[id] * estimated_new_total / (total_weight * prob_of_getting_to_next_round[id])
	if DEBUG: print ' new_weight = %s * %s / (%s * %s)' % (orig_weights[id], estimated_new_total, total_weight, prob_of_getting_to_next_round[id])
	if DEBUG: print ' new_weight = ', new_weight

	assert new_weight > 0.0, 'Negative new_weight %s (w=%s, total_weight=%s, total_weight_start_of_round=%s, total_weight_next_round=%s,prob_of_getting_to_next_round=%s)' % (new_weight, w, total_weight, total_weight_start_of_round, total_weight_next_round, prob_of_getting_to_next_round)
	new_choices.append((id, new_weight),)
	weights[id] = new_weight

	# renormalize
	actual_new_total = float(sum(w for c, w in new_choices))
	factor = total_weight / actual_new_total
	choices = [(id, w * factor) for id,w in new_choices]

	if DEBUG: print " Actual new total %s / Estimated new total %s = %s" % (actual_new_total, estimated_new_total, actual_new_total/estimated_new_total)

	if DEBUG: print 'PG %d placed: %s' % (pg, placement)

	#print 'sum of all estimated totals', sum_all_estimates
	#print 'sum of all actual totals', sum_all_totals
	#print sum_all_estimates/sum_all_totals

	print 'Observed: ', pgs
	for r in xrange(R):
	print ' Replica %d: %s' % (r, pgs_by_r[r])