Skip to content

Instantly share code, notes, and snippets.

@daien
Created October 8, 2011 16:56

Revisions

  1. daien revised this gist Nov 14, 2011. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion simplex_projection.py
    Original file line number Diff line number Diff line change
    @@ -56,7 +56,7 @@ def euclidean_proj_simplex(v, s=1):
    # get the number of > 0 components of the optimal solution
    rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
    # compute the Lagrange multiplier associated to the simplex constraint
    theta = float(cssv[rho] - s) / rho
    theta = (cssv[rho] - s) / (rho + 1.0)
    # compute the projection by thresholding v using theta
    w = (v - theta).clip(min=0)
    return w
  2. daien revised this gist Nov 13, 2011. 1 changed file with 0 additions and 3 deletions.
    3 changes: 0 additions & 3 deletions simplex_projection.py
    Original file line number Diff line number Diff line change
    @@ -50,16 +50,13 @@ def euclidean_proj_simplex(v, s=1):
    if v.sum() == s and np.alltrue(v >= 0):
    # best projection: itself!
    return v
    # set to zero the negative part
    v = (v > 0) * v
    # get the array of cumulative sums of a sorted (decreasing) copy of v
    u = np.sort(v)[::-1]
    cssv = np.cumsum(u)
    # get the number of > 0 components of the optimal solution
    rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
    # compute the Lagrange multiplier associated to the simplex constraint
    theta = float(cssv[rho] - s) / rho
    # theta = max(0, float(cssv[rho] - s) / rho) # used by Duchi: projects in ball
    # compute the projection by thresholding v using theta
    w = (v - theta).clip(min=0)
    return w
  3. daien revised this gist Nov 13, 2011. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion simplex_projection.py
    Original file line number Diff line number Diff line change
    @@ -58,7 +58,8 @@ def euclidean_proj_simplex(v, s=1):
    # get the number of > 0 components of the optimal solution
    rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
    # compute the Lagrange multiplier associated to the simplex constraint
    theta = max(0, float(cssv[rho] - s) / rho)
    theta = float(cssv[rho] - s) / rho
    # theta = max(0, float(cssv[rho] - s) / rho) # used by Duchi: projects in ball
    # compute the projection by thresholding v using theta
    w = (v - theta).clip(min=0)
    return w
  4. daien revised this gist Oct 27, 2011. 1 changed file with 7 additions and 13 deletions.
    20 changes: 7 additions & 13 deletions simplex_projection.py
    Original file line number Diff line number Diff line change
    @@ -50,23 +50,17 @@ def euclidean_proj_simplex(v, s=1):
    if v.sum() == s and np.alltrue(v >= 0):
    # best projection: itself!
    return v
    # set to zero the negative part
    v = (v > 0) * v
    # get the array of cumulative sums of a sorted (decreasing) copy of v
    cssv = np.r_[0, np.cumsum(np.sort(v)[::-1])] # with a sentinel!
    u = np.sort(v)[::-1]
    cssv = np.cumsum(u)
    # get the number of > 0 components of the optimal solution
    rho = -1
    # iterate from 1 to n to find the max index (+ use sentinel)
    for j in xrange(1, n+1):
    if (1 - 1.0 / j) * cssv[j] - cssv[j-1] + 1.0 / j * s <= 0:
    rho = j - 1 # previous index was the largest positive
    break
    # Note: better to iterate from start than from end or vectorizing as the
    # solution is expected to be sparse and therefore have a small rho
    # check we found rho
    assert rho > 0, "Bug: did not find rho (%d)" % rho
    rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
    # compute the Lagrange multiplier associated to the simplex constraint
    theta = 1.0 / rho * (cssv[rho] - s)
    theta = max(0, float(cssv[rho] - s) / rho)
    # compute the projection by thresholding v using theta
    w = np.clip(v - theta, 0, np.inf).astype(v.dtype)
    w = (v - theta).clip(min=0)
    return w


  5. daien created this gist Oct 8, 2011.
    114 changes: 114 additions & 0 deletions simplex_projection.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,114 @@
    """ Module to compute projections on the positive simplex or the L1-ball
    A positive simplex is a set X = { \mathbf{x} | \sum_i x_i = s, x_i \geq 0 }
    The (unit) L1-ball is the set X = { \mathbf{x} | || x ||_1 \leq 1 }
    Adrien Gaidon - INRIA - 2011
    """


    import numpy as np


    def euclidean_proj_simplex(v, s=1):
        r""" Compute the Euclidean projection on a positive simplex

        Solves the optimisation problem (using the algorithm from [1]):

            min_w 0.5 * || w - v ||_2^2 , s.t. \sum_i w_i = s, w_i >= 0

        Parameters
        ----------
        v: (n,) numpy array,
            n-dimensional vector to project
        s: int, optional, default: 1,
            radius of the simplex

        Returns
        -------
        w: (n,) numpy array,
            Euclidean projection of v on the simplex

        Raises
        ------
        AssertionError
            if s is not strictly positive
        ValueError
            if v is not a 1-D array (via the tuple unpacking of v.shape)

        Notes
        -----
        The complexity of this algorithm is in O(n log(n)) as it involves
        sorting v. Better alternatives exist for high-dimensional sparse
        vectors (cf. [1]), but this implementation still easily scales to
        millions of dimensions.

        References
        ----------
        [1] Efficient Projections onto the l1-Ball for Learning in High
            Dimensions.  John Duchi, Shai Shalev-Shwartz, Yoram Singer,
            and Tushar Chandra.  ICML 2008.
            http://www.cs.berkeley.edu/~jduchi/projects/DuchiSiShCh08.pdf
        """
        assert s > 0, "Radius s must be strictly positive (%d <= 0)" % s
        n, = v.shape  # will raise ValueError if v is not 1-D
        # check if we are already on the simplex
        if v.sum() == s and np.all(v >= 0):
            # best projection: itself!
            return v
        # get the array of cumulative sums of a sorted (decreasing) copy of v
        u = np.sort(v)[::-1]
        cssv = np.cumsum(u)
        # get the number of > 0 components of the optimal solution:
        # rho is the last (0-based) index where the sorted value still
        # exceeds the running threshold (cssv - s) / k
        rho = np.nonzero(u * np.arange(1, n + 1) > (cssv - s))[0][-1]
        # compute the Lagrange multiplier associated to the simplex constraint
        # NOTE: rho is a 0-based index, hence the division by (rho + 1)
        theta = (cssv[rho] - s) / (rho + 1.0)
        # compute the projection by thresholding v using theta
        w = (v - theta).clip(min=0)
        return w


    def euclidean_proj_l1ball(v, s=1):
        """ Compute the Euclidean projection on a L1-ball

        Solves the optimisation problem (using the algorithm from [1]):

            min_w 0.5 * || w - v ||_2^2 , s.t. || w ||_1 <= s

        Parameters
        ----------
        v: (n,) numpy array,
            n-dimensional vector to project
        s: int, optional, default: 1,
            radius of the L1-ball

        Returns
        -------
        w: (n,) numpy array,
            Euclidean projection of v on the L1-ball of radius s

        Notes
        -----
        Solves the problem by a reduction to the positive simplex case.

        See also
        --------
        euclidean_proj_simplex
        """
        assert s > 0, "Radius s must be strictly positive (%d <= 0)" % s
        (n,) = v.shape  # will raise ValueError if v is not 1-D
        # work on the magnitudes: the projection's signs match v's signs
        abs_v = np.abs(v)
        # if v is already inside the ball, it is its own projection
        if abs_v.sum() <= s:
            return v
        # otherwise the optimum lies on the boundary (|| w ||_1 == s):
        # project the magnitudes on the simplex, then restore the signs
        return np.sign(v) * euclidean_proj_simplex(abs_v, s=s)