daien · October 8, 2011 16:56 · Nov 14, 2011 · Nov 13, 2011 · Nov 13, 2011 · Oct 27, 2011
diff --git a/simplex_projection.py b/simplex_projection.py
@@ -56,7 +56,7 @@ def euclidean_proj_simplex(v, s=1):
     # get the number of > 0 components of the optimal solution
     rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
     # compute the Lagrange multiplier associated to the simplex constraint
-    theta = float(cssv[rho] - s) / rho
+    theta = (cssv[rho] - s) / (rho + 1.0)
     # compute the projection by thresholding v using theta
     w = (v - theta).clip(min=0)
     return w

diff --git a/simplex_projection.py b/simplex_projection.py
@@ -50,16 +50,13 @@ def euclidean_proj_simplex(v, s=1):
     if v.sum() == s and np.alltrue(v >= 0):
         # best projection: itself!
         return v
-    # set to zero the negative part
-    v = (v > 0) * v
     # get the array of cumulative sums of a sorted (decreasing) copy of v
     u = np.sort(v)[::-1]
     cssv = np.cumsum(u)
     # get the number of > 0 components of the optimal solution
     rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
     # compute the Lagrange multiplier associated to the simplex constraint
     theta = float(cssv[rho] - s) / rho
-    # theta = max(0, float(cssv[rho] - s) / rho)  # used by Duchi: projects in ball
     # compute the projection by thresholding v using theta
     w = (v - theta).clip(min=0)
     return w

diff --git a/simplex_projection.py b/simplex_projection.py
@@ -58,7 +58,8 @@ def euclidean_proj_simplex(v, s=1):
     # get the number of > 0 components of the optimal solution
     rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
     # compute the Lagrange multiplier associated to the simplex constraint
-    theta = max(0, float(cssv[rho] - s) / rho)
+    theta = float(cssv[rho] - s) / rho
+    # theta = max(0, float(cssv[rho] - s) / rho)  # used by Duchi: projects in ball
     # compute the projection by thresholding v using theta
     w = (v - theta).clip(min=0)
     return w

diff --git a/simplex_projection.py b/simplex_projection.py
@@ -50,23 +50,17 @@ def euclidean_proj_simplex(v, s=1):
     if v.sum() == s and np.alltrue(v >= 0):
         # best projection: itself!
         return v
+    # set to zero the negative part
+    v = (v > 0) * v
     # get the array of cumulative sums of a sorted (decreasing) copy of v
-    cssv = np.r_[0, np.cumsum(np.sort(v)[::-1])]  # with a sentinel!
+    u = np.sort(v)[::-1]
+    cssv = np.cumsum(u)
     # get the number of > 0 components of the optimal solution
-    rho = -1
-    # iterate from 1 to n to find the max index (+ use sentinel)
-    for j in xrange(1, n+1):
-        if (1 - 1.0 / j) * cssv[j] - cssv[j-1] + 1.0 / j * s <= 0:
-            rho = j - 1  # previous index was the largest positive
-            break
-    # Note: better to iterate from start than from end or vectorizing as the
-    #       solution is expected to be sparse and therefore have a small rho
-    # check we found rho
-    assert rho > 0, "Bug: did not find rho (%d)" % rho
+    rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
     # compute the Lagrange multiplier associated to the simplex constraint
-    theta = 1.0 / rho * (cssv[rho] - s)
+    theta = max(0, float(cssv[rho] - s) / rho)
     # compute the projection by thresholding v using theta
-    w = np.clip(v - theta, 0, np.inf).astype(v.dtype)
+    w = (v - theta).clip(min=0)
     return w
 
 

diff --git a/simplex_projection.py b/simplex_projection.py
@@ -0,0 +1,114 @@
+""" Module to compute projections on the positive simplex or the L1-ball
+
+A positive simplex is a set X = { \mathbf{x} | \sum_i x_i = s, x_i \geq 0 }
+
+The (unit) L1-ball is the set X = { \mathbf{x} | || x ||_1 \leq 1 }
+
+Adrien Gaidon - INRIA - 2011
+"""
+
+
+import numpy as np
+
+
+def euclidean_proj_simplex(v, s=1):
+    """ Compute the Euclidean projection on a positive simplex
+
+    Solves the optimisation problem (using the algorithm from [1]):
+
+        min_w 0.5 * || w - v ||_2^2 , s.t. \sum_i w_i = s, w_i >= 0 
+
+    Parameters
+    ----------
+    v: (n,) numpy array,
+       n-dimensional vector to project
+
+    s: int, optional, default: 1,
+       radius of the simplex
+
+    Returns
+    -------
+    w: (n,) numpy array,
+       Euclidean projection of v on the simplex
+
+    Notes
+    -----
+    The complexity of this algorithm is in O(n log(n)) as it involves sorting v.
+    Better alternatives exist for high-dimensional sparse vectors (cf. [1])
+    However, this implementation still easily scales to millions of dimensions.
+
+    References
+    ----------
+    [1] Efficient Projections onto the .1-Ball for Learning in High Dimensions
+        John Duchi, Shai Shalev-Shwartz, Yoram Singer, and Tushar Chandra.
+        International Conference on Machine Learning (ICML 2008)
+        http://www.cs.berkeley.edu/~jduchi/projects/DuchiSiShCh08.pdf
+    """
+    assert s > 0, "Radius s must be strictly positive (%d <= 0)" % s
+    n, = v.shape  # will raise ValueError if v is not 1-D
+    # check if we are already on the simplex
+    if v.sum() == s and np.alltrue(v >= 0):
+        # best projection: itself!
+        return v
+    # get the array of cumulative sums of a sorted (decreasing) copy of v
+    cssv = np.r_[0, np.cumsum(np.sort(v)[::-1])]  # with a sentinel!
+    # get the number of > 0 components of the optimal solution
+    rho = -1
+    # iterate from 1 to n to find the max index (+ use sentinel)
+    for j in xrange(1, n+1):
+        if (1 - 1.0 / j) * cssv[j] - cssv[j-1] + 1.0 / j * s <= 0:
+            rho = j - 1  # previous index was the largest positive
+            break
+    # Note: better to iterate from start than from end or vectorizing as the
+    #       solution is expected to be sparse and therefore have a small rho
+    # check we found rho
+    assert rho > 0, "Bug: did not find rho (%d)" % rho
+    # compute the Lagrange multiplier associated to the simplex constraint
+    theta = 1.0 / rho * (cssv[rho] - s)
+    # compute the projection by thresholding v using theta
+    w = np.clip(v - theta, 0, np.inf).astype(v.dtype)
+    return w
+
+
+def euclidean_proj_l1ball(v, s=1):
+    """ Compute the Euclidean projection on a L1-ball
+
+    Solves the optimisation problem (using the algorithm from [1]):
+
+        min_w 0.5 * || w - v ||_2^2 , s.t. || w ||_1 <= s
+
+    Parameters
+    ----------
+    v: (n,) numpy array,
+       n-dimensional vector to project
+
+    s: int, optional, default: 1,
+       radius of the L1-ball
+
+    Returns
+    -------
+    w: (n,) numpy array,
+       Euclidean projection of v on the L1-ball of radius s
+
+    Notes
+    -----
+    Solves the problem by a reduction to the positive simplex case
+
+    See also
+    --------
+    euclidean_proj_simplex
+    """
+    assert s > 0, "Radius s must be strictly positive (%d <= 0)" % s
+    n, = v.shape  # will raise ValueError if v is not 1-D
+    # compute the vector of absolute values
+    u = np.abs(v)
+    # check if v is already a solution
+    if u.sum() <= s:
+        # L1-norm is <= s
+        return v
+    # v is not already a solution: optimum lies on the boundary (norm == s)
+    # project *u* on the simplex
+    w = euclidean_proj_simplex(u, s=s)
+    # compute the solution to the original problem on v
+    w *= np.sign(v)
+    return w