added weights and warm_start

PABannier · PABannier · commit 51b4cfed75dd · 2022-04-22T17:53:27.000+02:00
diff --git a/skglm/gram_solver.py b/skglm/gram_solver.py
@@ -19,14 +19,17 @@
 
 alpha = alpha_max * reg / n_samples
 
+weights = np.random.normal(2, 0.4, n_features)
+weights_grp = np.random.normal(2, 0.4, n_features // group_size)
+
 # Lasso
 print("#" * 15)
 print("Lasso")
 print("#" * 15)
 start = time()
-w = gram_lasso(X, y, alpha, max_iter, tol)
+w = gram_lasso(X, y, alpha, max_iter, tol, weights=weights)
 gram_lasso_time = time() - start
-clf_sk = Lasso(alpha, tol=tol, fit_intercept=False)
+clf_sk = Lasso(alpha, weights=weights, tol=tol, fit_intercept=False)
 start = time()
 clf_sk.fit(X, y)
 celer_lasso_time = time() - start
@@ -42,9 +45,10 @@
 print("Group Lasso")
 print("#" * 15)
 start = time()
-w = gram_group_lasso(X, y, alpha, group_size, max_iter, tol)
+w = gram_group_lasso(X, y, alpha, group_size, max_iter, tol, weights=weights_grp)
 gram_group_lasso_time = time() - start
-clf_celer = GroupLasso(group_size, alpha, tol=tol, fit_intercept=False)
+clf_celer = GroupLasso(group_size, alpha, weights=weights_grp, tol=tol,
+                       fit_intercept=False)
 start = time()
 clf_celer.fit(X, y)
 celer_group_lasso_time = time() - start
diff --git a/skglm/solvers/gram.py b/skglm/solvers/gram.py
@@ -7,36 +7,37 @@
 
 
 @njit
-def primal(alpha, y, X, w):
+def primal(alpha, y, X, w, weights):
     r = y - X @ w
     p_obj = (r @ r) / (2 * len(y))
-    return p_obj + alpha * np.sum(np.abs(w))
+    return p_obj + alpha * np.sum(np.abs(w * weights))
 
 
 @njit
-def primal_grp(alpha, y, X, w, grp_ptr, grp_indices):
+def primal_grp(alpha, y, X, w, grp_ptr, grp_indices, weights):
     r = y - X @ w
     p_obj = (r @ r) / (2 * len(y))
     for g in range(len(grp_ptr) - 1):
         w_g = w[grp_indices[grp_ptr[g]:grp_ptr[g + 1]]]
-        p_obj += alpha * norm(w_g, ord=2)
+        p_obj += alpha * norm(w_g * weights[g], ord=2)
     return p_obj
 
 
-def gram_lasso(X, y, alpha, max_iter, tol, check_freq=10):
+def gram_lasso(X, y, alpha, max_iter, tol, w_init=None, weights=None, check_freq=10):
     p_obj_prev = np.inf
     n_features = X.shape[1]
     grads = X.T @ y / len(y)
     G = X.T @ X
     lipschitz = np.zeros(n_features, dtype=X.dtype)
     for j in range(n_features):
         lipschitz[j] = (X[:, j] ** 2).sum() / len(y)
-    w = np.zeros(n_features)
+    w = w_init if w_init is not None else np.zeros(n_features)
+    weights = weights if weights is not None else np.ones(n_features)
     # CD
     for n_iter in range(max_iter):
-        cd_epoch(X, G, grads, w, alpha, lipschitz)
+        cd_epoch(X, G, grads, w, alpha, lipschitz, weights)
         if n_iter % check_freq == 0:
-            p_obj = primal(alpha, y, X, w)
+            p_obj = primal(alpha, y, X, w, weights)
             if p_obj_prev - p_obj < tol:
                 print("Convergence reached!")
                 break
@@ -45,7 +46,8 @@ def gram_lasso(X, y, alpha, max_iter, tol, check_freq=10):
     return w
 
 
-def gram_group_lasso(X, y, alpha, groups, max_iter, tol, check_freq=50):
+def gram_group_lasso(X, y, alpha, groups, max_iter, tol, w_init=None, weights=None, 
+                     check_freq=50):
     p_obj_prev = np.inf
     n_features = X.shape[1]
     grp_ptr, grp_indices = _grp_converter(groups, X.shape[1])
@@ -56,12 +58,13 @@ def gram_group_lasso(X, y, alpha, groups, max_iter, tol, check_freq=50):
     for g in range(n_groups):
         X_g = X[:, grp_indices[grp_ptr[g]:grp_ptr[g + 1]]]
         lipschitz[g] = norm(X_g, ord=2) ** 2 / len(y)
-    w = np.zeros(n_features)
+    w = w_init if w_init is not None else np.zeros(n_features)
+    weights = weights if weights is not None else np.ones(n_groups)
     # BCD
     for n_iter in range(max_iter):
-        bcd_epoch(X, G, grads, w, alpha, lipschitz, grp_indices, grp_ptr)
+        bcd_epoch(X, G, grads, w, alpha, lipschitz, grp_indices, grp_ptr, weights)
         if n_iter % check_freq == 0:
-            p_obj = primal_grp(alpha, y, X, w, grp_ptr, grp_indices)
+            p_obj = primal_grp(alpha, y, X, w, grp_ptr, grp_indices, weights)
             if p_obj_prev - p_obj < tol:
                 print("Convergence reached!")
                 break
@@ -71,26 +74,27 @@ def gram_group_lasso(X, y, alpha, groups, max_iter, tol, check_freq=50):
 
 
 @njit
-def cd_epoch(X, G, grads, w, alpha, lipschitz):
+def cd_epoch(X, G, grads, w, alpha, lipschitz, weights):
     n_features = X.shape[1]
     for j in range(n_features):
-        if lipschitz[j] == 0.:
+        if lipschitz[j] == 0. or weights[j] == np.inf:
             continue
         old_w_j = w[j]
-        w[j] = ST(w[j] + grads[j] / lipschitz[j], alpha / lipschitz[j])
+        w[j] = ST(w[j] + grads[j] / lipschitz[j], alpha / lipschitz[j] * weights[j])
         if old_w_j != w[j]:
             grads += G[j, :] * (old_w_j - w[j]) / len(X)
 
 
 @njit
-def bcd_epoch(X, G, grads, w, alpha, lipschitz, grp_indices, grp_ptr):
+def bcd_epoch(X, G, grads, w, alpha, lipschitz, grp_indices, grp_ptr, weights):
     n_groups = len(grp_ptr) - 1
     for g in range(n_groups):
-        if lipschitz[g] == 0.:
+        if lipschitz[g] == 0. or weights[g] == np.inf:  # zero-norm or excluded group
             continue
         idx = grp_indices[grp_ptr[g]:grp_ptr[g + 1]]
         old_w_g = w[idx].copy()
-        w[idx] = BST(w[idx] + grads[idx] / lipschitz[g], alpha / lipschitz[g])
+        w[idx] = BST(w[idx] + grads[idx] / lipschitz[g], alpha / lipschitz[g]
+                     * weights[g])
         diff = old_w_g - w[idx]
         if np.any(diff != 0.):
             grads += diff @ G[idx, :] / len(X)