Skip to content

Commit 096d220

Browse files
authored
Now precomputing WX \odot X to index into for validation sums instead of recomputing WX \odot X for every validation partition (#14)
1 parent ea2379b commit 096d220

File tree

3 files changed

+32
-28
lines changed

3 files changed

+32
-28
lines changed

cvmatrix/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "3.1.1"
1+
__version__ = "3.1.2"

cvmatrix/cvmatrix.py

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,16 @@ class CVMatrix:
126126
`weights` are provided and otherwise the row of column-wise sums of
127127
:math:`\mathbf{Y}\odot\mathbf{Y}`.
128128
129+
sq_X : np.ndarray or None
130+
The total weighted squared predictor matrix `X` for the entire dataset. This is
131+
:math:`\mathbf{W}\mathbf{X}\odot\mathbf{X}`. This is computed only if
132+
`scale_X` is `True`.
133+
134+
sq_Y : np.ndarray or None
135+
The total weighted squared response matrix `Y` for the entire dataset. This is
136+
:math:`\mathbf{W}\mathbf{Y}\odot\mathbf{Y}`. This is computed only if
137+
`scale_Y` is `True` and `Y` is not `None`.
138+
129139
WX : np.ndarray or None
130140
The total weighted predictor matrix `X` for the entire dataset. This is
131141
:math:`\mathbf{W}\mathbf{X}`.
@@ -176,6 +186,8 @@ def __init__(
176186
self.sum_Y = None
177187
self.sum_sq_X = None
178188
self.sum_sq_Y = None
189+
self.sq_X = None
190+
self.sq_Y = None
179191
self.WX = None
180192
self.WY = None
181193
self.weights = None
@@ -608,21 +620,21 @@ def _compute_training_stats(
608620
if return_X_std or return_Y_std:
609621
divisor = self._compute_std_divisor(sum_w_train, num_nonzero_w_train)
610622
if return_X_std:
623+
sum_sq_X_val = np.sum(self.sq_X[val_indices], axis=0, keepdims=True)
624+
sum_sq_X_train = self._compute_train_mat_sum(sum_sq_X_val, self.sum_sq_X)
611625
X_train_std = self._compute_training_mat_std(
612-
X_val,
613-
X_val_unweighted,
626+
sum_sq_X_train,
614627
X_train_mean,
615-
self.sum_sq_X,
616628
sum_X_train,
617629
sum_w_train,
618630
divisor,
619631
)
620632
if return_Y_std:
633+
sum_sq_Y_val = np.sum(self.sq_Y[val_indices], axis=0, keepdims=True)
634+
sum_sq_Y_train = self._compute_train_mat_sum(sum_sq_Y_val, self.sum_sq_Y)
621635
Y_train_std = self._compute_training_mat_std(
622-
Y_val,
623-
Y_val_unweighted,
636+
sum_sq_Y_train,
624637
Y_train_mean,
625-
self.sum_sq_Y,
626638
sum_Y_train,
627639
sum_w_train,
628640
divisor,
@@ -960,10 +972,8 @@ def _compute_std_divisor(
960972

961973
def _compute_training_mat_std(
962974
self,
963-
mat_val: np.ndarray,
964-
mat_val_unweighted: np.ndarray,
975+
sum_sq_mat_train: np.ndarray,
965976
mat_train_mean: np.ndarray,
966-
sum_sq_mat: np.ndarray,
967977
sum_mat_train: np.ndarray,
968978
sum_w_train: float,
969979
divisor: float,
@@ -974,21 +984,16 @@ def _compute_training_mat_std(
974984
975985
Parameters
976986
----------
977-
mat_val : Array of shape (N_val, K) or (N_val, M)
978-
The validation set of `WX` or `WY`.
979-
980-
mat_val_unweighted : Array of shape (N_val, K) or (N_val, M)
981-
The validation set of `X` or `Y`.
987+
sum_sq_mat_train : Array of shape (1, K) or (1, M)
988+
The row of column-wise sums of products between the training set weighted matrix
989+
and the training set unweighted matrix. This is the column-wise sum over the training rows of :math:`\mathbf{W}\mathbf{X}\odot\mathbf{X}`
990+
or :math:`\mathbf{W}\mathbf{Y}\odot\mathbf{Y}`.
982991
983992
mat_train_mean : Array of shape (1, K) or (1, M)
984993
The row of column-wise weighted means of the training matrix.
985994
986-
sum_sq_mat : Array of shape (1, K) or (1, M)
987-
The row of column-wise sums of products between the total weighted matrix
988-
and the total unweighted matrix.
989-
990-
sum_mat_val : Array of shape (1, K) or (1, M)
991-
The row of column-wise sums of validation set of `WX` or `WY`.
995+
sum_mat_train : Array of shape (1, K) or (1, M)
996+
The row of column-wise sums of the training set of `WX` or `WY`.
992997
993998
sum_w_train : float
994999
The size of the training set.
@@ -1002,13 +1007,10 @@ def _compute_training_mat_std(
10021007
Array of shape (1, K) or (1, M)
10031008
The row of column-wise standard deviations of the training set matrix.
10041009
"""
1005-
train_sum_sq_mat = sum_sq_mat - np.sum(
1006-
mat_val * mat_val_unweighted, axis=0, keepdims=True
1007-
)
10081010
mat_train_var = (
10091011
-2 * mat_train_mean * sum_mat_train
10101012
+ sum_w_train * mat_train_mean**2
1011-
+ train_sum_sq_mat
1013+
+ sum_sq_mat_train
10121014
) / divisor
10131015
mat_train_var[mat_train_var < 0] = 0
10141016
mat_train_std = np.sqrt(mat_train_var)
@@ -1119,10 +1121,12 @@ def _init_stats(self) -> None:
11191121
if (self.center_X or self.center_Y or self.scale_Y) and self.Y is not None:
11201122
self.sum_Y = np.sum(self.WY, axis=0, keepdims=True)
11211123
if self.scale_X:
1122-
self.sum_sq_X = np.sum(self.WX * self.X, axis=0, keepdims=True)
1124+
self.sq_X = self.WX * self.X
1125+
self.sum_sq_X = np.sum(self.sq_X, axis=0, keepdims=True)
11231126
else:
11241127
self.sum_sq_X = None
11251128
if self.scale_Y and self.Y is not None:
1126-
self.sum_sq_Y = np.sum(self.WY * self.Y, axis=0, keepdims=True)
1129+
self.sq_Y = self.WY * self.Y
1130+
self.sum_sq_Y = np.sum(self.sq_Y, axis=0, keepdims=True)
11271131
else:
11281132
self.sum_sq_Y = None

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "cvmatrix"
3-
version = "3.1.1"
3+
version = "3.1.2"
44
description = "Fast computation of possibly weighted and possibly centered/scaled training set kernel matrices in a cross-validation setting."
55
authors = ["Sm00thix <oleemail@icloud.com>"]
66
maintainers = ["Sm00thix <oleemail@icloud.com>"]

0 commit comments

Comments
 (0)