Skip to content

Commit 93af958

Browse files
authored
Add _expected_retention_elasticity to ShiftedBetaGeoModel (#2028)
* add expected_retention_elasticity * docstrings * unit tests
1 parent d9a2cbf commit 93af958

File tree

2 files changed

+195
-14
lines changed

2 files changed

+195
-14
lines changed

pymc_marketing/clv/models/shifted_beta_geo.py

Lines changed: 86 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class ShiftedBetaGeoModel(CLVModel):
4242
This model requires data to be summarized by *recency*, *T*, and *cohort* for each customer.
4343
Modeling assumptions require *1 <= recency <= T*, and *T >= 2*.
4444
45-
First introduced by Fader & Hardie in [1]_.
45+
First introduced by Fader & Hardie in [1]_, with additional expressions described in [2]_.
4646
4747
Parameters
4848
----------
@@ -98,23 +98,39 @@ class ShiftedBetaGeoModel(CLVModel):
9898
model.fit(method="mcmc")
9999
model.fit_summary()
100100
101+
101102
# Predict probability customers are still active
102103
expected_alive_probability = model.expected_probability_alive(
103104
active_customers,
104105
future_t=0,
105106
)
106107
107-
# Predict retention rate of a given cohort in the current time period
108+
# Predict retention rate for a specific cohort
109+
cohort_name = "2025-02-01"
110+
108111
expected_retention_rate = model.expected_retention_rate(
109112
future_t=0,
110-
).sel(cohort=cohort)
113+
).sel(cohort=cohort_name)
114+
115+
# Predict expected remaining lifetime for all customers with a 5% discount rate
116+
expected_residual_lifetime = model.expected_residual_lifetime(
117+
discount_rate=0.05,
118+
)
119+
120+
# Predict expected retention elasticity for all customers in a specific cohort
121+
expected_retention_elasticity = model.expected_retention_elasticity(
122+
discount_rate=0.05,
123+
).sel(cohort=cohort_name)
111124
112125
113126
References
114127
----------
115-
.. [1] Fader, P. S., & Hardie, B. G. (2007). How to project customer retention.
116-
Journal of Interactive Marketing, 21(1), 76-90.
117-
https://faculty.wharton.upenn.edu/wp-content/uploads/2012/04/Fader_hardie_jim_07.pdf
128+
.. [1] Fader, P. S., & Hardie, B. G. (2007). "How to project customer retention."
129+
Journal of Interactive Marketing, 21(1), 76-90.
130+
https://faculty.wharton.upenn.edu/wp-content/uploads/2012/04/Fader_hardie_jim_07.pdf
131+
.. [2] Fader, P. S., & Hardie, B. G. (2010). "Customer-Base Valuation in a Contractual Setting:
132+
The Perils of Ignoring Heterogeneity." Marketing Science, 29(1), 85-93.
133+
https://faculty.wharton.upenn.edu/wp-content/uploads/2012/04/Fader_hardie_contractual_mksc_10.pdf
118134
"""
119135

120136
_model_type = "Shifted Beta-Geometric"
@@ -328,7 +344,10 @@ def expected_retention_rate(
328344
*,
329345
future_t: int | np.ndarray | pd.Series | None = None,
330346
) -> xarray.DataArray:
331-
"""Compute expected retention rate by cohort.
347+
"""Compute expected retention rate for each customer.
348+
349+
This is the percentage of customers who were active in the previous time period
350+
and are still active in the current period. Retention rates are expected to increase over time.
332351
333352
The *data* parameter is only required for out-of-sample customers.
334353
@@ -346,7 +365,7 @@ def expected_retention_rate(
346365
347366
References
348367
----------
349-
.. [1] Fader, P. S., & Hardie, B. G. (2007). How to project customer retention.
368+
.. [1] Fader, P. S., & Hardie, B. G. (2007). "How to project customer retention."
350369
Journal of Interactive Marketing, 21(1), 76-90.
351370
https://faculty.wharton.upenn.edu/wp-content/uploads/2012/04/Fader_hardie_jim_07.pdf
352371
"""
@@ -376,7 +395,7 @@ def expected_probability_alive(
376395
*,
377396
future_t: int | np.ndarray | pd.Series | None = None,
378397
) -> xarray.DataArray:
379-
"""Compute expected probability of contract renewal by cohort.
398+
"""Compute expected probability of contract renewal for each customer.
380399
381400
The *data* parameter is only required for out-of-sample customers.
382401
@@ -394,7 +413,7 @@ def expected_probability_alive(
394413
395414
References
396415
----------
397-
.. [1] Fader, P. S., & Hardie, B. G. (2007). How to project customer retention.
416+
.. [1] Fader, P. S., & Hardie, B. G. (2007). "How to project customer retention."
398417
Journal of Interactive Marketing, 21(1), 76-90.
399418
https://faculty.wharton.upenn.edu/wp-content/uploads/2012/04/Fader_hardie_jim_07.pdf
400419
"""
@@ -431,10 +450,11 @@ def expected_residual_lifetime(
431450
*,
432451
discount_rate: float | np.ndarray | pd.Series | None = 0.0,
433452
) -> xarray.DataArray:
434-
"""Compute expected residual lifetime of customers by cohort.
453+
"""Compute expected residual lifetime of each customer.
435454
436455
This is the expected number of periods a customer will remain active after the current time period,
437-
given a discount rate. If no discount rate is provided, infinite lifetime estimates may be returned.
456+
subject to a discount rate for net present value (NPV) calculations.
457+
It is recommended to set a discount rate > 0 to avoid infinite lifetime estimates.
438458
439459
Adapted from equation (6) in [1]_.
440460
@@ -450,8 +470,8 @@ def expected_residual_lifetime(
450470
451471
References
452472
----------
453-
.. [1] Fader, P. S., & Hardie, B. G. (2010). Customer-Base Valuation in a Contractual Setting:
454-
The Perils of Ignoring Heterogeneity. Marketing Science, 29(1), 85-93.
473+
.. [1] Fader, P. S., & Hardie, B. G. (2010). "Customer-Base Valuation in a Contractual Setting:
474+
The Perils of Ignoring Heterogeneity." Marketing Science, 29(1), 85-93.
455475
https://faculty.wharton.upenn.edu/wp-content/uploads/2012/04/Fader_hardie_contractual_mksc_10.pdf
456476
"""
457477
if data is None:
@@ -476,6 +496,58 @@ def expected_residual_lifetime(
476496
"chain", "draw", "customer_id", "cohort", missing_dims="ignore"
477497
)
478498

499+
def expected_retention_elasticity(
    self,
    data: pd.DataFrame | None = None,
    *,
    discount_rate: float | np.ndarray | pd.Series | None = 0.0,
) -> xarray.DataArray:
    """Compute expected retention elasticity for each customer.

    Retention elasticity is the percent increase in expected residual lifetime
    produced by a 1% increase in the retention rate, subject to a discount rate
    for net present value (NPV) calculations.
    A discount rate > 0 is recommended to avoid infinite elasticity estimates.

    Adapted from equation (8) in [1]_.

    Parameters
    ----------
    data : ~pandas.DataFrame
        Optional dataframe containing the following columns:

        * `customer_id`: Unique customer identifier
        * `T`: Number of time periods customer has been active
        * `cohort`: Customer cohort label

        When omitted, the data stored on the model (``self.data``) is used.
    discount_rate : float, array-like, or None
        Discount rate to apply for net present value estimations. Any value
        other than ``None`` is written to a `discount_rate` column of *data*
        before prediction. With ``None`` no column is assigned; presumably
        *data* must then already carry a `discount_rate` column -- confirm
        against ``_extract_predictive_variables``.

    Returns
    -------
    xarray.DataArray
        Retention elasticity estimates, with dimensions ordered as
        ``chain``, ``draw``, ``customer_id``, ``cohort`` (absent dimensions
        are ignored).

    References
    ----------
    .. [1] Fader, P. S., & Hardie, B. G. (2010). "Customer-Base Valuation in a Contractual Setting:
           The Perils of Ignoring Heterogeneity." Marketing Science, 29(1), 85-93.
           https://faculty.wharton.upenn.edu/wp-content/uploads/2012/04/Fader_hardie_contractual_mksc_10.pdf
    """
    # Fall back to the data stored on the model when none is supplied.
    source = self.data if data is None else data

    # `assign` returns a new frame, so the caller's dataframe is left untouched.
    if discount_rate is not None:
        source = source.assign(discount_rate=discount_rate)

    predictive = self._extract_predictive_variables(
        source, customer_varnames=["T", "discount_rate"]
    )
    alpha, beta = predictive["alpha"], predictive["beta"]
    periods = predictive["T"]
    rate = predictive["discount_rate"]

    # Gaussian hypergeometric function evaluated at the one-period discount factor.
    discount_factor = 1 / (1 + rate)
    elasticity = hyp2f1(
        1, beta + periods - 1, alpha + beta + periods - 1, discount_factor
    )

    return elasticity.transpose(
        "chain", "draw", "customer_id", "cohort", missing_dims="ignore"
    )
550+
479551

480552
class ShiftedBetaGeoModelIndividual(CLVModel):
481553
"""Shifted Beta Geometric model for individual customers.

tests/clv/models/test_shifted_beta_geo.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,115 @@ def test_expected_residual_lifetime(self, prediction_targets, erl_test_data):
609609
expected_residual_lifetime_case2, residual_lifetime_case2_obs, rtol=1e-2
610610
)
611611

612+
def test_expected_retention_elasticity(self, erl_test_data):
    """Validate expected_retention_elasticity against values derived from other model outputs.

    The paper does not tabulate retention elasticity, so the reference values
    are derived from the discounted expected residual lifetime (DERL) and the
    retention rate:

        retention_elasticity(T) = DERL(T-1) / retention_rate(T)
    """
    discount = 0.1
    cohorts = ("case1", "case2")

    def posterior_mean_by_cohort(predictions):
        # Average over posterior samples, then split into one array per cohort.
        averaged = predictions.mean(("chain", "draw"))
        return {name: averaged.sel(cohort=name).values for name in cohorts}

    # Elasticity as reported by the model with discount_rate=0.1
    elasticity = posterior_mean_by_cohort(
        self.erl_model.expected_retention_elasticity(
            erl_test_data,
            discount_rate=discount,
        )
    )

    ### DERIVE VALUES FROM OTHER EXPRESSIONS FOR VALIDATION ###

    # Retention rates for T (current period)
    retention = posterior_mean_by_cohort(
        self.erl_model.expected_retention_rate(
            erl_test_data,
            future_t=0,
        )
    )

    # Shift every customer one period back to obtain DERL(T-1);
    # rows that would end up at T=0 are dropped (can't have T < 1 in the model).
    previous_period = erl_test_data.copy()
    previous_period["T"] = previous_period["T"] - 1
    previous_period = previous_period[previous_period["T"] >= 1]

    derl_previous = posterior_mean_by_cohort(
        self.erl_model.expected_residual_lifetime(
            previous_period,
            discount_rate=discount,
        )
    )

    # DERL(T-1) / retention_rate(T). The first retention entry is skipped because
    # there is no T-1=0 counterpart (assumes each cohort's first row is T=1 -- confirm
    # against the erl_test_data fixture).
    derived = {name: derl_previous[name] / retention[name][1:] for name in cohorts}

    ### ASSERTIONS ###
    # Model elasticity vs. formula-based elasticity, skipping the first value (T=1).
    # Original author's note on the loose case2 tolerance:
    # "T=1 variance is very high, but var for T=5 is 1e-2".
    # NOTE(review): rtol=2.0 accepts up to 200% relative error -- confirm it is intended.
    tolerances = (("case1", 1e-1), ("case2", 2.0))
    for name, rtol in tolerances:
        np.testing.assert_allclose(
            elasticity[name][1:],
            derived[name],
            rtol=rtol,
            err_msg=f"{name.capitalize()}: Model elasticity doesn't match DERL(T-1)/retention_rate(T)",
        )

    # Additional validation: elasticity should be positive
    for name in cohorts:
        assert np.all(elasticity[name] > 0)

    # Elasticity should generally be higher for the low retention case
    # (customers with lower retention are more sensitive to retention changes)
    assert np.mean(elasticity["case2"]) > np.mean(elasticity["case1"])

    # Retention rates must be valid (strictly between 0 and 1) ...
    for name in cohorts:
        rates = retention[name]
        assert np.all((rates > 0) & (rates < 1))

    # ... and non-decreasing over time (with more renewals)
    for name in cohorts:
        assert np.all(np.diff(retention[name]) >= 0)
720+
612721

613722
class TestShiftedBetaGeoModelIndividual:
614723
@classmethod

0 commit comments

Comments
 (0)